|
@@ -16,12 +16,15 @@
|
|
* Data structures.
|
|
* Data structures.
|
|
*/
|
|
*/
|
|
|
|
|
|
|
|
+/// Sub-string coordinates for IRI matches.
|
|
|
|
+typedef size_t match_coord_t[2];
|
|
|
|
+
|
|
/// Matching sub-patterns for IRI parts.
|
|
/// Matching sub-patterns for IRI parts.
|
|
struct iri_info_t {
|
|
struct iri_info_t {
|
|
LSUP_NSMap * nsm; ///< NSM handle for prefixed IRI.
|
|
LSUP_NSMap * nsm; ///< NSM handle for prefixed IRI.
|
|
- regmatch_t prefix; ///< Matching group #1.
|
|
|
|
- regmatch_t path; ///< Matching group #5.
|
|
|
|
- regmatch_t frag; ///< Matching group #10.
|
|
|
|
|
|
+ match_coord_t prefix; ///< Matching group #1.
|
|
|
|
+ match_coord_t path; ///< Matching group #5.
|
|
|
|
+ match_coord_t frag; ///< Matching group #10.
|
|
};
|
|
};
|
|
|
|
|
|
|
|
|
|
@@ -881,6 +884,127 @@ term_init (
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * @brief scan an IRI string and parse IRI parts.
|
|
|
|
+ *
|
|
|
|
+ * Experimental replacement of a regex engine for better performance.
|
|
|
|
+ *
|
|
|
|
+ * https://datatracker.ietf.org/doc/html/rfc3986#appendix-B
|
|
|
|
+ *
|
|
|
|
+ * Reference regex and group numbering:
|
|
|
|
+ * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
|
|
|
|
+ * 12 3 4 5 6 7 8 9
|
|
|
|
+ *
|
|
|
|
+ * Capturing groups:
|
|
|
|
+ *
|
|
|
|
+ * scheme = $2
|
|
|
|
+ * authority = $4
|
|
|
|
+ * path = $5
|
|
|
|
+ * query = $7
|
|
|
|
+ * fragment = $9
|
|
|
|
+ *
|
|
|
|
+ * #0: Full parsed URI (http://example.org/123/456/?query=blah#frag)
|
|
|
|
+ * #1: Domain prefix (http://example.org)
|
|
|
|
+ * #2: Protocol (http:)
|
|
|
|
+ * #4: Authority (example.org)
|
|
|
|
+ * #5: Path, excluding query and fragment (/123/456/)
|
|
|
|
+ * #7: Query (query=blah)
|
|
|
|
+ * #9: Fragment (frag)
|
|
|
|
+ */
|
|
|
|
+static LSUP_rc
|
|
|
|
+parse_iri (char *iri_str, LSUP_IRIInfo *iri_info) {
|
|
|
|
+ char *cur = iri_str;
|
|
|
|
+ size_t
|
|
|
|
+ coord[2][10] = {0}, // Store capture group relative position & length
|
|
|
|
+ tmp[2] = {0}; // Temporary storage for capture groups
|
|
|
|
+
|
|
|
|
+ // #1: (([^:/?#]+:)?(//([^/?#]*))?)
|
|
|
|
+ while (*cur != '\0') {
|
|
|
|
+ // #2: ([^:/?#]+:)?
|
|
|
|
+ tmp[0] = cur - iri_str;
|
|
|
|
+ while (
|
|
|
|
+ *cur != ':' && *cur != '/' && *cur != '?'
|
|
|
|
+ && *cur != '#' && *cur != '\0') {
|
|
|
|
+ tmp[1]++;
|
|
|
|
+ cur++;
|
|
|
|
+ }
|
|
|
|
+ if (tmp[1] > 0 && *cur == ':') {
|
|
|
|
+ // Got capture group #2. Store it.
|
|
|
|
+ tmp[1]++;
|
|
|
|
+ coord[3][0] = tmp[0];
|
|
|
|
+ coord[3][1] = tmp[1];
|
|
|
|
+ cur++;
|
|
|
|
+ }
|
|
|
|
+ // #3: (//([^/?#]*))?
|
|
|
|
+ if (*cur == '/' && *(cur + 1) == '/') {
|
|
|
|
+ tmp[0] = cur - iri_str;
|
|
|
|
+ tmp[1] = 2;
|
|
|
|
+ cur += 2;
|
|
|
|
+
|
|
|
|
+ // #4: ([^/?#]*)
|
|
|
|
+ while (*cur != '/' && *cur != '?' && *cur != '#' && *cur != '\0') {
|
|
|
|
+ // Continue recording length for #3, coordinates for #4 can
|
|
|
|
+ // be inferred.
|
|
|
|
+ tmp[1]++;
|
|
|
|
+ cur++;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (tmp[1] > 2) {
|
|
|
|
+ // Got capture groups #3 and #4. Store them.
|
|
|
|
+ coord[3][0] = tmp[0];
|
|
|
|
+ coord[3][1] = tmp[1];
|
|
|
|
+ coord[4][0] = tmp[0] + 2;
|
|
|
|
+ coord[4][1] = tmp[1] -2;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // #5: ([^?#]*)
|
|
|
|
+ tmp[0] = cur - iri_str;
|
|
|
|
+ tmp[1] = 0;
|
|
|
|
+ while (*cur != '?' && *cur != '#' && *cur != '\0') {
|
|
|
|
+ tmp[1]++;
|
|
|
|
+ cur++;
|
|
|
|
+ }
|
|
|
|
+ if (tmp[1] > 0) {
|
|
|
|
+ // Got capture group #5. Store it.
|
|
|
|
+ coord[5][0] = tmp[0];
|
|
|
|
+ coord[5][1] = tmp[1];
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // #6: (\?([^#]*))
|
|
|
|
+ if (*cur == '?') {
|
|
|
|
+ // Advance cursor by one and skip storing '?'.
|
|
|
|
+ tmp[0] = ++cur - iri_str;
|
|
|
|
+ tmp[1] = 0;
|
|
|
|
+
|
|
|
|
+ // 7: ([^#]*)
|
|
|
|
+ while (*cur != '#' && *cur != '\0') {
|
|
|
|
+ tmp[1]++;
|
|
|
|
+ cur++;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (tmp[1] > 0) {
|
|
|
|
+ // Got capture group #7. Store it.
|
|
|
|
+ // Group #6 (query including '?') is ignored.
|
|
|
|
+ coord[7][0] = tmp[0];
|
|
|
|
+ coord[7][1] = tmp[1];
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // 8: (#(.*))
|
|
|
|
+ if (*cur == '#') {
|
|
|
|
+ // 9: (.*)
|
|
|
|
+ // Skip storing '#'.
|
|
|
|
+ tmp[0] = ++cur - iri_str;
|
|
|
|
+ // Store the length of the remaining string.
|
|
|
|
+ tmp[1] = strlen(iri_str) + iri_str - cur;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return LSUP_OK;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Extern inline functions.
|
|
* Extern inline functions.
|
|
*/
|
|
*/
|