|
@@ -90,6 +90,10 @@ LSUP_TermSet *LSUP_term_cache = NULL;
|
|
|
// Characters not allowed in a URI string.
|
|
|
static const char *invalid_uri_chars = "<>\" {}|\\^`";
|
|
|
|
|
|
+/// Lowest term type code accepted by term_init(); lower values are rejected.
|
|
|
+static const LSUP_TermType MIN_VALID_TYPE = LSUP_TERM_IRIREF;
|
|
|
+/// Highest type code accepted by term_init(). Update if enum LSUP_TermType grows.
|
|
|
+static const LSUP_TermType MAX_VALID_TYPE = LSUP_TERM_BNODE;
|
|
|
|
|
|
/*
|
|
|
* Static prototypes.
|
|
@@ -245,9 +249,11 @@ LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
- char *data, *pfx = LSUP_iriref_prefix (iri);
|
|
|
+ char
|
|
|
+ *data,
|
|
|
+ *pfx = LSUP_iriref_prefix (iri);
|
|
|
|
|
|
- if (pfx) data = iri->data;
|
|
|
+ if (strlen (pfx) > 0) data = iri->data;
|
|
|
|
|
|
else if (iri->data[0] == '/') {
|
|
|
free (pfx);
|
|
@@ -402,7 +408,7 @@ LSUP_iriref_prefix (const LSUP_Term *iri)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
- if (iri->iri_info->prefix.size == 0) return NULL;
|
|
|
+ // if (iri->iri_info->prefix.size == 0) return NULL;
|
|
|
|
|
|
return strndup (
|
|
|
iri->data + iri->iri_info->prefix.offset,
|
|
@@ -418,7 +424,7 @@ LSUP_iriref_path (const LSUP_Term *iri)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
- if (iri->iri_info->path.size == 0) return NULL;
|
|
|
+ // if (iri->iri_info->path.size == 0) return NULL;
|
|
|
|
|
|
return strndup (
|
|
|
iri->data + iri->iri_info->path.offset,
|
|
@@ -434,7 +440,7 @@ LSUP_iriref_frag (const LSUP_Term *iri)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
- if (iri->iri_info->frag.size == 0) return NULL;
|
|
|
+ // if (iri->iri_info->frag.size == 0) return NULL;
|
|
|
|
|
|
return strndup (
|
|
|
iri->data + iri->iri_info->frag.offset,
|
|
@@ -748,12 +754,8 @@ term_init (
|
|
|
LSUP_Term *term, LSUP_TermType type,
|
|
|
const char *data, void *metadata)
|
|
|
{
|
|
|
- if (UNLIKELY (!LSUP_IS_INIT)) {
|
|
|
- log_error ("Environment not initialized. Did you call LSUP_init()?");
|
|
|
- return LSUP_ERROR;
|
|
|
- }
|
|
|
// This can never be LSUP_TERM_UNDEFINED.
|
|
|
- if (type == LSUP_TERM_UNDEFINED) {
|
|
|
+ if (type < MIN_VALID_TYPE || type > MAX_VALID_TYPE) {
|
|
|
log_error ("%d is not a valid term type.", type);
|
|
|
return LSUP_VALUE_ERR;
|
|
|
}
|
|
@@ -928,6 +930,10 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
|
|
|
size_t iri_len = strlen (iri_str);
|
|
|
MatchCoord tmp = {}; // Temporary storage for capture groups
|
|
|
|
|
|
+ // Redundant if only called by term_init.
|
|
|
+ // memset (coord, 0, sizeof(*coord));
|
|
|
+
|
|
|
+ log_debug ("Parsing IRI: %s", iri_str);
|
|
|
// #2: ([^:/?#]+)
|
|
|
while (
|
|
|
*cur != ':' && *cur != '/' && *cur != '?'
|
|
@@ -937,51 +943,50 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
|
|
|
}
|
|
|
|
|
|
// Non-capturing: (?([^:/?#]+):)?
|
|
|
- if (tmp.size > 0 && *(++cur) == ':') {
|
|
|
+ if (tmp.size > 0 && *cur == ':') {
|
|
|
// Got capture groups #2 and #3. Store them.
|
|
|
- tmp.size++;
|
|
|
- coord[3].offset = tmp.offset;
|
|
|
- coord[3].size = tmp.size - 1;
|
|
|
- }
|
|
|
+ coord[2].offset = 0;
|
|
|
+ coord[2].size = tmp.size;
|
|
|
+ cur++;
|
|
|
+ log_debug ("Group #2: %lu, %lu", coord[2].offset, coord[2].size);
|
|
|
+ } else cur = iri_str; // Backtrack if no match.
|
|
|
|
|
|
// Non-capturing: (?//([^/?#]*))?
|
|
|
- if (*(cur + 1) == '/' && *(cur + 2) == '/') {
|
|
|
- cur++;
|
|
|
- tmp.offset = cur - iri_str;
|
|
|
- tmp.size = 2;
|
|
|
+ if (*cur == '/' && *(cur + 1) == '/') {
|
|
|
cur += 2;
|
|
|
+ tmp.offset = cur - iri_str;
|
|
|
+ tmp.size = 0;
|
|
|
|
|
|
// #3: ([^/?#]*)
|
|
|
while (*cur != '/' && *cur != '?' && *cur != '#' && *cur != '\0') {
|
|
|
tmp.size++;
|
|
|
cur++;
|
|
|
}
|
|
|
-
|
|
|
- // Maybe got capture group #5.
|
|
|
- coord[3].offset = tmp.offset + 2;
|
|
|
- coord[3].size = tmp.size -2;
|
|
|
+ coord[3].offset = tmp.offset;
|
|
|
+ coord[3].size = tmp.size;
|
|
|
+ log_debug ("Group #3: %lu, %lu", coord[3].offset, coord[3].size);
|
|
|
}
|
|
|
|
|
|
- // Capture group 1 and advance cursor.
|
|
|
+ // Capture group #1: all input consumed so far (groups #2, #3 and delimiters).
|
|
|
coord[1].offset = 0;
|
|
|
- coord[1].size = cur++ - iri_str;
|
|
|
+ coord[1].size = cur - iri_str;
|
|
|
+ log_debug ("Group #1: %lu, %lu", coord[1].offset, coord[1].size);
|
|
|
|
|
|
- // Non-capturing: (?[^?#]*)
|
|
|
tmp.offset = cur - iri_str;
|
|
|
tmp.size = 0;
|
|
|
+
|
|
|
+ coord[4].offset = tmp.offset;
|
|
|
+ coord[4].size = iri_len - tmp.offset;
|
|
|
+ log_debug ("Group #4: %lu, %lu", coord[4].offset, coord[4].size);
|
|
|
+
|
|
|
+ // Non-capturing: (?:[^?#]*)
|
|
|
while (*cur != '?' && *cur != '#' && *cur != '\0') {
|
|
|
tmp.size++;
|
|
|
cur++;
|
|
|
}
|
|
|
|
|
|
- if (tmp.size > 0) {
|
|
|
- coord[4].offset = tmp.offset;
|
|
|
- coord[4].size = iri_str + iri_len - cur;
|
|
|
-
|
|
|
- } else return LSUP_NORESULT; // This group is the only mandatory match.
|
|
|
-
|
|
|
// Non-capturing: (?\?([^#]*))
|
|
|
- if (*(++cur) == '?') {
|
|
|
+ if (*cur == '?') {
|
|
|
// 5: ([^#]*)
|
|
|
tmp.offset = ++cur - iri_str;
|
|
|
tmp.size = 0;
|
|
@@ -994,18 +999,21 @@ parse_iri (char *iri_str, MatchCoord coord[]) {
|
|
|
// Got capture group #5.
|
|
|
coord[5].offset = tmp.offset;
|
|
|
coord[5].size = tmp.size;
|
|
|
+ log_debug ("Group #5: %lu, %lu", coord[5].offset, coord[5].size);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// Non-capturing: (?#(.*))?
|
|
|
- if (*(++cur) == '#') {
|
|
|
+ if (*cur == '#') {
|
|
|
// #6: (.*)
|
|
|
coord[6].offset = ++cur - iri_str;
|
|
|
coord[6].size = iri_str + iri_len - cur;
|
|
|
+ log_debug ("Group #6: %lu, %lu", coord[6].offset, coord[6].size);
|
|
|
}
|
|
|
|
|
|
coord[0].offset = 0;
|
|
|
coord[0].size = iri_len;
|
|
|
+ log_debug ("Full match: %lu, %lu", coord[0].offset, coord[0].size);
|
|
|
|
|
|
return LSUP_OK;
|
|
|
}
|