|
@@ -2,13 +2,28 @@
|
|
|
|
|
|
#include "term.h"
|
|
|
|
|
|
-
|
|
|
- * tpl packing format for the term structure.
|
|
|
+
|
|
|
+ *
|
|
|
+ * The pack elements are: 1. term type (char); 2. data (string); 3. void* type
|
|
|
+ * metadata, cast to 8-byte unsigned.
|
|
|
*/
|
|
|
-#define TERM_PACK_FMT "S(sUc)"
|
|
|
+#define TERM_PACK_FMT "csU"
|
|
|
|
|
|
#define MAX_VALID_TERM_TYPE LSUP_TERM_BNODE
|
|
|
|
|
|
+
|
|
|
+
|
|
|
+ * Data structures.
|
|
|
+ */
|
|
|
+
|
|
|
+struct iri_info_t {
|
|
|
+ LSUP_NSMap * nsm;
|
|
|
+ regmatch_t prefix;
|
|
|
+ regmatch_t path;
|
|
|
+ regmatch_t frag;
|
|
|
+};
|
|
|
+
|
|
|
+
|
|
|
|
|
|
* Extern variables.
|
|
|
*/
|
|
@@ -27,6 +42,15 @@ LSUP_Term *LSUP_default_datatype = NULL;
|
|
|
static const char *invalid_uri_chars = "<>\" {}|\\^`";
|
|
|
|
|
|
|
|
|
+
|
|
|
+ * Static prototypes.
|
|
|
+ */
|
|
|
+
|
|
|
+LSUP_rc
|
|
|
+term_init(
|
|
|
+ LSUP_Term *term, LSUP_TermType type, const char *data, void *metadata);
|
|
|
+
|
|
|
+
|
|
|
|
|
|
* Term API.
|
|
|
*/
|
|
@@ -41,7 +65,7 @@ LSUP_term_new (
|
|
|
|
|
|
if (type == LSUP_TERM_UNDEFINED) term->type = type;
|
|
|
|
|
|
- else if (UNLIKELY (LSUP_term_init (
|
|
|
+ else if (UNLIKELY (term_init (
|
|
|
term, type, data, metadata) != LSUP_OK)) {
|
|
|
free (term);
|
|
|
return NULL;
|
|
@@ -56,43 +80,90 @@ LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
|
|
|
{
|
|
|
if (UNLIKELY (!sterm)) return NULL;
|
|
|
|
|
|
- LSUP_Term *term;
|
|
|
- MALLOC_GUARD (term, NULL);
|
|
|
+ LSUP_Term *term = NULL;
|
|
|
+ LSUP_TermType type;
|
|
|
+ char *data = NULL;
|
|
|
+ void *metadata;
|
|
|
|
|
|
tpl_node *tn;
|
|
|
|
|
|
- tn = tpl_map (TERM_PACK_FMT, term);
|
|
|
- if (UNLIKELY (!tn)) goto fail;
|
|
|
+ tn = tpl_map (TERM_PACK_FMT, &type, &data, &metadata);
|
|
|
+ if (UNLIKELY (!tn)) goto finally;
|
|
|
|
|
|
- if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0))
|
|
|
- goto fail;
|
|
|
+ if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0)) {
|
|
|
+ log_error ("Error loading serialized term.");
|
|
|
+ goto finally;
|
|
|
+ }
|
|
|
+ if (UNLIKELY (tpl_unpack (tn, 0) < 0)) {
|
|
|
+ log_error ("Error unpacking serialized term.");
|
|
|
+ goto finally;
|
|
|
+ }
|
|
|
|
|
|
- if (UNLIKELY (tpl_unpack (tn, 0) < 0)) goto fail;
|
|
|
+ if (type == LSUP_TERM_LT_LITERAL)
|
|
|
+ term = LSUP_lt_literal_new (data, (char *)&metadata);
|
|
|
+ else term = LSUP_term_new (type, data, metadata);
|
|
|
|
|
|
+finally:
|
|
|
tpl_free (tn);
|
|
|
+ free (data);
|
|
|
|
|
|
return term;
|
|
|
-
|
|
|
-fail:
|
|
|
- tpl_free (tn);
|
|
|
- free (term);
|
|
|
-
|
|
|
- return NULL;
|
|
|
}
|
|
|
|
|
|
|
|
|
LSUP_Buffer *
|
|
|
LSUP_term_serialize (const LSUP_Term *term)
|
|
|
{
|
|
|
+
|
|
|
+ * In serializing a term, the fact that two terms of different types may
|
|
|
+ * be semantically identical must be taken into account. Specifically, a
|
|
|
+ * namespace-prefixed IRI ref is identical to its fully qualified version,
|
|
|
+ * and a LSUP_TERM_LT_LITERAL with no language tag is identical to a
|
|
|
+ * LSUP_TERM_LITERAL of xsd:string type, made up of the same string. Such
|
|
|
+ * terms must have identical serializations.
|
|
|
+ */
|
|
|
+
|
|
|
if (UNLIKELY (!term)) return NULL;
|
|
|
|
|
|
+ LSUP_Term *tmp_term;
|
|
|
+ void *metadata = NULL;
|
|
|
+
|
|
|
+ if (term->type == LSUP_TERM_NS_IRIREF) {
|
|
|
+
|
|
|
+ char *fq_uri;
|
|
|
+
|
|
|
+ if (LSUP_nsmap_normalize_uri (
|
|
|
+ term->iri_info->nsm, term->data, &fq_uri
|
|
|
+ ) != LSUP_OK) return NULL;
|
|
|
+
|
|
|
+ tmp_term = LSUP_iriref_new (fq_uri, NULL);
|
|
|
+
|
|
|
+ } else if (term->type == LSUP_TERM_LT_LITERAL) {
|
|
|
+
|
|
|
+ if (strlen (term->lang) == 0)
|
|
|
+ tmp_term = LSUP_literal_new (term->data, NULL);
|
|
|
+ else tmp_term = LSUP_lt_literal_new (term->data, (char *) term->lang);
|
|
|
+
|
|
|
+ } else tmp_term = LSUP_term_new (
|
|
|
+ term->type, term->data, (void *) term->datatype);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ if (tmp_term->type == LSUP_TERM_LITERAL)
|
|
|
+ metadata = tmp_term->datatype;
|
|
|
+ else if (tmp_term->type == LSUP_TERM_LT_LITERAL)
|
|
|
+ memcpy (&metadata, tmp_term->lang, sizeof (metadata));
|
|
|
+
|
|
|
LSUP_Buffer *sterm;
|
|
|
MALLOC_GUARD (sterm, NULL);
|
|
|
|
|
|
int rc = tpl_jot (
|
|
|
- TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT, term);
|
|
|
+ TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT,
|
|
|
+ &tmp_term->type, &tmp_term->data, &metadata);
|
|
|
+ LSUP_term_free (tmp_term);
|
|
|
+
|
|
|
if (rc != 0) {
|
|
|
- free (sterm);
|
|
|
+ LSUP_buffer_free (sterm);
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
@@ -101,7 +172,7 @@ LSUP_term_serialize (const LSUP_Term *term)
|
|
|
|
|
|
|
|
|
LSUP_rc
|
|
|
-LSUP_term_init(
|
|
|
+term_init (
|
|
|
LSUP_Term *term, LSUP_TermType type,
|
|
|
const char *data, void *metadata)
|
|
|
{
|
|
@@ -117,29 +188,49 @@ LSUP_term_init(
|
|
|
|
|
|
term->type = type;
|
|
|
|
|
|
- char *data_tmp;
|
|
|
if (data) {
|
|
|
-
|
|
|
- if (term->type == LSUP_TERM_IRIREF) {
|
|
|
- if (strpbrk (data, invalid_uri_chars) != NULL) {
|
|
|
+
|
|
|
+ if (LSUP_IS_IRI (term)) {
|
|
|
+ char *fquri;
|
|
|
+
|
|
|
+
|
|
|
+ if (term->type == LSUP_TERM_NS_IRIREF) {
|
|
|
+ if (LSUP_nsmap_normalize_uri (
|
|
|
+ metadata, data, &fquri) != LSUP_OK
|
|
|
+ ) {
|
|
|
+ log_error ("Error normalizing IRI data.");
|
|
|
+
|
|
|
+ return LSUP_VALUE_ERR;
|
|
|
+ }
|
|
|
+ log_debug ("Fully qualified IRI: %s", fquri);
|
|
|
+ } else fquri = (char *) data;
|
|
|
+
|
|
|
+ if (strpbrk (fquri, invalid_uri_chars) != NULL) {
|
|
|
log_error (
|
|
|
"Characters %s are not allowed. Got: %s\n",
|
|
|
- invalid_uri_chars, data);
|
|
|
+ invalid_uri_chars, fquri);
|
|
|
|
|
|
return LSUP_VALUE_ERR;
|
|
|
}
|
|
|
|
|
|
- if (regexec (LSUP_uri_ptn, data, 0, NULL, 0) != 0) {
|
|
|
+
|
|
|
+ regmatch_t matches[11];
|
|
|
+ if (UNLIKELY (regexec (LSUP_uri_ptn, fquri, 11, matches, 0) != 0)) {
|
|
|
fprintf (stderr, "Error matching URI pattern.\n");
|
|
|
|
|
|
return LSUP_VALUE_ERR;
|
|
|
}
|
|
|
- }
|
|
|
+ if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
|
|
|
+
|
|
|
+ MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
|
|
|
|
|
|
- data_tmp = realloc (term->data, strlen (data) + 1);
|
|
|
- if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
|
|
|
+ term->iri_info->prefix = matches[1];
|
|
|
+ term->iri_info->path = matches[5];
|
|
|
+ term->iri_info->frag = matches[10];
|
|
|
+ term->iri_info->nsm = metadata;
|
|
|
+ }
|
|
|
|
|
|
- strcpy (data_tmp, data);
|
|
|
+ term->data = strdup (data);
|
|
|
|
|
|
} else {
|
|
|
|
|
@@ -151,41 +242,73 @@ LSUP_term_init(
|
|
|
uuid_unparse_lower (uuid, uuid_str);
|
|
|
|
|
|
if (type == LSUP_TERM_IRIREF) {
|
|
|
- data_tmp = realloc (term->data, UUID4_URN_SIZE);
|
|
|
- if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
|
|
|
- snprintf (data_tmp, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
|
|
|
- term->data = data_tmp;
|
|
|
- } else {
|
|
|
- data_tmp = realloc (term->data, sizeof(uuid_str));
|
|
|
- strcpy(data_tmp, uuid_str);
|
|
|
- }
|
|
|
+ term->data = malloc (UUID4_URN_SIZE);
|
|
|
+ snprintf (
|
|
|
+ term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
|
|
|
+
|
|
|
+ MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
|
|
|
+
|
|
|
+
|
|
|
+ term->iri_info->prefix.rm_so = 0;
|
|
|
+ term->iri_info->prefix.rm_eo = 4;
|
|
|
+ term->iri_info->path.rm_so = 4;
|
|
|
+ term->iri_info->path.rm_eo = UUIDSTR_SIZE + 6;
|
|
|
+ term->iri_info->frag.rm_so = -1;
|
|
|
+ term->iri_info->frag.rm_eo = -1;
|
|
|
+ term->iri_info->nsm = NULL;
|
|
|
+
|
|
|
+ } else term->data = strdup (uuid_str);
|
|
|
} else {
|
|
|
log_error ("No data provided for term.");
|
|
|
return LSUP_VALUE_ERR;
|
|
|
}
|
|
|
}
|
|
|
- term->data = data_tmp;
|
|
|
|
|
|
if (term->type == LSUP_TERM_LT_LITERAL) {
|
|
|
-
|
|
|
- strncpy (term->lang, metadata, sizeof (term->lang) - 1);
|
|
|
- term->lang[7] = '\0';
|
|
|
+ if (!metadata) {
|
|
|
+ log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
|
|
|
+ term->type = LSUP_TERM_LITERAL;
|
|
|
+ } else {
|
|
|
+ char *lang_str = (char *) metadata;
|
|
|
+ log_trace("Lang string: %s", lang_str);
|
|
|
+
|
|
|
+ strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
|
|
|
+ if (strlen (term->lang) < 1) {
|
|
|
+ log_error ("Lang tag cannot be an empty string.");
|
|
|
+ return LSUP_VALUE_ERR;
|
|
|
+ }
|
|
|
+ term->lang[7] = '\0';
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- } else if (term->type == LSUP_TERM_LITERAL) {
|
|
|
- log_trace ("Storing data type.");
|
|
|
- if (metadata && strcmp (metadata, DEFAULT_DTYPE) != 0) {
|
|
|
- LSUP_Term *dtype = LSUP_uri_new ((char *) metadata);
|
|
|
- term->datatype = LSUP_term_hash (dtype);
|
|
|
+ if (term->type == LSUP_TERM_LITERAL) {
|
|
|
+ term->datatype = metadata;
|
|
|
+ if (! term->datatype) term->datatype = LSUP_default_datatype;
|
|
|
+ log_trace ("Storing data type: %s", term->datatype->data);
|
|
|
|
|
|
- if (LSUP_tcache_get (term->datatype) == NULL)
|
|
|
- LSUP_tcache_add (term->datatype, dtype);
|
|
|
+ if (! LSUP_IS_IRI (term->datatype )) {
|
|
|
+ log_error (
|
|
|
+ "Literal data tpe is not a IRI: %s",
|
|
|
+ term->datatype ->data);
|
|
|
|
|
|
- else LSUP_term_free (dtype);
|
|
|
+ return LSUP_VALUE_ERR;
|
|
|
+ }
|
|
|
|
|
|
- } else term->datatype = LSUP_default_dtype_key;
|
|
|
+ if (term->datatype != LSUP_default_datatype) {
|
|
|
+ uint32_t dtype_hash = LSUP_term_hash (term->datatype );
|
|
|
|
|
|
-
|
|
|
- } else {
|
|
|
+ LSUP_Term *tmp = (LSUP_Term *) LSUP_tcache_get (dtype_hash);
|
|
|
+ if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
|
|
|
+ else if (term->datatype != tmp) {
|
|
|
+ free (term->datatype);
|
|
|
+ term->datatype = tmp;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ log_trace ("Datatype address: %p", term->datatype);
|
|
|
+ log_trace ("Datatype hash: %lu", LSUP_term_hash (term->datatype));
|
|
|
+
|
|
|
+ } else if (term->type == LSUP_TERM_BNODE) {
|
|
|
|
|
|
term->bnode_id = XXH64 (
|
|
|
term->data, strlen (term->data) + 1, HASH_SEED);
|
|
@@ -211,13 +334,40 @@ LSUP_term_hash (const LSUP_Term *term)
|
|
|
}
|
|
|
|
|
|
|
|
|
+
|
|
|
bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
|
|
|
{
|
|
|
- if (term1->type != term2->type)
|
|
|
- return false;
|
|
|
+ LSUP_TermType type1, type2;
|
|
|
+ char *data1, *data2;
|
|
|
+
|
|
|
+
|
|
|
+ if (term1->type == LSUP_TERM_NS_IRIREF) {
|
|
|
+ type1 = LSUP_TERM_IRIREF;
|
|
|
+ if (UNLIKELY (LSUP_nsmap_normalize_uri (
|
|
|
+ term1->iri_info->nsm, term1->data, &data1
|
|
|
+ ) != LSUP_OK)) return LSUP_ERROR;
|
|
|
+ } else {
|
|
|
+ type1 = term1->type;
|
|
|
+ data1 = term1->data;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (term2->type == LSUP_TERM_NS_IRIREF) {
|
|
|
+ type2 = LSUP_TERM_IRIREF;
|
|
|
+ if (UNLIKELY (LSUP_nsmap_normalize_uri (
|
|
|
+ term2->iri_info->nsm, term2->data, &data2
|
|
|
+ ) != LSUP_OK)) return LSUP_ERROR;
|
|
|
+ } else {
|
|
|
+ type2 = term2->type;
|
|
|
+ data2 = term2->data;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (type1 != type2) return false;
|
|
|
|
|
|
- if (strcmp (term1->data, term2->data) != 0)
|
|
|
- return false;
|
|
|
+ int cmp = strcmp (data1, data2);
|
|
|
+ if (term1->type == LSUP_TERM_NS_IRIREF) free (data1);
|
|
|
+ if (term2->type == LSUP_TERM_NS_IRIREF) free (data2);
|
|
|
+
|
|
|
+ if (cmp != 0) return false;
|
|
|
|
|
|
if (term1->type == LSUP_TERM_LITERAL)
|
|
|
return term1->datatype == term2->datatype;
|
|
@@ -227,6 +377,7 @@ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
|
|
|
|
|
|
return true;
|
|
|
}
|
|
|
+*/
|
|
|
|
|
|
|
|
|
void LSUP_term_done (LSUP_Term *term)
|
|
@@ -245,6 +396,66 @@ void LSUP_term_free (LSUP_Term *term)
|
|
|
}
|
|
|
|
|
|
|
|
|
+LSUP_NSMap *
|
|
|
+LSUP_iriref_nsm (LSUP_Term *iri)
|
|
|
+{
|
|
|
+ if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
|
|
|
+ log_error ("Term is not a IRI ref type.");
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ return iri->iri_info->nsm;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+char *
|
|
|
+LSUP_iriref_prefix (LSUP_Term *iri)
|
|
|
+{
|
|
|
+ if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
|
|
|
+ log_error ("Term is not a IRI ref type.");
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (iri->iri_info->prefix.rm_so == -1) return NULL;
|
|
|
+
|
|
|
+ size_t len = iri->iri_info->prefix.rm_eo - iri->iri_info->prefix.rm_so;
|
|
|
+
|
|
|
+ return strndup (iri->data + iri->iri_info->prefix.rm_so, len);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+char *
|
|
|
+LSUP_iriref_path (LSUP_Term *iri)
|
|
|
+{
|
|
|
+ if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
|
|
|
+ log_error ("Term is not a IRI ref type.");
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (iri->iri_info->path.rm_so == -1) return NULL;
|
|
|
+
|
|
|
+ size_t len = iri->iri_info->path.rm_eo - iri->iri_info->path.rm_so;
|
|
|
+
|
|
|
+ return strndup (iri->data + iri->iri_info->path.rm_so, len);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+char *
|
|
|
+LSUP_iriref_frag (LSUP_Term *iri)
|
|
|
+{
|
|
|
+ if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
|
|
|
+ log_error ("Term is not a IRI ref type.");
|
|
|
+ return NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (iri->iri_info->frag.rm_so == -1) return NULL;
|
|
|
+
|
|
|
+ size_t len = iri->iri_info->frag.rm_eo - iri->iri_info->frag.rm_so;
|
|
|
+
|
|
|
+ return strndup (iri->data + iri->iri_info->frag.rm_so, len);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
|
|
|
* Triple API.
|
|
|
*/
|
|
@@ -359,10 +570,15 @@ LSUP_tcache_get (const LSUP_Key key)
|
|
|
}
|
|
|
|
|
|
|
|
|
-
|
|
|
+
|
|
|
+ * Extern inline functions.
|
|
|
+ */
|
|
|
|
|
|
LSUP_Key LSUP_term_hash (const LSUP_Term *term);
|
|
|
-LSUP_Term *LSUP_uri_new (const char *data);
|
|
|
-LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
|
|
|
+LSUP_Term *LSUP_iriref_new (const char *data, LSUP_NSMap *nsm);
|
|
|
+LSUP_Term *LSUP_literal_new (const char *data, LSUP_Term *datatype);
|
|
|
+LSUP_Term *LSUP_lt_literal_new (const char *data, char *lang);
|
|
|
+LSUP_Term *LSUP_bnode_new (const char *data);
|
|
|
+bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
|
|
|
LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
|
|
|
LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);
|