|
@@ -1,31 +1,36 @@
|
|
|
+#include "tpl.h"
|
|
|
+
|
|
|
#include "term.h"
|
|
|
|
|
|
-// URI parsing regular expression. Conforms to RFC3986.
|
|
|
-#define URI_REGEX_STR \
|
|
|
- "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
|
|
|
-#define NLEN(str) (str) == NULL ? 0 : strlen ((str))
|
|
|
-#define INVALID_URI_CHARS "<>\" {}|\\^`"
|
|
|
+/*
|
|
|
+ * tpl packing format for the term structure.
|
|
|
+ */
|
|
|
+#define TERM_PACK_FMT "S(suc)"
|
|
|
|
|
|
+/*
|
|
|
+ * Extern variables.
|
|
|
+ */
|
|
|
|
|
|
-static regex_t ptn;
|
|
|
-static bool ptn_init = false;
|
|
|
+IDCache *LSUP_id_cache = NULL;
|
|
|
+uint32_t LSUP_default_dtype_key = 0;
|
|
|
+regex_t *LSUP_uri_ptn;
|
|
|
|
|
|
|
|
|
-/* Global inline prototypes. */
|
|
|
+/*
|
|
|
+ * Static variables.
|
|
|
+ */
|
|
|
|
|
|
-LSUP_Term *LSUP_uri_new (const char *data);
|
|
|
-LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
|
|
|
+// Characters not allowed in a URI string.
|
|
|
+static const char *invalid_uri_chars = "<>\" {}|\\^`";
|
|
|
|
|
|
|
|
|
-/**
|
|
|
- * Free global regex struct. Register with atexit().
|
|
|
+/*
|
|
|
+ * API functions.
|
|
|
*/
|
|
|
-void term_cleanup() { if (ptn_init) regfree (&ptn); }
|
|
|
-
|
|
|
|
|
|
LSUP_Term *
|
|
|
LSUP_term_new (
|
|
|
- LSUP_term_type type, const char *data, char *datatype, char *lang)
|
|
|
+ LSUP_TermType type, const char *data, const char *metadata)
|
|
|
{
|
|
|
LSUP_Term *term;
|
|
|
CALLOC_GUARD (term, NULL);
|
|
@@ -34,7 +39,7 @@ LSUP_term_new (
|
|
|
if (type == LSUP_TERM_UNDEFINED) term->type = type;
|
|
|
|
|
|
else if (UNLIKELY (LSUP_term_init (
|
|
|
- term, type, data, datatype, lang) != LSUP_OK)) {
|
|
|
+ term, type, data, metadata) != LSUP_OK)) {
|
|
|
free (term);
|
|
|
return NULL;
|
|
|
}
|
|
@@ -46,26 +51,44 @@ LSUP_term_new (
|
|
|
LSUP_Term *
|
|
|
LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
|
|
|
{
|
|
|
+ if (UNLIKELY (!sterm)) return NULL;
|
|
|
+
|
|
|
LSUP_Term *term;
|
|
|
- CALLOC_GUARD (term, NULL);
|
|
|
+ MALLOC_GUARD (term, NULL);
|
|
|
|
|
|
- if (UNLIKELY (LSUP_term_deserialize (sterm, term) != LSUP_OK)) {
|
|
|
- free (term);
|
|
|
- return NULL;
|
|
|
- }
|
|
|
+ tpl_node *tn;
|
|
|
+
|
|
|
+ tn = tpl_map (TERM_PACK_FMT, term);
|
|
|
+ if (UNLIKELY (!tn)) goto fail;
|
|
|
+
|
|
|
+ if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0))
|
|
|
+ goto fail;
|
|
|
+
|
|
|
+ if (UNLIKELY (tpl_unpack (tn, 0) < 0)) goto fail;
|
|
|
+
|
|
|
+ tpl_free (tn);
|
|
|
|
|
|
return term;
|
|
|
+
|
|
|
+fail:
|
|
|
+ tpl_free (tn);
|
|
|
+ free (term);
|
|
|
+
|
|
|
+ return NULL;
|
|
|
}
|
|
|
|
|
|
|
|
|
LSUP_Buffer *
|
|
|
LSUP_buffer_new_from_term (const LSUP_Term *term)
|
|
|
{
|
|
|
+ if (UNLIKELY (!term)) return NULL;
|
|
|
+
|
|
|
LSUP_Buffer *sterm;
|
|
|
- CALLOC_GUARD (sterm, NULL);
|
|
|
- sterm->addr = NULL;
|
|
|
+ MALLOC_GUARD (sterm, NULL);
|
|
|
|
|
|
- if (LSUP_term_serialize (term, sterm) != LSUP_OK) {
|
|
|
+ int rc = tpl_jot (
|
|
|
+ TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT, term);
|
|
|
+ if (rc != 0) {
|
|
|
free (sterm);
|
|
|
return NULL;
|
|
|
}
|
|
@@ -76,32 +99,24 @@ LSUP_buffer_new_from_term (const LSUP_Term *term)
|
|
|
|
|
|
LSUP_rc
|
|
|
LSUP_term_init(
|
|
|
- LSUP_Term *term, LSUP_term_type type,
|
|
|
- const char *data, char *datatype, char *lang)
|
|
|
+ LSUP_Term *term, LSUP_TermType type,
|
|
|
+ const char *data, const char *metadata)
|
|
|
{
|
|
|
// This can never be LSUP_TERM_UNDEFINED.
|
|
|
if (!data) return LSUP_VALUE_ERR;
|
|
|
term->type = type;
|
|
|
|
|
|
// Validate URI.
|
|
|
- if (term->type == LSUP_TERM_URI) {
|
|
|
- // TODO Cheap fix. Should url-encode all invalid chars.
|
|
|
- if (strpbrk (data, INVALID_URI_CHARS) != NULL) {
|
|
|
- fprintf (
|
|
|
- stderr, "Characters %s are not allowed. Got: %s\n",
|
|
|
- INVALID_URI_CHARS, data);
|
|
|
+ if (term->type == LSUP_TERM_IRIREF) {
|
|
|
+ if (strpbrk (data, invalid_uri_chars) != NULL) {
|
|
|
+ log_error (
|
|
|
+ "Characters %s are not allowed. Got: %s\n",
|
|
|
+ invalid_uri_chars, data);
|
|
|
|
|
|
return LSUP_VALUE_ERR;
|
|
|
}
|
|
|
|
|
|
- if (UNLIKELY (!ptn_init)) {
|
|
|
- int rc = regcomp (&ptn, URI_REGEX_STR, REG_EXTENDED);
|
|
|
- if (rc != 0) return LSUP_ERROR;
|
|
|
- ptn_init = true;
|
|
|
- atexit (term_cleanup);
|
|
|
- }
|
|
|
-
|
|
|
- if (regexec (&ptn, data, 0, NULL, 0) != 0) {
|
|
|
+ if (regexec (LSUP_uri_ptn, data, 0, NULL, 0) != 0) {
|
|
|
fprintf (stderr, "Error matching URI pattern.\n");
|
|
|
|
|
|
return LSUP_VALUE_ERR;
|
|
@@ -113,107 +128,13 @@ LSUP_term_init(
|
|
|
term->data = data_tmp;
|
|
|
strcpy (term->data, data);
|
|
|
|
|
|
- if (term->type == LSUP_TERM_LITERAL && !datatype)
|
|
|
- datatype = DEFAULT_DTYPE;
|
|
|
-
|
|
|
- if (datatype) {
|
|
|
- data_tmp = realloc (term->datatype, strlen (datatype) + 1);
|
|
|
- if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
|
|
|
- term->datatype = data_tmp;
|
|
|
- strcpy (term->datatype, datatype);
|
|
|
- } else {
|
|
|
- free (term->datatype);
|
|
|
- term->datatype = NULL;
|
|
|
- }
|
|
|
- if (lang) {
|
|
|
- // TODO validate language and country code
|
|
|
- //char lsize = 5 ? lang[2] == "-" : 2;
|
|
|
- memcpy (term->lang, lang, LANG_SIZE);
|
|
|
- } else {
|
|
|
- memset (term->lang, 0, LANG_SIZE);
|
|
|
- }
|
|
|
-
|
|
|
- return LSUP_OK;
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
-/*
|
|
|
- * This function allocates and returns the following byte sequence:
|
|
|
- *
|
|
|
- * - `sizeof (char)` bytes for the term type;
|
|
|
- * - `LANG_SIZE` bytes for the language tag;
|
|
|
- * - Arbitrary bytes with NUL-terminated strings for data and datatype.
|
|
|
- *
|
|
|
- * The index for `data` is consistently `LANG_SIZE + sizeof (char)`. The
|
|
|
- * index for `datatype` is found by the terminating NULL for `data`.
|
|
|
- *
|
|
|
- * Serialized representations of some RDF terms:
|
|
|
- *
|
|
|
- * <http://hello.org>
|
|
|
- *
|
|
|
- * 0 1 size=19
|
|
|
- * | \x01 | http://hello.org\x00 |
|
|
|
- * type data
|
|
|
- *
|
|
|
- * "hello"
|
|
|
- *
|
|
|
- * 0 1 size=7
|
|
|
- * | \x03 | hello\x00 |
|
|
|
- * type data
|
|
|
- *
|
|
|
- * "hello"^^xsd:string
|
|
|
- *
|
|
|
- * 0 1 7 size=18
|
|
|
- * | \x03 | hello\x00 | xsd:string\x00 |
|
|
|
- * type data datatype
|
|
|
- *
|
|
|
- * (note: the "xsd:" prefix is used for simplification here, it would be
|
|
|
- * normally be a fully qualified URI)
|
|
|
- *
|
|
|
- * "hello"@en-US
|
|
|
- *
|
|
|
- * 0 1 7 18 size=24
|
|
|
- * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00 |
|
|
|
- * type data datatype lang
|
|
|
- */
|
|
|
-LSUP_rc
|
|
|
-LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm)
|
|
|
-{
|
|
|
- size_t size, data_len, datatype_len = 0,
|
|
|
- data_idx = 1, datatype_idx = 0, lang_idx = 0;
|
|
|
-
|
|
|
- if (UNLIKELY (term == NULL)) return LSUP_NOACTION;
|
|
|
+ if (term->type == LSUP_TERM_LT_LITERAL) {
|
|
|
+ term->lang = XXH64 (metadata, strlen (metadata) + 1, HASH_SEED);
|
|
|
+ LSUP_tcache_add_id (term->lang, metadata);
|
|
|
|
|
|
- data_len = strlen (term->data) + 1;
|
|
|
-
|
|
|
- size = data_idx + data_len;
|
|
|
-
|
|
|
- if (term->datatype != NULL) {
|
|
|
- datatype_idx = size;
|
|
|
- datatype_len = strlen (term->datatype) + 1;
|
|
|
- size += datatype_len;
|
|
|
-
|
|
|
- if (strlen (term->lang) > 0) {
|
|
|
- lang_idx = size;
|
|
|
- size += strlen (term->lang) + 1;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- //log_debug ("Serialized term size: %lu", size);
|
|
|
- LSUP_buffer_init (sterm, size, NULL);
|
|
|
-
|
|
|
- // Copy type.
|
|
|
- memcpy (sterm->addr, &term->type, 1);
|
|
|
- // Copy data.
|
|
|
- memcpy (sterm->addr + data_idx, term->data, data_len);
|
|
|
-
|
|
|
- if (term->datatype != NULL) {
|
|
|
- // Copy data type.
|
|
|
- memcpy (sterm->addr + datatype_idx, term->datatype, datatype_len);
|
|
|
-
|
|
|
- // Copy lang tag.
|
|
|
- if (strlen (term->lang) > 0)
|
|
|
- strcpy (sterm->addr + lang_idx, term->lang);
|
|
|
+ } else if (metadata && strcmp (metadata, DEFAULT_DTYPE) != 0) {
|
|
|
+ term->datatype = XXH64 (metadata, strlen (metadata) + 1, HASH_SEED);
|
|
|
+ LSUP_tcache_add_id (term->datatype, metadata);
|
|
|
}
|
|
|
|
|
|
return LSUP_OK;
|
|
@@ -221,29 +142,22 @@ LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm)
|
|
|
|
|
|
|
|
|
LSUP_rc
|
|
|
-LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term)
|
|
|
+LSUP_uri_init (LSUP_Term *term, const char *data)
|
|
|
{
|
|
|
- size_t cur;
|
|
|
- char *data, *datatype = NULL;
|
|
|
- langtag lang = "\00";
|
|
|
+ if (!data) {
|
|
|
+ uuid_t uuid;
|
|
|
+ uuid_generate_random (uuid);
|
|
|
|
|
|
- char type = ((char*)(sterm->addr))[0];
|
|
|
+ uuid_str_t uuid_str;
|
|
|
+ uuid_unparse_lower (uuid, uuid_str);
|
|
|
|
|
|
- cur = 1;
|
|
|
- data = (char*)sterm->addr + cur;
|
|
|
- cur += strlen (data) + 1;
|
|
|
+ char uri[UUIDSTR_SIZE + 10];
|
|
|
+ sprintf (uri, "urn:uuid4:%s", uuid_str);
|
|
|
|
|
|
- if (type == LSUP_TERM_LITERAL && cur < sterm->size) {
|
|
|
- datatype = (char*)sterm->addr + cur;
|
|
|
- cur += strlen (datatype) + 1;
|
|
|
- if (strlen (datatype) == 0)
|
|
|
- datatype = NULL;
|
|
|
-
|
|
|
- if (cur < sterm->size)
|
|
|
- strcpy (lang, sterm->addr + cur);
|
|
|
+ data = uri;
|
|
|
}
|
|
|
|
|
|
- return LSUP_term_init (term, type, data, datatype, lang);
|
|
|
+ return LSUP_term_init (term, LSUP_TERM_IRIREF, data, NULL);
|
|
|
}
|
|
|
|
|
|
|
|
@@ -255,23 +169,11 @@ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
|
|
|
if (strcmp (term1->data, term2->data) != 0)
|
|
|
return false;
|
|
|
|
|
|
- if (term1->type == LSUP_TERM_LITERAL) {
|
|
|
- if ((term1->datatype == NULL) != (term2->datatype == NULL)) // XOR
|
|
|
- return false;
|
|
|
-
|
|
|
- if (
|
|
|
- term1->datatype != NULL &&
|
|
|
- strcmp (term1->datatype, term2->datatype) != 0)
|
|
|
- return false;
|
|
|
+ if (term1->type == LSUP_TERM_LITERAL)
|
|
|
+ return term1->datatype == term2->datatype;
|
|
|
|
|
|
- if ((term1->lang == NULL) != (term2->lang == NULL)) // XOR
|
|
|
- return false;
|
|
|
-
|
|
|
- if (
|
|
|
- term1->lang != NULL &&
|
|
|
- strcmp (term1->lang, term2->lang) != 0)
|
|
|
- return false;
|
|
|
- }
|
|
|
+ if (term1->type == LSUP_TERM_LT_LITERAL)
|
|
|
+ return term1->lang == term2->lang;
|
|
|
|
|
|
return true;
|
|
|
}
|
|
@@ -279,29 +181,52 @@ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
|
|
|
|
|
|
void LSUP_term_done (LSUP_Term *term)
|
|
|
{
|
|
|
- if (LIKELY (term->data != NULL)) {
|
|
|
- free (term->data);
|
|
|
- term->data = NULL;
|
|
|
- }
|
|
|
-
|
|
|
- if (term->datatype != NULL) {
|
|
|
- free (term->datatype);
|
|
|
- term->datatype = NULL;
|
|
|
- }
|
|
|
+ free (term->data);
|
|
|
+ term->data = NULL;
|
|
|
}
|
|
|
|
|
|
|
|
|
void LSUP_term_free (LSUP_Term *term)
|
|
|
{
|
|
|
if (LIKELY (term != NULL)) {
|
|
|
- LSUP_term_done (term);
|
|
|
+ free (term->data);
|
|
|
free (term);
|
|
|
- term = NULL;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
+LSUP_rc
|
|
|
+LSUP_tcache_add_id (const uint32_t key, const char *data)
|
|
|
+{
|
|
|
+ struct id_cache_t *entry;
|
|
|
+
|
|
|
+ HASH_FIND_INT (LSUP_id_cache, &key, entry);
|
|
|
+ if (entry) return LSUP_NOACTION;
|
|
|
+
|
|
|
+ MALLOC_GUARD (entry, LSUP_MEM_ERR);
|
|
|
+ entry->key = key;
|
|
|
+ entry->data = strdup (data);
|
|
|
+ HASH_ADD_INT (LSUP_id_cache, key, entry);
|
|
|
+
|
|
|
+ return LSUP_OK;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+const char *
|
|
|
+LSUP_tcache_get_id (const uint32_t key)
|
|
|
+{
|
|
|
+ struct id_cache_t *entry;
|
|
|
+
|
|
|
+ HASH_FIND_INT (LSUP_id_cache, &key, entry);
|
|
|
+ if (entry) log_trace ("Id found for key %d: %s", key, entry->data);
|
|
|
+ else log_trace ("No ID found for key %d.", key);
|
|
|
+
|
|
|
+ return (entry) ? entry->data : NULL;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
// Extern inline functions.
|
|
|
|
|
|
LSUP_Key LSUP_term_hash (const LSUP_Term *term);
|
|
|
-
|
|
|
+LSUP_Term *LSUP_uri_new (const char *data);
|
|
|
+LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
|