/* Implementation of the term and triple API declared in term.h. */
- #include "tpl.h"
- #include "term.h"
- /** @brief tpl packing format for a term.
- *
- * The pack elements are: 1. term type (char); 2. data (string); 3. void* type
- * metadata, cast to 8-byte unsigned.
- */
- #define TERM_PACK_FMT "csU"
- #define MAX_VALID_TERM_TYPE LSUP_TERM_BNODE /* For type validation. */
- /*
- * Data structures.
- */
- struct iri_info_t {
- LSUP_NSMap * nsm; // NSM handle for prefixed IRI.
- regmatch_t prefix; // Matching group #1.
- regmatch_t path; // Matching group #5.
- regmatch_t frag; // Matching group #10.
- };
- /*
- * Extern variables.
- */
- struct hashmap *LSUP_term_cache = NULL;
- uint32_t LSUP_default_dtype_key = 0;
- regex_t *LSUP_uri_ptn;
- LSUP_Term *LSUP_default_datatype = NULL;
- /*
- * Static variables.
- */
- // Characters not allowed in a URI string.
- static const char *invalid_uri_chars = "<>\" {}|\\^`";
- /*
- * Static prototypes.
- */
- static LSUP_rc
- term_init (
- LSUP_Term *term, LSUP_TermType type, const char *data, void *metadata);
- /*
- * Term API.
- */
- LSUP_Term *
- LSUP_term_new (
- LSUP_TermType type, const char *data, void *metadata)
- {
- LSUP_Term *term;
- CALLOC_GUARD (term, NULL);
- // If undefined, just set the type.
- if (type == LSUP_TERM_UNDEFINED) term->type = type;
- else if (UNLIKELY (term_init (
- term, type, data, metadata) != LSUP_OK)) {
- free (term);
- return NULL;
- }
- return term;
- }
- LSUP_Term *
- LSUP_term_copy (const LSUP_Term *src)
- {
- void *metadata = NULL;
- if (LSUP_IS_IRI (src))
- metadata = (void *) LSUP_iriref_nsm (src);
- else if (src->type == LSUP_TERM_LITERAL)
- metadata = (void *) src->datatype;
- else if (src->type == LSUP_TERM_LT_LITERAL) {
- metadata = (void *) src->lang;
- }
- return LSUP_term_new (src->type, src->data, metadata);
- }
- LSUP_Term *
- LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
- {
- if (UNLIKELY (!sterm)) return NULL;
- LSUP_Term *term = NULL;
- LSUP_TermType type = LSUP_TERM_UNDEFINED;
- char *data = NULL;
- void *metadata;
- tpl_node *tn;
- tn = tpl_map (TERM_PACK_FMT, &type, &data, &metadata);
- if (UNLIKELY (!tn)) goto finally;
- if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0)) {
- log_error ("Error loading serialized term.");
- goto finally;
- }
- if (UNLIKELY (tpl_unpack (tn, 0) < 0)) {
- log_error ("Error unpacking serialized term.");
- goto finally;
- }
- if (type == LSUP_TERM_LT_LITERAL)
- term = LSUP_lt_literal_new (data, (char *)&metadata);
- else term = LSUP_term_new (type, data, metadata);
- finally:
- tpl_free (tn);
- free (data);
- return term;
- }
- LSUP_Term *
- LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri)
- {
- if (! LSUP_IS_IRI (iri)) {
- log_error ("Provided path is not an IRI.");
- return NULL;
- }
- if (! LSUP_IS_IRI (root)) {
- log_error ("Provided root is not an IRI.");
- return NULL;
- }
- char *data, *pfx = LSUP_iriref_prefix (iri);
- if (pfx) data = iri->data;
- else if (iri->data[0] == '/') {
- free (pfx);
- pfx = LSUP_iriref_prefix (root);
- data = malloc (strlen (iri->data) + strlen (pfx) + 1);
- if (!data) return NULL;
- sprintf (data, "%s%s", pfx, iri->data);
- } else {
- data = malloc (strlen (iri->data) + strlen (root->data) + 1);
- if (!data) return NULL;
- sprintf (data, "%s%s", root->data, iri->data);
- }
- free (pfx);
- LSUP_Term *ret = LSUP_iriref_new (data, NULL);
- if (data != iri->data) free (data);
- return ret;
- }
- LSUP_Term *
- LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri)
- {
- if (! LSUP_IS_IRI (iri)) {
- log_error ("Provided path is not an IRI.");
- return NULL;
- }
- if (! LSUP_IS_IRI (root)) {
- log_error ("Provided root is not an IRI.");
- return NULL;
- }
- size_t offset = (
- strstr (iri->data, root->data) == iri->data ?
- strlen (root->data) : 0);
- return LSUP_iriref_new (iri->data + offset, LSUP_iriref_nsm (iri));
- }
- LSUP_Buffer *
- LSUP_term_serialize (const LSUP_Term *term)
- {
- /*
- * In serializing a term, the fact that two terms of different types may
- * be semantically identical must be taken into account. Specifically, a
- * namespace-prefixed IRI ref is identical to its fully qualified version,
- * and a LSUP_TERM_LT_LITERAL with no language tag is identical to a
- * LSUP_TERM_LITERAL of xsd:string type, made up of the same string. Such
- * terms must have identical serializations.
- */
- if (UNLIKELY (!term)) return NULL;
- LSUP_Term *tmp_term;
- void *metadata = NULL;
- if (term->type == LSUP_TERM_NS_IRIREF) {
- // For IRI refs, simply serialize the FQ version of the term.
- char *fq_uri;
- if (LSUP_nsmap_normalize_uri (
- term->iri_info->nsm, term->data, &fq_uri
- ) != LSUP_OK) return NULL;
- tmp_term = LSUP_iriref_new (fq_uri, NULL);
- free (fq_uri);
- } else if (term->type == LSUP_TERM_LT_LITERAL) {
- // For LT literals with empty lang tag, convert to a normal xsd:string.
- if (strlen (term->lang) == 0)
- tmp_term = LSUP_literal_new (term->data, NULL);
- else tmp_term = LSUP_lt_literal_new (term->data, (char *) term->lang);
- } else tmp_term = LSUP_term_new (
- term->type, term->data, (void *) term->datatype);
- // "datatype" can be anything here since it's cast to void *.
- // metadata field is ignored for IRI ref.
- if (tmp_term->type == LSUP_TERM_LITERAL)
- metadata = tmp_term->datatype;
- else if (tmp_term->type == LSUP_TERM_LT_LITERAL)
- memcpy (&metadata, tmp_term->lang, sizeof (metadata));
- LSUP_Buffer *sterm;
- MALLOC_GUARD (sterm, NULL);
- int rc = tpl_jot (
- TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT,
- &tmp_term->type, &tmp_term->data, &metadata);
- LSUP_term_free (tmp_term);
- if (rc != 0) {
- LSUP_buffer_free (sterm);
- return NULL;
- }
- return sterm;
- }
- LSUP_Key
- LSUP_term_hash (const LSUP_Term *term)
- {
- LSUP_Buffer *buf;
- if (UNLIKELY (!term)) buf = BUF_DUMMY;
- else buf = LSUP_term_serialize (term);
- LSUP_Key key = LSUP_buffer_hash (buf);
- LSUP_buffer_free (buf);
- return key;
- }
- void
- LSUP_term_free (LSUP_Term *term)
- {
- if (UNLIKELY (!term)) return;
- if (LSUP_IS_IRI (term)) free (term->iri_info);
- free (term->data);
- free (term);
- }
- LSUP_NSMap *
- LSUP_iriref_nsm (const LSUP_Term *iri)
- {
- if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
- log_error ("Term is not a IRI ref type.");
- return NULL;
- }
- return iri->iri_info->nsm;
- }
- char *
- LSUP_iriref_prefix (const LSUP_Term *iri)
- {
- if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
- log_error ("Term is not a IRI ref type.");
- return NULL;
- }
- if (iri->iri_info->prefix.rm_so == -1) return NULL;
- size_t len = iri->iri_info->prefix.rm_eo - iri->iri_info->prefix.rm_so;
- if (len == 0) return NULL;
- return strndup (iri->data + iri->iri_info->prefix.rm_so, len);
- }
- char *
- LSUP_iriref_path (const LSUP_Term *iri)
- {
- if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
- log_error ("Term is not a IRI ref type.");
- return NULL;
- }
- if (iri->iri_info->path.rm_so == -1) return NULL;
- size_t len = iri->iri_info->path.rm_eo - iri->iri_info->path.rm_so;
- if (len == 0) return NULL;
- return strndup (iri->data + iri->iri_info->path.rm_so, len);
- }
- char *
- LSUP_iriref_frag (const LSUP_Term *iri)
- {
- if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
- log_error ("Term is not a IRI ref type.");
- return NULL;
- }
- if (iri->iri_info->frag.rm_so == -1) return NULL;
- size_t len = iri->iri_info->frag.rm_eo - iri->iri_info->frag.rm_so;
- return strndup (iri->data + iri->iri_info->frag.rm_so, len);
- }
- /*
- * Triple API.
- */
- LSUP_Triple *
- LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
- {
- LSUP_Triple *spo = malloc (sizeof (*spo));
- if (!spo) return NULL;
- if (UNLIKELY (LSUP_triple_init (spo, s, p, o))) {
- free (spo);
- return NULL;
- }
- return spo;
- }
- LSUP_Triple *
- LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo)
- {
- LSUP_Triple *spo = malloc (sizeof (*spo));
- if (!spo) return NULL;
- spo->s = LSUP_term_new_from_buffer (sspo->s);
- spo->p = LSUP_term_new_from_buffer (sspo->p);
- spo->o = LSUP_term_new_from_buffer (sspo->o);
- return spo;
- }
- LSUP_BufferTriple *
- LSUP_triple_serialize (const LSUP_Triple *spo)
- {
- LSUP_BufferTriple *sspo = malloc (sizeof (*sspo));
- if (!sspo) return NULL;
- sspo->s = LSUP_term_serialize (spo->s);
- sspo->p = LSUP_term_serialize (spo->p);
- sspo->o = LSUP_term_serialize (spo->o);
- return sspo;
- }
- LSUP_rc
- LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
- {
- /* FIXME TRP_DUMMY is a problem here.
- if (! LSUP_IS_IRI (s) && s->type != LSUP_TERM_BNODE) {
- log_error ("Subject is not of a valid term type: %d", s->type);
- return LSUP_VALUE_ERR;
- }
- if (! LSUP_IS_IRI (p)) {
- log_error ("Predicate is not of a valid term type: %d", p->type);
- return LSUP_VALUE_ERR;
- }
- */
- spo->s = s;
- spo->p = p;
- spo->o = o;
- return LSUP_OK;
- }
- void
- LSUP_triple_done (LSUP_Triple *spo)
- {
- if (UNLIKELY (!spo)) return;
- LSUP_term_free (spo->s);
- LSUP_term_free (spo->p);
- LSUP_term_free (spo->o);
- }
- void
- LSUP_triple_free (LSUP_Triple *spo)
- {
- if (UNLIKELY (!spo)) return;
- LSUP_term_free (spo->s);
- LSUP_term_free (spo->p);
- LSUP_term_free (spo->o);
- free (spo);
- }
- LSUP_rc
- LSUP_tcache_add (const LSUP_Key key, const LSUP_Term *term)
- {
- LSUP_KeyedTerm entry_s = {.key=key, .term=(LSUP_Term *)term};
- // Many calls will likely attempt inserting duplicates after the first one.
- if (LIKELY (hashmap_get (LSUP_term_cache, &entry_s))) return LSUP_NOACTION;
- hashmap_set (LSUP_term_cache, &entry_s);
- return LSUP_OK;
- }
- const LSUP_Term *
- LSUP_tcache_get (LSUP_Key key)
- {
- LSUP_KeyedTerm *entry = hashmap_get (
- LSUP_term_cache, &(LSUP_KeyedTerm){.key=key});
- if (entry) log_trace ("ID found for key %lx: %s", key, entry->term->data);
- else log_trace ("No ID found for key %lx.", key);
- return (entry) ? entry->term : NULL;
- }
- /*
- * Static functions.
- */
- static LSUP_rc
- term_init (
- LSUP_Term *term, LSUP_TermType type,
- const char *data, void *metadata)
- {
- if (UNLIKELY (!LSUP_uri_ptn)) {
- log_error ("Environment not initialized. Did you call LSUP_init()?");
- return LSUP_ERROR;
- }
- // This can never be LSUP_TERM_UNDEFINED.
- if (type == LSUP_TERM_UNDEFINED) {
- log_error ("%d is not a valid term type.", type);
- return LSUP_VALUE_ERR;
- }
- term->type = type;
- if (data) {
- // Validate IRI.
- if (LSUP_IS_IRI (term)) {
- char *fquri;
- // Find fully qualified IRI to parse.
- if (term->type == LSUP_TERM_NS_IRIREF) {
- if (LSUP_nsmap_normalize_uri (
- metadata, data, &fquri) != LSUP_OK
- ) {
- log_error ("Error normalizing IRI data.");
- return LSUP_VALUE_ERR;
- }
- log_debug ("Fully qualified IRI: %s", fquri);
- } else fquri = (char *) data;
- if (strpbrk (fquri, invalid_uri_chars) != NULL) {
- log_warn (
- "Characters %s are not valid in a URI. Got: %s\n",
- invalid_uri_chars, fquri);
- #if 0
- // TODO This causes W3C TTL test #29 to fail. Remove?
- return LSUP_VALUE_ERR;
- #endif
- }
- // Capture interesting IRI parts.
- regmatch_t matches[11];
- if (UNLIKELY (regexec (LSUP_uri_ptn, fquri, 11, matches, 0) != 0)) {
- fprintf (stderr, "Error matching URI pattern.\n");
- return LSUP_VALUE_ERR;
- }
- if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);
- MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
- term->iri_info->prefix = matches[1];
- term->iri_info->path = matches[5];
- term->iri_info->frag = matches[10];
- term->iri_info->nsm = metadata;
- }
- term->data = strdup (data);
- } else {
- // No data. Make up a random UUID or URI if allowed.
- if (type == LSUP_TERM_IRIREF || type == LSUP_TERM_BNODE) {
- uuid_t uuid;
- uuid_generate_random (uuid);
- uuid_str_t uuid_str;
- uuid_unparse_lower (uuid, uuid_str);
- if (type == LSUP_TERM_IRIREF) {
- term->data = malloc (UUID4_URN_SIZE);
- snprintf (
- term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
- MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);
- // Allocate IRI match patterns manually.
- term->iri_info->prefix.rm_so = 0;
- term->iri_info->prefix.rm_eo = 4;
- term->iri_info->path.rm_so = 4;
- term->iri_info->path.rm_eo = UUIDSTR_SIZE + 6;
- term->iri_info->frag.rm_so = -1;
- term->iri_info->frag.rm_eo = -1;
- term->iri_info->nsm = NULL;
- } else term->data = strdup (uuid_str);
- } else {
- log_error ("No data provided for term.");
- return LSUP_VALUE_ERR;
- }
- }
- if (term->type == LSUP_TERM_LT_LITERAL) {
- if (!metadata) {
- log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
- term->type = LSUP_TERM_LITERAL;
- } else {
- char *lang_str = (char *) metadata;
- log_trace("Lang string: '%s'", lang_str);
- // Lang tags longer than 7 characters will be truncated.
- strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
- if (strlen (term->lang) < 1) {
- log_error ("Lang tag cannot be an empty string.");
- return LSUP_VALUE_ERR;
- }
- term->lang[7] = '\0';
- }
- }
- if (term->type == LSUP_TERM_LITERAL) {
- term->datatype = metadata;
- if (! term->datatype) term->datatype = LSUP_default_datatype;
- log_trace ("Storing data type: %s", term->datatype->data);
- if (! LSUP_IS_IRI (term->datatype)) {
- log_error (
- "Literal data type is not an IRI: %s",
- term->datatype->data);
- return LSUP_VALUE_ERR;
- }
- uint32_t dtype_hash = LSUP_term_hash (term->datatype);
- const LSUP_Term *tmp = LSUP_tcache_get (dtype_hash);
- if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
- else if (term->datatype != tmp) {
- if (term->datatype != LSUP_default_datatype)
- LSUP_term_free (term->datatype);
- term->datatype = (LSUP_Term *)tmp;
- }
- //log_trace ("Datatype address: %p", term->datatype);
- log_trace ("Datatype hash: %lx", LSUP_term_hash (term->datatype));
- } else if (term->type == LSUP_TERM_BNODE) {
- // TODO This is not usable for global skolemization.
- term->bnode_id = LSUP_HASH (
- term->data, strlen (term->data) + 1, LSUP_HASH_SEED);
- }
- return LSUP_OK;
- }
- /*
- * Extern inline functions.
- */
- LSUP_Key LSUP_term_hash (const LSUP_Term *term);
- LSUP_Term *LSUP_iriref_new (const char *data, LSUP_NSMap *nsm);
- LSUP_Term *LSUP_literal_new (const char *data, LSUP_Term *datatype);
- LSUP_Term *LSUP_lt_literal_new (const char *data, char *lang);
- LSUP_Term *LSUP_bnode_new (const char *data);
- bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
- LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
- LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);
|