#include "tpl.h"

#include "term.h"

/** @brief tpl packing format for a term.
 *
 * The pack elements are: 1. term type (char); 2. data (string); 3. void* type
 * metadata, cast to 8-byte unsigned.
 */
#define TERM_PACK_FMT "csU"

#define MAX_VALID_TERM_TYPE     LSUP_TERM_BNODE /* For type validation. */


/*
 * Data structures.
 */

struct iri_info_t {
    LSUP_NSMap *    nsm;            // NSM handle for prefixed IRI.
    regmatch_t      prefix;         // Matching group #1.
    regmatch_t      path;           // Matching group #5.
    regmatch_t      frag;           // Matching group #10.
};


/*
 * Extern variables.
 */

struct hashmap *LSUP_term_cache = NULL;
uint32_t LSUP_default_dtype_key = 0;
regex_t *LSUP_uri_ptn;
LSUP_Term *LSUP_default_datatype = NULL;


/*
 * Static variables.
 */

// Characters not allowed in a URI string.
static const char *invalid_uri_chars = "<>\" {}|\\^`";


/*
 * Static prototypes.
 */

static LSUP_rc
term_init (
        LSUP_Term *term, LSUP_TermType type,
        const char *data, void *metadata);


/*
 * Term API.
 */

/** @brief Allocate and initialize a new term.
 *
 * @param[in] type Term type. With LSUP_TERM_UNDEFINED only the type is set
 *  and data and metadata are ignored.
 * @param[in] data Term string; copied. May be NULL for IRI refs and blank
 *  nodes, in which case a random UUID-based label is generated.
 * @param[in] metadata Type-dependent: NS map (IRI refs), data type term
 *  (literals) or language tag string (language-tagged literals).
 *
 * @return New term handle, or NULL on failure. Release with LSUP_term_free().
 */
LSUP_Term *
LSUP_term_new (
        LSUP_TermType type, const char *data, void *metadata)
{
    LSUP_Term *term;
    CALLOC_GUARD (term, NULL);

    // If undefined, just set the type.
    if (type == LSUP_TERM_UNDEFINED) term->type = type;

    else if (UNLIKELY (term_init (
                    term, type, data, metadata) != LSUP_OK)) {
        // term_init() may fail after it has already allocated the data
        // string and/or the IRI info. LSUP_term_free() releases whatever was
        // set; this assumes CALLOC_GUARD zero-fills the struct so that
        // members never assigned are NULL.
        LSUP_term_free (term);
        return NULL;
    }

    return term;
}


/** @brief Create a new term with the same type, data and metadata as src.
 *
 * @param[in] src Term to copy. Must not be NULL.
 *
 * @return New term handle, or NULL on failure.
 */
LSUP_Term *
LSUP_term_copy (const LSUP_Term *src)
{
    void *metadata = NULL;

    if (LSUP_IS_IRI (src))
        metadata = (void *) LSUP_iriref_nsm (src);
    else if (src->type == LSUP_TERM_LITERAL)
        metadata = (void *) src->datatype;
    else if (src->type == LSUP_TERM_LT_LITERAL) {
        metadata = (void *) src->lang;
    }

    return LSUP_term_new (src->type, src->data, metadata);
}


/** @brief Build a term from a buffer produced by LSUP_term_serialize().
 *
 * @param[in] sterm Serialized term. NULL yields NULL.
 *
 * @return New term handle, or NULL if the buffer cannot be mapped, loaded or
 *  unpacked, or if the term cannot be created.
 */
LSUP_Term *
LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
{
    if (UNLIKELY (!sterm)) return NULL;

    LSUP_Term *term = NULL;
    LSUP_TermType type = LSUP_TERM_UNDEFINED;
    char *data = NULL;
    // Initialized so that no indeterminate value is ever read.
    void *metadata = NULL;

    tpl_node *tn;

    tn = tpl_map (TERM_PACK_FMT, &type, &data, &metadata);
    if (UNLIKELY (!tn)) goto finally;

    if (UNLIKELY (tpl_load (tn, TPL_MEM, sterm->addr, sterm->size) < 0)) {
        log_error ("Error loading serialized term.");
        goto finally;
    }
    if (UNLIKELY (tpl_unpack (tn, 0) < 0)) {
        log_error ("Error unpacking serialized term.");
        goto finally;
    }

    // For language-tagged literals the tag characters are stored in the
    // bytes of the metadata slot itself (see LSUP_term_serialize()).
    if (type == LSUP_TERM_LT_LITERAL)
        term = LSUP_lt_literal_new (data, (char *)&metadata);
    else term = LSUP_term_new (type, data, metadata);

finally:
    // The map may have failed, in which case there is no node to release.
    if (tn) tpl_free (tn);
    free (data);

    return term;
}


/** @brief Resolve an IRI against a root IRI.
 *
 * If iri has a prefix it is copied as is; if it starts with a slash it is
 * appended to the prefix of root; otherwise it is appended to the full root
 * string.
 *
 * @param[in] root Root IRI. Must be an IRI ref.
 * @param[in] iri IRI to resolve. Must be an IRI ref.
 *
 * @return New IRI ref term, or NULL on error.
 */
LSUP_Term *
LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri)
{
    if (! LSUP_IS_IRI (iri)) {
        log_error ("Provided path is not an IRI.");
        return NULL;
    }
    if (! LSUP_IS_IRI (root)) {
        log_error ("Provided root is not an IRI.");
        return NULL;
    }

    char
        *data,
        *pfx = LSUP_iriref_prefix (iri);

    if (pfx) data = iri->data;

    else if (iri->data[0] == '/') {
        pfx = LSUP_iriref_prefix (root);
        // Without a root prefix there is nothing to resolve against.
        if (UNLIKELY (!pfx)) {
            log_error ("Root IRI has no prefix.");
            return NULL;
        }

        size_t size = strlen (pfx) + strlen (iri->data) + 1;
        data = malloc (size);
        if (UNLIKELY (!data)) {
            free (pfx);
            return NULL;
        }
        snprintf (data, size, "%s%s", pfx, iri->data);

    } else {
        size_t size = strlen (root->data) + strlen (iri->data) + 1;
        data = malloc (size);
        if (UNLIKELY (!data)) return NULL;
        snprintf (data, size, "%s%s", root->data, iri->data);
    }
    free (pfx);

    LSUP_Term *ret = LSUP_iriref_new (data, NULL);
    // data is borrowed from iri when the latter was already absolute.
    if (data != iri->data) free (data);

    return ret;
}


/** @brief Strip a root IRI from the beginning of an IRI.
 *
 * @param[in] root Root IRI. Must be an IRI ref.
 * @param[in] iri IRI to relativize. Must be an IRI ref.
 *
 * @return New IRI ref holding the part of iri following root if iri begins
 *  with root, or a copy of the whole iri string otherwise. NULL on error.
 */
LSUP_Term *
LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri)
{
    if (! LSUP_IS_IRI (iri)) {
        log_error ("Provided path is not an IRI.");
        return NULL;
    }
    if (! LSUP_IS_IRI (root)) {
        log_error ("Provided root is not an IRI.");
        return NULL;
    }

    size_t offset = (
        strstr (iri->data, root->data) == iri->data ?
        strlen (root->data) : 0);

    return LSUP_iriref_new (iri->data + offset, LSUP_iriref_nsm (iri));
}


/** @brief Serialize a term into a newly allocated buffer.
 *
 * @param[in] term Term to serialize. NULL yields NULL.
 *
 * @return New buffer handle, or NULL on failure. Release with
 *  LSUP_buffer_free().
 */
LSUP_Buffer *
LSUP_term_serialize (const LSUP_Term *term)
{
    /*
     * In serializing a term, the fact that two terms of different types may
     * be semantically identical must be taken into account. Specifically, a
     * namespace-prefixed IRI ref is identical to its fully qualified version,
     * and a LSUP_TERM_LT_LITERAL with no language tag is identical to a
     * LSUP_TERM_LITERAL of xsd:string type, made up of the same string. Such
     * terms must have identical serializations.
     */

    if (UNLIKELY (!term)) return NULL;

    LSUP_Term *tmp_term;
    void *metadata = NULL;

    if (term->type == LSUP_TERM_NS_IRIREF) {
        // For IRI refs, simply serialize the FQ version of the term.
        char *fq_uri;
        if (LSUP_nsmap_normalize_uri (
            term->iri_info->nsm, term->data, &fq_uri
        ) != LSUP_OK) return NULL;

        tmp_term = LSUP_iriref_new (fq_uri, NULL);
        free (fq_uri);

    } else if (term->type == LSUP_TERM_LT_LITERAL) {
        // For LT literals with empty lang tag, convert to a normal xsd:string.
        if (strlen (term->lang) == 0)
            tmp_term = LSUP_literal_new (term->data, NULL);
        else tmp_term = LSUP_lt_literal_new (term->data, (char *) term->lang);

    } else tmp_term = LSUP_term_new (
            term->type, term->data, (void *) term->datatype);
    // "datatype" can be anything here since it's cast to void *.

    // Any of the constructors above may fail.
    if (UNLIKELY (!tmp_term)) return NULL;

    // metadata field is ignored for IRI ref.
    if (tmp_term->type == LSUP_TERM_LITERAL)
        metadata = tmp_term->datatype;
    else if (tmp_term->type == LSUP_TERM_LT_LITERAL)
        memcpy (&metadata, tmp_term->lang, sizeof (metadata));

    // Zero-filled so that LSUP_buffer_free() never sees indeterminate
    // members if packing fails before the address and size are set.
    LSUP_Buffer *sterm = calloc (1, sizeof (*sterm));
    if (UNLIKELY (!sterm)) {
        LSUP_term_free (tmp_term);
        return NULL;
    }

    int rc = tpl_jot (
            TPL_MEM, &sterm->addr, &sterm->size, TERM_PACK_FMT,
            &tmp_term->type, &tmp_term->data, &metadata);

    LSUP_term_free (tmp_term);

    if (rc != 0) {
        LSUP_buffer_free (sterm);
        return NULL;
    }

    return sterm;
}


/** @brief Hash a term by hashing its serialized form.
 *
 * @param[in] term Term to hash. For NULL, BUF_DUMMY is hashed instead.
 *
 * @return Hash key of the serialized term.
 */
LSUP_Key
LSUP_term_hash (const LSUP_Term *term)
{
    LSUP_Buffer *buf;

    if (UNLIKELY (!term)) buf = BUF_DUMMY;
    else buf = LSUP_term_serialize (term);

    LSUP_Key key = LSUP_buffer_hash (buf);

    LSUP_buffer_free (buf);

    return key;
}


/** @brief Release a term, its data string and, for IRI refs, its IRI info.
 *
 * @param[in] term Term to free. NULL is a no-op.
 */
void
LSUP_term_free (LSUP_Term *term)
{
    if (UNLIKELY (!term)) return;

    if (LSUP_IS_IRI (term)) free (term->iri_info);
    free (term->data);

    free (term);
}


/** @brief Namespace map handle stored with an IRI ref.
 *
 * @param[in] iri IRI ref term.
 *
 * @return The stored NS map handle; NULL, with an error logged, if the term
 *  is not an IRI ref.
 */
LSUP_NSMap *
LSUP_iriref_nsm (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }
    return iri->iri_info->nsm;
}


/** @brief Copy of the prefix portion of an IRI ref.
 *
 * @param[in] iri IRI ref term.
 *
 * @return Newly allocated string to be freed by the caller; NULL if the term
 *  is not an IRI ref or the prefix is unmatched or empty.
 */
char *
LSUP_iriref_prefix (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }

    // An offset of -1 marks a group that did not match.
    if (iri->iri_info->prefix.rm_so == -1) return NULL;

    size_t len = iri->iri_info->prefix.rm_eo - iri->iri_info->prefix.rm_so;
    if (len == 0) return NULL;

    return strndup (iri->data + iri->iri_info->prefix.rm_so, len);
}


/** @brief Copy of the path portion of an IRI ref.
 *
 * @param[in] iri IRI ref term.
 *
 * @return Newly allocated string to be freed by the caller; NULL if the term
 *  is not an IRI ref or the path is unmatched or empty.
 */
char *
LSUP_iriref_path (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }

    if (iri->iri_info->path.rm_so == -1) return NULL;

    size_t len = iri->iri_info->path.rm_eo - iri->iri_info->path.rm_so;
    if (len == 0) return NULL;

    return strndup (iri->data + iri->iri_info->path.rm_so, len);
}


/** @brief Copy of the fragment portion of an IRI ref.
 *
 * Unlike the prefix and path getters, a matched but empty fragment yields an
 * empty string rather than NULL.
 *
 * @param[in] iri IRI ref term.
 *
 * @return Newly allocated string to be freed by the caller; NULL if the term
 *  is not an IRI ref or the fragment is unmatched.
 */
char *
LSUP_iriref_frag (const LSUP_Term *iri)
{
    if (iri->type != LSUP_TERM_IRIREF && iri->type != LSUP_TERM_NS_IRIREF) {
        log_error ("Term is not a IRI ref type.");
        return NULL;
    }

    if (iri->iri_info->frag.rm_so == -1) return NULL;

    size_t len = iri->iri_info->frag.rm_eo - iri->iri_info->frag.rm_so;

    return strndup (iri->data + iri->iri_info->frag.rm_so, len);
}


/*
 * Triple API.
 */

/** @brief Allocate a triple and point it at the given terms.
 *
 * The terms are not copied.
 *
 * @return New triple handle, or NULL on failure.
 */
LSUP_Triple *
LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
{
    LSUP_Triple *spo = malloc (sizeof (*spo));
    if (!spo) return NULL;

    if (UNLIKELY (LSUP_triple_init (spo, s, p, o))) {
        free (spo);
        return NULL;
    }

    return spo;
}


/** @brief Allocate a triple from a triple of serialized terms.
 *
 * A member that fails to deserialize is left NULL; this is not reported.
 *
 * @param[in] sspo Serialized triple. Must not be NULL.
 *
 * @return New triple handle, or NULL if the triple cannot be allocated.
 */
LSUP_Triple *
LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo)
{
    LSUP_Triple *spo = malloc (sizeof (*spo));
    if (!spo) return NULL;

    spo->s = LSUP_term_new_from_buffer (sspo->s);
    spo->p = LSUP_term_new_from_buffer (sspo->p);
    spo->o = LSUP_term_new_from_buffer (sspo->o);

    return spo;
}


/** @brief Serialize the three terms of a triple.
 *
 * A member that fails to serialize is left NULL; this is not reported.
 *
 * @param[in] spo Triple to serialize. Must not be NULL.
 *
 * @return New buffer triple handle, or NULL if it cannot be allocated.
 */
LSUP_BufferTriple *
LSUP_triple_serialize (const LSUP_Triple *spo)
{
    LSUP_BufferTriple *sspo = malloc (sizeof (*sspo));
    if (!sspo) return NULL;

    sspo->s = LSUP_term_serialize (spo->s);
    sspo->p = LSUP_term_serialize (spo->p);
    sspo->o = LSUP_term_serialize (spo->o);

    return sspo;
}


/** @brief Point an existing triple at the given terms.
 *
 * No validation is currently performed; see the FIXME below.
 *
 * @return LSUP_OK.
 */
LSUP_rc
LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
{
    /* FIXME TRP_DUMMY is a problem here.
    if (! LSUP_IS_IRI (s) && s->type != LSUP_TERM_BNODE) {
        log_error ("Subject is not of a valid term type: %d", s->type);
        return LSUP_VALUE_ERR;
    }
    if (! LSUP_IS_IRI (p)) {
        log_error ("Predicate is not of a valid term type: %d", p->type);
        return LSUP_VALUE_ERR;
    }
    */

    spo->s = s;
    spo->p = p;
    spo->o = o;

    return LSUP_OK;
}


/** @brief Free the terms of a triple but not the triple struct itself.
 *
 * @param[in] spo Triple whose terms are released. NULL is a no-op.
 */
void
LSUP_triple_done (LSUP_Triple *spo)
{
    if (UNLIKELY (!spo)) return;

    LSUP_term_free (spo->s);
    LSUP_term_free (spo->p);
    LSUP_term_free (spo->o);
}


/** @brief Free the terms of a triple and the triple struct.
 *
 * @param[in] spo Triple to free. NULL is a no-op.
 */
void
LSUP_triple_free (LSUP_Triple *spo)
{
    if (UNLIKELY (!spo)) return;

    LSUP_term_free (spo->s);
    LSUP_term_free (spo->p);
    LSUP_term_free (spo->o);

    free (spo);
}


/** @brief Add a term to the term cache under the given key.
 *
 * @return LSUP_OK if the entry was set; LSUP_NOACTION if an entry was already
 *  found for it.
 */
LSUP_rc
LSUP_tcache_add (const LSUP_Key key, const LSUP_Term *term)
{
    LSUP_KeyedTerm entry_s = {.key=key, .term=(LSUP_Term *)term};

    // Many calls will likely attempt inserting duplicates after the first one.
    if (LIKELY (hashmap_get (LSUP_term_cache, &entry_s)))
        return LSUP_NOACTION;

    hashmap_set (LSUP_term_cache, &entry_s);

    return LSUP_OK;
}


/** @brief Look up a term in the term cache.
 *
 * @return The cached term, or NULL if no entry is found.
 */
const LSUP_Term *
LSUP_tcache_get (LSUP_Key key)
{
    LSUP_KeyedTerm *entry = hashmap_get (
            LSUP_term_cache, &(LSUP_KeyedTerm){.key=key});

    if (entry) log_trace ("ID found for key %lx: %s", key, entry->term->data);
    else log_trace ("No ID found for key %lx.", key);

    return (entry) ? entry->term : NULL;
}


/*
 * Static functions.
 */

/** @brief Fill in an allocated term.
 *
 * On failure the term may be left partially initialized (data string and/or
 * IRI info already allocated); the caller is responsible for releasing it.
 *
 * @param[in,out] term Term to initialize.
 * @param[in] type Term type. LSUP_TERM_UNDEFINED is rejected.
 * @param[in] data Term string, copied; NULL is only accepted for
 *  LSUP_TERM_IRIREF and LSUP_TERM_BNODE, for which a random UUID is used.
 * @param[in] metadata NS map, data type term or language tag string,
 *  depending on the type.
 *
 * @return LSUP_OK on success; LSUP_ERROR if the URI pattern is not set;
 *  LSUP_VALUE_ERR on invalid input; LSUP_MEM_ERR on allocation failure.
 */
static LSUP_rc
term_init (
        LSUP_Term *term, LSUP_TermType type,
        const char *data, void *metadata)
{
    if (UNLIKELY (!LSUP_uri_ptn)) {
        log_error (
                "Environment not initialized. Did you call LSUP_init()?");
        return LSUP_ERROR;
    }
    // This can never be LSUP_TERM_UNDEFINED.
    if (type == LSUP_TERM_UNDEFINED) {
        log_error ("%d is not a valid term type.", type);
        return LSUP_VALUE_ERR;
    }

    term->type = type;

    if (data) {
        // Validate IRI.
        if (LSUP_IS_IRI (term)) {
            char *fquri;
            // Find fully qualified IRI to parse.
            if (term->type == LSUP_TERM_NS_IRIREF) {
                if (LSUP_nsmap_normalize_uri (
                        metadata, data, &fquri) != LSUP_OK
                ) {
                    log_error ("Error normalizing IRI data.");

                    return LSUP_VALUE_ERR;
                }
                log_debug ("Fully qualified IRI: %s", fquri);
            } else fquri = (char *) data;

            if (strpbrk (fquri, invalid_uri_chars) != NULL) {
                log_warn (
                        "Characters %s are not valid in a URI. Got: %s\n",
                        invalid_uri_chars, fquri);
#if 0
                // TODO This causes W3C TTL test #29 to fail. Remove?
                return LSUP_VALUE_ERR;
#endif
            }

            // Capture interesting IRI parts.
            regmatch_t matches[11];
            int match_rc = regexec (LSUP_uri_ptn, fquri, 11, matches, 0);

            // The FQ string is only owned here for NS-prefixed IRIs. The
            // matches are plain offsets, so it can be released before they
            // are inspected, and it no longer leaks when matching fails.
            if (term->type == LSUP_TERM_NS_IRIREF) free (fquri);

            if (UNLIKELY (match_rc != 0)) {
                fprintf (stderr, "Error matching URI pattern.\n");

                return LSUP_VALUE_ERR;
            }

            // NOTE(review): for NS-prefixed IRIs the offsets below refer to
            // the fully qualified string, while term->data stores the
            // prefixed form. Confirm that the part getters are meant to be
            // applied to such terms.
            MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);

            term->iri_info->prefix = matches[1];
            term->iri_info->path = matches[5];
            term->iri_info->frag = matches[10];
            term->iri_info->nsm = metadata;
        }

        term->data = strdup (data);
        if (UNLIKELY (!term->data)) return LSUP_MEM_ERR;

    } else {
        // No data. Make up a random UUID or URI if allowed.
        if (type == LSUP_TERM_IRIREF || type == LSUP_TERM_BNODE) {
            uuid_t uuid;
            uuid_generate_random (uuid);

            uuid_str_t uuid_str;
            uuid_unparse_lower (uuid, uuid_str);

            if (type == LSUP_TERM_IRIREF) {
                term->data = malloc (UUID4_URN_SIZE);
                if (UNLIKELY (!term->data)) return LSUP_MEM_ERR;
                snprintf (
                        term->data, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);

                MALLOC_GUARD (term->iri_info, LSUP_MEM_ERR);

                // Allocate IRI match patterns manually.
                term->iri_info->prefix.rm_so = 0;
                term->iri_info->prefix.rm_eo = 4;
                term->iri_info->path.rm_so = 4;
                term->iri_info->path.rm_eo = UUIDSTR_SIZE + 6;
                term->iri_info->frag.rm_so = -1;
                term->iri_info->frag.rm_eo = -1;
                term->iri_info->nsm = NULL;

            } else {
                term->data = strdup (uuid_str);
                if (UNLIKELY (!term->data)) return LSUP_MEM_ERR;
            }
        } else {
            log_error ("No data provided for term.");
            return LSUP_VALUE_ERR;
        }
    }

    if (term->type == LSUP_TERM_LT_LITERAL) {
        if (!metadata) {
            log_warn ("Lang tag is NULL. Creating a non-tagged literal.");
            term->type = LSUP_TERM_LITERAL;
        } else {
            char *lang_str = (char *) metadata;
            log_trace("Lang string: '%s'", lang_str);
            // Lang tags longer than 7 characters will be truncated.
            strncpy(term->lang, lang_str, sizeof (term->lang) - 1);
            // strncpy() does not terminate a truncated copy: terminate
            // before the tag is measured.
            term->lang[sizeof (term->lang) - 1] = '\0';
            if (strlen (term->lang) < 1) {
                log_error ("Lang tag cannot be an empty string.");
                return LSUP_VALUE_ERR;
            }
        }
    }

    if (term->type == LSUP_TERM_LITERAL) {
        term->datatype = metadata;
        if (! term->datatype) term->datatype = LSUP_default_datatype;
        log_trace ("Storing data type: %s", term->datatype->data);

        if (! LSUP_IS_IRI (term->datatype)) {
            log_error (
                    "Literal data type is not an IRI: %s",
                    term->datatype->data);

            return LSUP_VALUE_ERR;
        }

        // Data type terms are shared through the term cache: a data type
        // equal to an already cached one is freed and replaced by the cached
        // handle.
        uint32_t dtype_hash = LSUP_term_hash (term->datatype);
        const LSUP_Term *tmp = LSUP_tcache_get (dtype_hash);
        if (!tmp) LSUP_tcache_add (dtype_hash, term->datatype);
        else if (term->datatype != tmp) {
            if (term->datatype != LSUP_default_datatype)
                LSUP_term_free (term->datatype);
            term->datatype = (LSUP_Term *)tmp;
        }

        //log_trace ("Datatype address: %p", term->datatype);
        log_trace ("Datatype hash: %lx", LSUP_term_hash (term->datatype));

    } else if (term->type == LSUP_TERM_BNODE) {
        // TODO This is not usable for global skolemization.
        term->bnode_id = LSUP_HASH (
                term->data, strlen (term->data) + 1, LSUP_HASH_SEED);
    }

    return LSUP_OK;
}


/*
 * Extern inline functions.
 */

LSUP_Key LSUP_term_hash (const LSUP_Term *term);
LSUP_Term *LSUP_iriref_new (const char *data, LSUP_NSMap *nsm);
LSUP_Term *LSUP_literal_new (const char *data, LSUP_Term *datatype);
LSUP_Term *LSUP_lt_literal_new (const char *data, char *lang);
LSUP_Term *LSUP_bnode_new (const char *data);
bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);