#ifndef _LSUP_TERM_H #define _LSUP_TERM_H #include #include #include "uthash.h" #include "buffer.h" #include "namespace.h" #define UUID4_URN_SIZE UUIDSTR_SIZE + 10 /* * Term types. */ /* Undefined placeholder or result of an error. Invalid for most operations. */ #define LSUP_TERM_UNDEFINED 0 /* IRI reference. */ #define LSUP_TERM_IRIREF 1 /* Namespace-prefixed IRI reference. */ #define LSUP_TERM_NS_IRIREF 2 /* Literal without language tag. */ #define LSUP_TERM_LITERAL 3 /* Language-tagged string literal. */ #define LSUP_TERM_LT_LITERAL 4 /* Blank node. */ #define LSUP_TERM_BNODE 5 /** @brief Default data type for untyped literals (prefixed IRI). */ #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string" /** @brief URI parsing regular expression. Conforms to RFC3986. */ #define LSUP_URI_REGEX_STR \ "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" /* * Data types. */ typedef XXH64_hash_t LSUP_Hash64; typedef char LSUP_TermType; typedef char LSUP_LangTag[8]; typedef struct term_t { char * data; // URI, literal value, or BNode label. union { LSUP_Key datatype; // Data type key for LSUP_TERM_LITERAL. LSUP_LangTag lang; // Lang tag for LSUP_TERM_LT_LITERAL. LSUP_Key bnode_id; // BNode ID for comparison & skolemization. LSUP_NSMap * nsm; // NSM handle for prefixed IRI. }; LSUP_TermType type; // Term type. } LSUP_Term; /** @brief Hash cache for data types. */ struct term_cache_t { LSUP_Key key; LSUP_Term * term; UT_hash_handle hh; }; typedef struct triple_t { LSUP_Term *s; LSUP_Term *p; LSUP_Term *o; } LSUP_Triple; /* * Extern variables. */ /** @brief Global term cache. * * Stores frequently used terms, e.g. data type URIs. */ extern struct term_cache_t *LSUP_term_cache; /** @brief Compiled hash of default literal data type. */ extern uint32_t LSUP_default_dtype_key; /** @brief URI validation pattern, compiled in #LSUP_init(). */ extern regex_t *LSUP_uri_ptn; /** @brief Default literal data type URI. * * Literal terms created with undefined data type will have it set to this * URI implicitly. */ extern LSUP_Term *LSUP_default_datatype; /* * Function prototypes. */ /** @brief Create a new term. * * @param type[in] Term type. One of #LSUP_TermType. * * @param data[in] Term data: textual URI, literal value without data type * or langtag, etc. * * @param metadata[in]: language tag (LSUP_LangTag) for language-tagged * literals; or data type (LSUP_Term *) for other literals. It may be NULL. * * @return New term, which must be freed with #LSUP_term_free after use; or * NULL on error. */ LSUP_Term * LSUP_term_new (LSUP_TermType type, const char *data, void *metadata); /** @brief Placeholder term to use with LSUP_term_reset. */ #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL) /** @brief Shortcut to create a URI. * * Must be freed with #LSUP_term_free. * * @param data[in] The URI string. If NULL, a UUID4-based URN is generated. * * @param uri[out] The URI to be created. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ inline LSUP_Term * LSUP_uri_new (const char *data) { if (!data) { uuid_t uuid; uuid_generate_random (uuid); uuid_str_t uuid_str; uuid_unparse_lower (uuid, uuid_str); char uri[UUID4_URN_SIZE]; snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str); data = uri; } return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL); } /* @brief Initialize or reuse a pre-allocated term structure. * * The structure must have been previously created with #LSUP_term_new. It can * be reinitialized multiple times without freeing it. It must be eventually * freed with #LSUP_term_free. */ LSUP_rc LSUP_term_init( LSUP_Term *term, LSUP_TermType type, const char *data, void *metadata); /** @brief Deserialize a buffer into a term. * * @param[in] sterm Buffer to convert into a term. It must be a valid * serialized term from store or obtained with #LSUP_term_serialize(). * * @return New term handle. It must be freed with #LSUP_term_free(). */ LSUP_Term * LSUP_term_new_from_buffer (const LSUP_Buffer *sterm); /** @brief Serialize a term into a buffer. * * @param[in] sterm Term to convert into a buffer. * * @return New buffer handle. It must be freed with #LSUP_buffer_free(). */ LSUP_Buffer * LSUP_term_serialize (const LSUP_Term *term); /** * @brief Shortcut to initialize a URI. */ LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data); /** @brief Hash a buffer. */ LSUP_Key LSUP_term_hash (const LSUP_Term *term); /** * Compare two terms. */ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2); void LSUP_term_done (LSUP_Term *term); void LSUP_term_free (LSUP_Term *term); /** @brief Create a new triple from three terms. * * TODO Term types are not validated at the moment. * * @param[in] s Triple subject. It must be an IRIRef or BNode. * * @param[in] p Triple predicate. It must be an IRIRef. * * @param[in] o Triple object. * */ LSUP_Triple * LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o); /** @brief Dummy triple with NULL slots. It is not a valid triple. */ #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL) LSUP_Triple * LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo); LSUP_BufferTriple * LSUP_triple_serialize (const LSUP_Triple *spo); /** @brief Initialize internal term pointers in a heap-allocated triple. * * NOTE: the term structures are not copied. If the triple is freed with * #LSUP_triple_free(), the originally provided terms are freed too. * * @param spo[in] Triple pointer to initialize. */ LSUP_rc LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o); /** @brief Free the internal pointers of a triple. * * @param spo[in] Triple to be freed. */ void LSUP_triple_done (LSUP_Triple *spo); /** @brief Free a triple and all its internal pointers. * * NOTE: If the term pointers are not to be freed (e.g. they are owned by a * back end), use a simple free(spo) instead of this. * * @param spo[in] Triple to be freed. */ void LSUP_triple_free (LSUP_Triple *spo); /** @brief Get triple by term position. * * Useful for looping over all terms. * * @param trp[in] Triple pointer. * * @param n[in] A number between 0รท2. * * @return Corresponding triple term or NULL if n is out of range. */ inline LSUP_Term * LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n) { if (n == TRP_POS_S) return trp->s; if (n == TRP_POS_P) return trp->p; if (n == TRP_POS_O) return trp->o; return NULL; } /** @brief Hash a triple. * * TODO This doesn't handle blank nodes correctly. */ inline LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp) { LSUP_BufferTriple *strp = LSUP_triple_serialize (trp); LSUP_Key hash = LSUP_btriple_hash (strp); LSUP_btriple_free (strp); return hash; } /** @brief Add an identifier to the term cache. * * @param[in] key Hash of the inserted term. * * @param[in] term Term to insert. */ LSUP_rc LSUP_tcache_add (const LSUP_Key key, LSUP_Term *term); /** @brief Get an identifier from the cache. * * @param[in] key Key for the queried term. * * @return The retrieved term if found, or NULL. The string must not be * modified or freed. */ const LSUP_Term * LSUP_tcache_get (const LSUP_Key key); #endif