#ifndef _LSUP_TERM_H #define _LSUP_TERM_H #include #include #include "uthash.h" #include "buffer.h" #include "namespace.h" // "NULL" triple, a value that is never user-provided. Used to fill deleted // triples in a keyset. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY} #define UUID4_URN_SIZE UUIDSTR_SIZE + 10 /* * Term types. */ /* Undefined placeholder or result of an error. Invalid for most operations. */ #define LSUP_TERM_UNDEFINED 0 /* IRI reference. */ #define LSUP_TERM_IRIREF 1 /* Blank node. */ #define LSUP_TERM_BNODE 2 /* Literal without language tag. */ #define LSUP_TERM_LITERAL 3 /* Language-tagged string literal. */ #define LSUP_TERM_LT_LITERAL 4 /* * In-term identifier types. */ /* Data type IRI. */ #define LSUP_ID_DATATYPE 10 /* Language tag string. */ #define LSUP_ID_LANG 11 /* Temporary blank node ID. TODO implement. */ #define LSUP_ID_BNODE 12 /** @brief Default data type for untyped literals. */ #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string" /** @brief URI parsing regular expression. Conforms to RFC3986. */ #define LSUP_URI_REGEX_STR \ "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" /* * Data types. */ typedef XXH64_hash_t LSUP_Hash64; typedef char LSUP_TermType; typedef struct term_t { char * data; // URI, literal value, or BNode label. union { uint32_t datatype; // Data type hash for LSUP_TERM_LITERAL. uint32_t lang; // Lang tag hash for LSUP_TERM_LT_LITERAL. uint32_t bnode_id; // Blank node ID. TODO implement. }; LSUP_TermType type; // Term type. } LSUP_Term; /** @brief Hash cache for lang tags and data types. */ typedef struct id_cache_t { uint32_t key; char * data; UT_hash_handle hh; } IDCache; /* * Extern variables. */ /** @brief Global ID cache. * * Map of internal term identifiers, such as literal data types, language tags * and BNode identifiers. */ extern IDCache *LSUP_id_cache; /** @brief Compiled hash of default literal data type. */ extern uint32_t LSUP_default_dtype_key; /** @brief URI validation pattern, compiled in #LSUP_init(). */ extern regex_t *LSUP_uri_ptn; /* * Function prototypes. */ /** @brief Create a new term. * * @param type[in] Term type. One of #LSUP_TermType. * * @param data[in] Term data: textual URI, literal value without data type * or langtag, etc. * * @param metadata[in]: language tag for language-tagged literals or data type * for other literals. * * @param term[out] Pointer to a new term, which must be freed with * #LSUP_term_free after use. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ LSUP_Term * LSUP_term_new (LSUP_TermType type, const char *data, const char *metadata); /** @brief Placeholder term to use with LSUP_term_reset. */ #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL) /** @brief Shortcut to create a URI. * * Must be freed with #LSUP_term_free. * * @param data[in] The URI string. If NULL, a UUID4-based URN is generated. * * @param uri[out] The URI to be created. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ inline LSUP_Term * LSUP_uri_new (const char *data) { if (!data) { uuid_t uuid; uuid_generate_random (uuid); uuid_str_t uuid_str; uuid_unparse_lower (uuid, uuid_str); char uri[UUID4_URN_SIZE]; snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str); data = uri; } return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL); } /* @brief Initialize or reuse a pre-allocated term structure. * * The structure must have been previously created with #LSUP_term_new. It can * be reinitialized multiple times without freeing it. It must be eventually * freed with #LSUP_term_free. */ LSUP_rc LSUP_term_init( LSUP_Term *term, LSUP_TermType type, const char *data, const char *metadata); LSUP_Term * LSUP_term_new_from_buffer (const LSUP_Buffer *sterm); LSUP_Buffer * LSUP_buffer_new_from_term (const LSUP_Term *term); /** * @brief Shortcut to initialize a URI. */ LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data); /** @brief Hash a buffer. */ inline LSUP_Key LSUP_term_hash (const LSUP_Term *term) { LSUP_Buffer *buf; if (UNLIKELY (!term)) buf = BUF_DUMMY; else buf = LSUP_buffer_new_from_term (term); LSUP_Key key = LSUP_buffer_hash (buf); LSUP_buffer_free (buf); return key; } /** * Compare two terms. */ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2); void LSUP_term_done (LSUP_Term *term); void LSUP_term_free (LSUP_Term *term); /** @brief Add an identifier to the term cache. * * @param[in] key 32-bit hash of the inserted term. * * @param[in] data Term to insert. */ LSUP_rc LSUP_tcache_add_id (const uint32_t key, const char *data); /** @brief Get an identifier from the cache. * * @param[in] key Key for the queried term. * * @return The retieved term if found, or NULL. The string must not be modified * or freed. */ const char * LSUP_tcache_get_id (const uint32_t key); #endif