123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241 |
- #ifndef _LSUP_TERM_H
- #define _LSUP_TERM_H
- #include <assert.h>
- #include <regex.h>
- #include "uthash.h"
- #include "buffer.h"
- #include "namespace.h"
- // "NULL" triple, a value that is never user-provided. Used to fill deleted
- // triples in a keyset.
- #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
- // Buffer size for a UUID4-based URN: UUIDSTR_SIZE plus the 10 characters of
- // the "urn:uuid4:" prefix. The expansion is parenthesized so that it remains
- // a single operand when used inside a larger expression (e.g. a product).
- #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
- /*
- * Term types.
- *
- * Values stored in the `type` member of LSUP_Term (typed as LSUP_TermType).
- */
- /* Undefined placeholder or result of an error. Invalid for most operations. */
- #define LSUP_TERM_UNDEFINED 0
- /* IRI reference. */
- #define LSUP_TERM_IRIREF 1
- /* Namespace-prefixed IRI reference. The `nsm` member of LSUP_Term holds the
- * namespace map handle. */
- #define LSUP_TERM_NS_IRIREF 2
- /* Literal without language tag. The `datatype` member of LSUP_Term holds the
- * data type hash. */
- #define LSUP_TERM_LITERAL 3
- /* Language-tagged string literal. The `lang` member of LSUP_Term holds the
- * language tag hash. */
- #define LSUP_TERM_LT_LITERAL 4
- /* Blank node. */
- #define LSUP_TERM_BNODE 5
- /*
- * In-term identifier types.
- *
- * Kinds of identifier that can appear inside a term. Their values do not
- * overlap with the term type values above.
- */
- /* Namespace prefix string. */
- #define LSUP_ID_NS 10
- /* Data type IRI. */
- #define LSUP_ID_DATATYPE 11
- /* Language tag string. */
- #define LSUP_ID_LANG 12
- /* Temporary blank node ID. TODO implement. */
- #define LSUP_ID_BNODE 13
- /** @brief Default data type for untyped literals (fully qualified IRI).
- */
- #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
- /** @brief URI parsing regular expression. Conforms to RFC3986.
- *
- * This is the pattern listed in RFC 3986, Appendix B. Its capture groups are:
- * 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment. Every
- * component is optional in the pattern, so it splits a reference into parts
- * rather than rejecting malformed input by itself.
- */
- #define LSUP_URI_REGEX_STR \
- "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
- /*
- * Data types.
- *
- * LSUP_Term is the in-memory representation of a term: its string data, one
- * type-dependent extra field (anonymous union), and the term type.
- */
- typedef XXH64_hash_t LSUP_Hash64; // 64-bit hash value (alias of XXH64_hash_t).
- typedef char LSUP_TermType; // Holds one of the LSUP_TERM_* constants.
- typedef struct term_t {
- char * data; // URI, literal value, or BNode label.
- union { // Per the member notes below, which member applies depends on `type`.
- uint32_t datatype; // Data type hash for LSUP_TERM_LITERAL.
- uint32_t lang; // Lang tag hash for LSUP_TERM_LT_LITERAL.
- uint32_t bnode_id; // Blank node ID. TODO implement.
- LSUP_NSMap * nsm; // NSM handle for prefixed IRI (LSUP_TERM_NS_IRIREF).
- };
- LSUP_TermType type; // Term type: one of the LSUP_TERM_* constants.
- } LSUP_Term;
- /** @brief Hash cache for lang tags and data types.
- *
- * One entry of a uthash table: it associates a 32-bit key with an identifier
- * string.
- */
- typedef struct id_cache_t {
- uint32_t key; // 32-bit key of the entry.
- char * data; // Identifier string associated with the key.
- UT_hash_handle hh; // uthash handle that makes this struct hashable.
- } IDCache;
- /*
- * Extern variables.
- *
- * Declarations only; the definitions live in another translation unit.
- */
- /** @brief Global ID cache.
- *
- * Map of internal term identifiers, such as literal data types, language tags
- * and BNode identifiers.
- */
- extern IDCache *LSUP_id_cache;
- /** @brief Compiled hash of default literal data type.
- *
- * NOTE(review): presumably the 32-bit hash of #DEFAULT_DTYPE; confirm where it
- * is computed.
- */
- extern uint32_t LSUP_default_dtype_key;
- /** @brief URI validation pattern, compiled in #LSUP_init().
- *
- * NOTE(review): presumably compiled from #LSUP_URI_REGEX_STR; confirm in the
- * implementation.
- */
- extern regex_t *LSUP_uri_ptn;
- /*
- * Function prototypes.
- */
- /** @brief Create a new term.
- *
- * @param[in] type Term type. One of the LSUP_TERM_* constants.
- *
- * @param[in] data Term data: textual URI, literal value without data type
- * or langtag, etc. Callers in this header pass NULL for an undefined term.
- *
- * @param[in] metadata Language tag for language-tagged literals or data type
- * for other literals. Callers in this header pass NULL for IRI references.
- *
- * @return Pointer to a new term, which must be freed with
- * #LSUP_term_free after use. NOTE(review): the value returned when validation
- * fails is not visible from this declaration (presumably NULL); confirm
- * against the implementation.
- */
- LSUP_Term *
- LSUP_term_new (LSUP_TermType type, const char *data, const char *metadata);
- /** @brief Placeholder term to use with LSUP_term_reset.
- *
- * Expands to a call to #LSUP_term_new with LSUP_TERM_UNDEFINED and no data or
- * metadata, so each use of the macro creates a separate term.
- *
- * NOTE(review): no LSUP_term_reset is declared in the visible part of this
- * header; #LSUP_term_init may be the function meant here. Confirm.
- */
- #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
- /** @brief Shortcut to create a URI.
-  *
-  * Must be freed with #LSUP_term_free.
-  *
-  * @param[in] data The URI string. If NULL, a UUID4-based URN of the form
-  *  "urn:uuid4:<uuid>" is generated.
-  *
-  * @return The term produced by #LSUP_term_new for type LSUP_TERM_IRIREF.
-  */
- inline LSUP_Term *
- LSUP_uri_new (const char *data)
- {
-     // Kept at function scope on purpose: when no URI is supplied, `data` is
-     // pointed at this buffer and read by LSUP_term_new() below, so the buffer
-     // must outlive the `if` block that fills it.
-     char uri[UUID4_URN_SIZE];
-     if (!data) {
-         uuid_t uuid;
-         uuid_generate_random (uuid);
-         uuid_str_t uuid_str;
-         uuid_unparse_lower (uuid, uuid_str);
-         snprintf (uri, sizeof (uri), "urn:uuid4:%s", uuid_str);
-         data = uri;
-     }
-     return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL);
- }
- /** @brief Initialize or reuse a pre-allocated term structure.
- *
- * The structure must have been previously created with #LSUP_term_new. It can
- * be reinitialized multiple times without freeing it. It must be eventually
- * freed with #LSUP_term_free.
- *
- * @param term Term structure to (re)initialize.
- *
- * @param[in] type Term type. One of the LSUP_TERM_* constants.
- *
- * @param[in] data Term data, as for #LSUP_term_new.
- *
- * @param[in] metadata Language tag or data type, as for #LSUP_term_new.
- *
- * @return Status code. NOTE(review): presumably LSUP_OK on success and
- * LSUP_VALUE_ERR if validation fails; confirm against the implementation.
- */
- LSUP_rc
- LSUP_term_init(
- LSUP_Term *term, LSUP_TermType type,
- const char *data, const char *metadata);
- /** @brief Create a term from a buffer.
-  *
-  * NOTE(review): presumably the inverse of #LSUP_buffer_new_from_term, with
-  * the result to be freed with #LSUP_term_free; confirm against the
-  * implementation.
-  *
-  * @param[in] sterm Buffer to build the term from.
-  *
-  * @return The new term.
-  */
- LSUP_Term *LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
- /** @brief Create a buffer from a term.
-  *
-  * #LSUP_term_hash releases the returned buffer with LSUP_buffer_free after
-  * use; other callers are expected to do the same.
-  *
-  * @param[in] term Term to build the buffer from.
-  *
-  * @return The new buffer.
-  */
- LSUP_Buffer *LSUP_buffer_new_from_term (const LSUP_Term *term);
- /**
- * @brief Shortcut to initialize a URI.
- *
- * NOTE(review): presumably equivalent to calling #LSUP_term_init with type
- * LSUP_TERM_IRIREF and no metadata; confirm against the implementation.
- *
- * @param term Term structure to (re)initialize.
- *
- * @param[in] data The URI string.
- *
- * @return Status code of the initialization.
- */
- LSUP_rc
- LSUP_uri_init (LSUP_Term *term, const char *data);
- /** @brief Hash a term.
-  *
-  * The term is converted to a buffer with #LSUP_buffer_new_from_term, the
-  * buffer is hashed with LSUP_buffer_hash, and the buffer is then freed.
-  * When the term is NULL, BUF_DUMMY is hashed (and freed) instead.
-  *
-  * @param[in] term Term to hash. May be NULL.
-  *
-  * @return Hash key computed from the buffer.
-  */
- inline LSUP_Key
- LSUP_term_hash (const LSUP_Term *term)
- {
-     LSUP_Buffer *sterm = (UNLIKELY (!term))
-         ? BUF_DUMMY
-         : LSUP_buffer_new_from_term (term);
-     const LSUP_Key hash = LSUP_buffer_hash (sterm);
-     LSUP_buffer_free (sterm);
-     return hash;
- }
- /**
- * Compare two terms.
- *
- * @param[in] term1 First term.
- *
- * @param[in] term2 Second term.
- *
- * @return true if the terms are considered equal, false otherwise.
- * NOTE(review): the comparison criterion is not visible from this
- * declaration; confirm against the implementation.
- */
- bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
- /** @brief Finalize a term.
-  *
-  * NOTE(review): presumably releases what the term holds without freeing the
-  * structure itself; confirm against the implementation.
-  *
-  * @param term Term to finalize.
-  */
- void LSUP_term_done (LSUP_Term *term);
- /** @brief Free a term.
-  *
-  * Terms created with #LSUP_term_new or #LSUP_uri_new must be released with
-  * this function.
-  *
-  * @param term Term to free.
-  */
- void LSUP_term_free (LSUP_Term *term);
- /** @brief Add an identifier to the term cache.
- *
- * @param[in] key 32-bit hash of the inserted identifier.
- *
- * @param[in] data Identifier string to insert.
- *
- * @return Status code of the insertion. NOTE(review): the possible values
- * are not visible from this declaration; confirm against the implementation.
- */
- LSUP_rc
- LSUP_tcache_add_id (const uint32_t key, const char *data);
- /** @brief Get an identifier from the cache.
- *
- * @param[in] key Key for the queried identifier (32-bit hash, as passed to
- * #LSUP_tcache_add_id).
- *
- * @return The retrieved identifier string if found, or NULL. The string must
- * not be modified or freed.
- */
- const char *
- LSUP_tcache_get_id (const uint32_t key);
- #endif
|