123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213 |
- #ifndef LSUP_TERM_H
- #define LSUP_TERM_H
- #include <assert.h>
- #include <regex.h>
- #include "xxhash.h"
- #include "buffer.h"
/** @brief URI parsing regular expression.
 *
 * Conforms to RFC3986 (this is the expression given in its Appendix B).
 */
#define URI_REGEX_STR \
    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

/** @brief Seed passed to XXH64 when hashing terms into keys. */
#define SEED 0 // TODO Make configurable.

/** @brief Size in chars of lang tag. */
#define LANG_SIZE 8

/** @brief "NULL" key, a value that is never user-provided.
 *
 * Used to mark special values (e.g. deleted records).
 */
#define NULL_KEY 0

/** @brief "NULL" triple, a value that is never user-provided.
 *
 * Used to fill deleted triples in a keyset.
 */
#define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}

/** @brief Size of a buffer holding a "urn:uuid4:<uuid>" string.
 *
 * 10 is the length of the "urn:uuid4:" prefix. The expansion is parenthesized
 * so that the macro can be used safely inside larger expressions (e.g.
 * `2 * UUID4_URN_SIZE`).
 *
 * NOTE(review): assumes UUIDSTR_SIZE already accounts for the terminating
 * NUL byte — confirm against its definition.
 */
#define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
- typedef XXH64_hash_t LSUP_TermHash64;
- typedef char langtag[LANG_SIZE];
/** @brief X-macro table of term types.
 *
 * Each row is ENTRY (<name>, <numeric value>). ENTRY must be defined by the
 * code expanding the table, and undefined afterwards.
 */
#define TTYPE_TBL \
    ENTRY (UNDEFINED,   0) \
    ENTRY (URI,         1) \
    ENTRY (BNODE,       2) \
    ENTRY (LITERAL,     3)

/** @brief Term type.
 *
 * Generated from #TTYPE_TBL: every row becomes an enumerator named
 * LSUP_TERM_<name> with the value given in the table.
 */
typedef enum LSUP_term_type {
#define ENTRY(name, value) LSUP_TERM_##name = value,
    TTYPE_TBL
#undef ENTRY
} LSUP_term_type;
- typedef struct LSUP_Term {
- LSUP_term_type type;
- // This language variable currently supports a 2-digit ISO 639 language
- // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
- // See https://tools.ietf.org/html/bcp47#section-2.1
- langtag lang;
- char *datatype;
- char *data;
- } LSUP_Term;
- /** @brief Create a new term.
- *
- * @param type[in] Term type. One of #LSUP_term_type.
- *
- * @param data[in] Term data: textual URI, literal value without data type
- * or langtag, etc.
- *
- * @param datatype[in]: data type for literals.
- *
- * @param lang[in]: language tag for string literals.
- *
- * @param term[out] Pointer to a new term, which must be freed with
- * #LSUP_term_free after use.
- *
- * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
- */
- LSUP_Term *
- LSUP_term_new(
- LSUP_term_type type, const char *data, char *datatype, char *lang);
/** @brief Placeholder term to use with LSUP_term_reset.
 *
 * Expands to an #LSUP_term_new call with type LSUP_TERM_UNDEFINED and NULL
 * data, data type and language tag; every use of the macro is therefore a
 * separate call.
 *
 * NOTE(review): LSUP_term_reset is not declared in this header;
 * #LSUP_term_init looks like the function meant here — confirm.
 */
#define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL, NULL)
- /** @brief Shortcut to create a URI.
- *
- * Must be freed with #LSUP_term_free.
- *
- * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
- *
- * @param uri[out] The URI to be created.
- *
- * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
- */
- inline LSUP_Term *
- LSUP_uri_new (const char *data)
- {
- if (!data) {
- uuid_t uuid;
- uuid_generate_random (uuid);
- uuid_str_t uuid_str;
- uuid_unparse_lower (uuid, uuid_str);
- char uri[UUID4_URN_SIZE];
- snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
- data = uri;
- }
- return LSUP_term_new (LSUP_TERM_URI, data, NULL, NULL);
- }
- /* @brief Reuse a pre-allocated term structure.
- *
- * The structure must have been previously created with #LSUP_term_new. It can
- * be reinitialized multiple times without freeing it. It must be eventually
- * freed with #LSUP_term_free.
- */
- LSUP_rc
- LSUP_term_init(
- LSUP_Term *term, LSUP_term_type type,
- const char *data, char *datatype, char *lang);
- LSUP_Term *
- LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
- LSUP_Buffer *
- LSUP_buffer_new_from_term (const LSUP_Term *term);
- /**
- * @brief Shortcut to initialize a URI.
- */
- inline LSUP_rc
- LSUP_uri_init (LSUP_Term *term, const char *data)
- {
- if (!data) {
- uuid_t uuid;
- uuid_generate_random (uuid);
- uuid_str_t uuid_str;
- uuid_unparse_lower (uuid, uuid_str);
- char uri[UUIDSTR_SIZE + 10];
- sprintf (uri, "urn:uuid4:%s", uuid_str);
- data = uri;
- }
- return LSUP_term_init (term, LSUP_TERM_URI, data, NULL, NULL);
- }
- /** @brief Simple ad-hoc serialization function.
- *
- * The resulting term must be freed with #LSUP_term_free after use.
- */
- LSUP_rc
- LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm);
- /** @brief Deserialize a buffer into a term.
- *
- * The buffer must be a well-formed serialization of a term, e.g. as obtained
- * by #LSUP_term_serialize.
- */
- LSUP_rc
- LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term);
- inline LSUP_Key
- LSUP_sterm_to_key (const LSUP_Buffer *sterm)
- {
- if (UNLIKELY (sterm == NULL)) return NULL_KEY;
- return XXH64(sterm->addr, sterm->size, SEED);
- }
- /** @brief Hash a term into a key.
- *
- * If NULL is passed, the result is NULL_KEY.
- */
- inline LSUP_Key
- LSUP_term_to_key (const LSUP_Term *term)
- {
- if (UNLIKELY (term == NULL)) return NULL_KEY;
- LSUP_Buffer *sterm = LSUP_buffer_new_from_term (term);
- LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED);
- LSUP_buffer_free (sterm);
- return key;
- }
- /**
- * Compare two terms.
- */
- bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
- /*
- // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
- inline XXH128_hash_t
- LSUP_term_hash128(const LSUP_Term *term);
- */
- void
- LSUP_term_done (LSUP_Term *term);
- void
- LSUP_term_free (LSUP_Term *term);
- #endif
|