#ifndef LSUP_TERM_H #define LSUP_TERM_H #include #include #include "xxhash.h" #include "buffer.h" // URI parsing regular expression. Conforms to RFC3986. #define URI_REGEX_STR \ "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" #define SEED 0 // TODO Make configurable. #define LANG_SIZE 8 // Size in chars of lang tag // "NULL" key, a value that is never user-provided. Used to mark special // values (e.g. deleted records). #define NULL_KEY 0 // "NULL" triple, a value that is never user-provided. Used to fill deleted // triples in a keyset. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY} typedef XXH64_hash_t LSUP_TermHash64; typedef char langtag[LANG_SIZE]; typedef enum LSUP_term_type { LSUP_TERM_UNDEFINED, LSUP_TERM_URI, LSUP_TERM_BNODE, LSUP_TERM_LITERAL } LSUP_term_type; typedef struct LSUP_Term { LSUP_term_type type; // This language variable currently supports a 2-digit ISO 639 language // code and a 2-character ISO 3166-1 country code, separated by a hyphen. // See https://tools.ietf.org/html/bcp47#section-2.1 langtag lang; char *datatype; char *data; } LSUP_Term; /** @brief Create a new term. * * @param type[in] Term type. One of #LSUP_term_type. * * @param data[in] Term data: textual URI, literal value without data type * or langtag, etc. * * @param datatype[in]: data type for literals. * * @param lang[in]: language tag for string literals. * * @param term[out] Pointer to a new term, which must be freed with * #LSUP_term_free after use. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ LSUP_rc LSUP_term_new( LSUP_term_type type, const char *data, char *datatype, char *lang, LSUP_Term **term); /** @brief Shortcut to create a URI. * * Must be freed with #LSUP_term_free. * * @param data[in] The URI string. If NULL, a UUID4-based URN is generated. * * @param uri[out] The URI to be created. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ inline LSUP_rc LSUP_uri_new(const char *data, LSUP_Term **uri) { if (!data) { uuid_t uuid; uuid_generate_random(uuid); uuid_str_t uuid_str; uuid_unparse_lower(uuid, uuid_str); char uri[UUIDSTR_SIZE + 10]; sprintf(uri, "urn:uuid4:%s", uuid_str); data = uri; } return LSUP_term_new(LSUP_TERM_URI, data, NULL, NULL, uri); } /* @brief Reuse a pre-allocated term structure. * * The structure must have been previously created with #LSUP_term_new. It can * be reinitialized multiple times without freeing it. It must be eventually * freed with #LSUP_term_free. */ LSUP_rc LSUP_term_reset( LSUP_Term *term, LSUP_term_type type, const char *data, char *datatype, char *lang); /** * @brief Shortcut to initialize a URI. */ inline LSUP_rc LSUP_uri_reset(LSUP_Term *term, const char *data) { return LSUP_term_reset(term, LSUP_TERM_URI, data, NULL, NULL); } /** @brief Simple ad-hoc serialization function. * * The resulting term must be freed with #LSUP_term_free after use. */ LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer **sterm); LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term **term); inline LSUP_Key LSUP_sterm_to_key(const LSUP_Buffer *sterm) { if (UNLIKELY (sterm == NULL)) return NULL_KEY; return XXH64(sterm->addr, sterm->size, SEED); } /** @brief Hash a term into a key. * * If NULL is passed, the result is NULL_KEY. */ inline LSUP_Key LSUP_term_to_key(const LSUP_Term *term) { if (UNLIKELY (term == NULL)) return NULL_KEY; LSUP_Buffer *sterm; LSUP_term_serialize(term, &sterm); LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED); LSUP_buffer_free(sterm); return key; } /** * Compare two terms. */ bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2); /* // TODO Implement when xxhash v0.8 is released with stable xxhash128 function. inline XXH128_hash_t LSUP_term_hash128(const LSUP_Term *term); */ void LSUP_term_free(LSUP_Term *term); #endif