#ifndef LSUP_TERM_H #define LSUP_TERM_H #include #include #include "xxhash.h" #include "buffer.h" // URI parsing regular expression. Conforms to RFC3986. #define URI_REGEX_STR \ "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" #define SEED 0 // TODO Make configurable. #define LANG_SIZE 8 // Size in chars of lang tag // "NULL" key, a value that is never user-provided. Used to mark special // values (e.g. deleted records). #define NULL_KEY 0 // "NULL" triple, a value that is never user-provided. Used to fill deleted // triples in a keyset. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY} typedef XXH64_hash_t LSUP_TermHash64; typedef char langtag[LANG_SIZE]; typedef enum LSUP_term_type { LSUP_TERM_UNDEFINED, LSUP_TERM_URI, LSUP_TERM_BNODE, LSUP_TERM_LITERAL } LSUP_term_type; typedef struct LSUP_Term { LSUP_term_type type; // This language variable currently supports a 2-digit ISO 639 language // code and a 2-character ISO 3166-1 country code, separated by a hyphen. // See https://tools.ietf.org/html/bcp47#section-2.1 langtag lang; char *datatype; char *data; } LSUP_Term; /* * Initialize a pre-allocated term structure. * * the structure can be an already initialized term, and can be reused * without freeing it. */ LSUP_rc LSUP_term_init( LSUP_Term *term, LSUP_term_type type, const char *data, char *datatype, char *lang); /** * @brief Shortcut to initialize a URI. */ inline LSUP_rc LSUP_uri_init(LSUP_Term *term, const char *data) { return LSUP_term_init(term, LSUP_TERM_URI, data, NULL, NULL); } LSUP_Term * LSUP_term_new(LSUP_term_type type, const char *data, char *datatype, char *lang); /** * @brief Shortcut to create a URI. */ inline LSUP_Term * LSUP_uri_new(const char *data) { return LSUP_term_new(LSUP_TERM_URI, data, NULL, NULL); } /** * Generate a random URN with the format: `urn:lsup:`. */ inline LSUP_Term * LSUP_uri_random() { uuid_t uuid; uuid_generate_random(uuid); uuid_str_t uuid_str; uuid_unparse_lower(uuid, uuid_str); char uri[UUIDSTR_SIZE + 10]; sprintf(uri, "urn:uuid4:%s", uuid_str); return LSUP_uri_new(uri); } /** Simple ad-hoc serialization function. * * The resulting term must be freed with #LSUP_term_done after use. */ LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm); LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term); inline LSUP_Key LSUP_sterm_to_key(const LSUP_Buffer *sterm) { if (UNLIKELY (sterm == NULL)) return NULL_KEY; return XXH64(sterm->addr, sterm->size, SEED); } /** * Hash a term into a key. If NULL is passed, the result is NULL_KEY. */ inline LSUP_Key LSUP_term_to_key(const LSUP_Term *term) { if (UNLIKELY (term == NULL)) return NULL_KEY; LSUP_Buffer sterm_s; LSUP_Buffer *sterm = &sterm_s; LSUP_term_serialize(term, sterm); LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED); LSUP_buffer_done(sterm); return key; } /** * Compare two terms. */ bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2); /* // TODO Implement when xxhash v0.8 is released with stable xxhash128 function. inline XXH128_hash_t LSUP_term_hash128(const LSUP_Term *term); */ void LSUP_term_done(LSUP_Term *term); void LSUP_term_free(LSUP_Term *term); #endif