#ifndef LSUP_TERM_H #define LSUP_TERM_H #include #include #include "xxhash.h" #include "buffer.h" // URI parsing regular expression. Conforms to RFC3986. #define URI_REGEX_STR \ "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" #define SEED 0 // TODO Make configurable. #define LANG_SIZE 8 // Size in chars of lang tag // "NULL" key, a value that is never user-provided. Used to mark special // values (e.g. deleted records). #define NULL_KEY 0 // "NULL" triple, a value that is never user-provided. Used to fill deleted // triples in a keyset. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY} typedef LSUP_Buffer LSUP_SerTerm; typedef XXH64_hash_t LSUP_TermHash64; typedef char langtag[LANG_SIZE]; typedef enum LSUP_term_type { LSUP_TERM_UNDEFINED, LSUP_TERM_URI, LSUP_TERM_BNODE, LSUP_TERM_LITERAL } LSUP_term_type; typedef struct LSUP_Term { LSUP_term_type type; // This language variable currently supports a 2-digit ISO 639 language // code and a 2-character ISO 3166-1 country code, separated by a hyphen. // See https://tools.ietf.org/html/bcp47#section-2.1 langtag lang; char *datatype; char *data; } LSUP_Term; /* * Initialize a pre-allocated term structure. * * the structure can be an already initialized term, and can be reused * without freeing it. */ LSUP_rc LSUP_term_init( LSUP_Term *term, LSUP_term_type type, const char *data, char *datatype, char *lang); /** * @brief Shortcut to initialize a URI. */ inline LSUP_rc LSUP_uri_init(LSUP_Term *term, const char *data) { return LSUP_term_init(term, LSUP_TERM_URI, data, NULL, NULL); } LSUP_Term * LSUP_term_new(LSUP_term_type type, const char *data, char *datatype, char *lang); /** * @brief Shortcut to create a URI. */ inline LSUP_Term * LSUP_uri_new(const char *data) { return LSUP_term_new(LSUP_TERM_URI, data, NULL, NULL); } /** * Generate a random URN with the format: `urn:lsup:`. */ char * LSUP_term_gen_random_str(); /** Simple ad-hoc serialization function. * * This function allocates and returns the following byte sequence: * * - `sizeof(char)` bytes for the term type; * - `LANG_SIZE` bytes for the language tag; * - Arbitrary bytes with NUL-terminated strings for data and datatype. * * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The * index for `datatype` is found by the terminating NULL for `data`. * * Serialized representations of some RDF terms: * * * * 0 1 size=19 * | \x01 | http://hello.org\x00 | * type data * * "hello" * * 0 1 size=7 * | \x03 | hello\x00 | * type data * * "hello"^^xsd:string * * 0 1 7 size=18 * | \x03 | hello\x00 | xsd:string\x00 | * type data datatype * * (note: the "xsd:" prefix is used for simplification here, it would be * normally be a fully qualified URI) * * "hello"@en-US * * 0 1 7 18 size=26 * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 | * type data datatype lang */ LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm); LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term); inline LSUP_Key LSUP_sterm_to_key(const LSUP_SerTerm *sterm) { if (sterm == NULL) return NULL_KEY; return (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED); } /** * Hash a term into a key. If NULL is passed, the result is NULL_KEY. */ inline LSUP_Key LSUP_term_to_key(const LSUP_Term *term) { if (term == NULL) return NULL_KEY; LSUP_Buffer sterm_s; LSUP_Buffer *sterm = &sterm_s; LSUP_term_serialize(term, sterm); LSUP_Key key = LSUP_sterm_to_key(sterm); LSUP_buffer_done(sterm); return key; } /** * Compare two terms. */ bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2); /* // TODO Implement when xxhash v0.8 is released with stable xxhash128 function. inline XXH128_hash_t LSUP_term_hash128(const LSUP_Term *term); */ void LSUP_term_done(LSUP_Term *term); void LSUP_term_free(LSUP_Term *term); #endif