#ifndef LSUP_TERM_H #define LSUP_TERM_H #include #include #include "xxhash.h" #include "buffer.h" // URI parsing regular expression. Conforms to RFC3986. #define URI_REGEX_STR \ "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?" #define SEED 0 // TODO Make configurable. #define LANG_SIZE 8 // Size in chars of lang tag // "NULL" key, a value that is never user-provided. Used to mark special // values (e.g. deleted records). #define NULL_KEY 0 // "NULL" triple, a value that is never user-provided. Used to fill deleted // triples in a keyset. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY} #define UUID4_URN_SIZE UUIDSTR_SIZE + 10 typedef XXH64_hash_t LSUP_TermHash64; typedef char langtag[LANG_SIZE]; #define TTYPE_TABLE \ ENTRY (UNDEFINED, 0) \ ENTRY (URI, 1) \ ENTRY (BNODE, 2) \ ENTRY (LITERAL, 3) typedef enum LSUP_term_type { #define ENTRY(a, b) LSUP_TERM_##a = b, TTYPE_TABLE #undef ENTRY } LSUP_term_type; typedef struct LSUP_Term { LSUP_term_type type; // This language variable currently supports a 2-digit ISO 639 language // code and a 2-character ISO 3166-1 country code, separated by a hyphen. // See https://tools.ietf.org/html/bcp47#section-2.1 langtag lang; char *datatype; char *data; } LSUP_Term; /** @brief Create a new term. * * @param type[in] Term type. One of #LSUP_term_type. * * @param data[in] Term data: textual URI, literal value without data type * or langtag, etc. * * @param datatype[in]: data type for literals. * * @param lang[in]: language tag for string literals. * * @param term[out] Pointer to a new term, which must be freed with * #LSUP_term_free after use. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ LSUP_Term * LSUP_term_new( LSUP_term_type type, const char *data, char *datatype, char *lang); /** @brief Placeholder term to use with LSUP_term_reset. */ #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL, NULL) /** @brief Shortcut to create a URI. * * Must be freed with #LSUP_term_free. * * @param data[in] The URI string. If NULL, a UUID4-based URN is generated. * * @param uri[out] The URI to be created. * * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails. */ inline LSUP_Term * LSUP_uri_new (const char *data) { if (!data) { uuid_t uuid; uuid_generate_random (uuid); uuid_str_t uuid_str; uuid_unparse_lower (uuid, uuid_str); char uri[UUID4_URN_SIZE]; snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str); data = uri; } return LSUP_term_new (LSUP_TERM_URI, data, NULL, NULL); } /* @brief Reuse a pre-allocated term structure. * * The structure must have been previously created with #LSUP_term_new. It can * be reinitialized multiple times without freeing it. It must be eventually * freed with #LSUP_term_free. */ LSUP_rc LSUP_term_init( LSUP_Term *term, LSUP_term_type type, const char *data, char *datatype, char *lang); LSUP_Term * LSUP_term_new_from_buffer (const LSUP_Buffer *sterm); LSUP_Buffer * LSUP_buffer_new_from_term (const LSUP_Term *term); /** * @brief Shortcut to initialize a URI. */ inline LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data) { if (!data) { uuid_t uuid; uuid_generate_random (uuid); uuid_str_t uuid_str; uuid_unparse_lower (uuid, uuid_str); char uri[UUIDSTR_SIZE + 10]; sprintf (uri, "urn:uuid4:%s", uuid_str); data = uri; } return LSUP_term_init (term, LSUP_TERM_URI, data, NULL, NULL); } /** @brief Simple ad-hoc serialization function. * * The resulting term must be freed with #LSUP_term_free after use. */ LSUP_rc LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm); /** @brief Deserialize a buffer into a term. * * The buffer must be a well-formed serialization of a term, e.g. as obtained * by #LSUP_term_serialize. */ LSUP_rc LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term); inline LSUP_Key LSUP_sterm_to_key (const LSUP_Buffer *sterm) { if (UNLIKELY (sterm == NULL)) return NULL_KEY; return XXH64(sterm->addr, sterm->size, SEED); } /** @brief Hash a term into a key. * * If NULL is passed, the result is NULL_KEY. */ inline LSUP_Key LSUP_term_to_key (const LSUP_Term *term) { if (UNLIKELY (term == NULL)) return NULL_KEY; LSUP_Buffer *sterm = LSUP_buffer_new_from_term (term); LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED); LSUP_buffer_free (sterm); return key; } /** * Compare two terms. */ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2); /* // TODO Implement when xxhash v0.8 is released with stable xxhash128 function. inline XXH128_hash_t LSUP_term_hash128(const LSUP_Term *term); */ void LSUP_term_done (LSUP_Term *term); void LSUP_term_free (LSUP_Term *term); #endif