123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- #ifndef LSUP_TERM_H
- #define LSUP_TERM_H
- #include <assert.h>
- #include <regex.h>
- #include "xxhash.h"
- #include "buffer.h"
- /* URI parsing regular expression. Conforms to RFC 3986: this is the generic
-  * URI pattern given in Appendix B of that RFC. With that pattern the capture
-  * groups are: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
-  */
- #define URI_REGEX_STR \
- "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
- #define SEED 0 // Seed handed to XXH64() when hashing serialized terms. TODO Make configurable.
- #define LANG_SIZE 8 // Size in chars of lang tag; length of the `langtag` array type.
- typedef LSUP_Buffer LSUP_SerTerm; // Serialized term, held in an LSUP_Buffer.
- typedef XXH64_hash_t LSUP_TermHash64; // 64-bit term hash (xxHash XXH64 result type).
- typedef char langtag[LANG_SIZE]; // Fixed-size storage for a language tag.
- /**
-  * Kind of RDF term stored in an LSUP_Term.
-  *
-  * The numeric values are written out explicitly (0 to 3, in declaration
-  * order). The serialization examples documented for LSUP_term_serialize()
-  * show the type as the leading byte of a serialized term: \x01 for a URI and
-  * \x03 for a literal.
-  */
- typedef enum LSUP_term_type {
-     LSUP_TERM_UNDEFINED = 0,    // Undefined term type.
-     LSUP_TERM_URI       = 1,    // URI, e.g. <http://hello.org>.
-     LSUP_TERM_BNODE     = 2,    // Blank node.
-     LSUP_TERM_LITERAL   = 3     // Literal, e.g. "hello".
- } LSUP_term_type;
- typedef struct LSUP_Term { // An RDF term: its kind plus the strings describing it.
- LSUP_term_type type; // Kind of term (undefined, URI, blank node or literal).
- // This language variable currently supports a 2-letter ISO 639 language
- // code and a 2-letter ISO 3166-1 country code, separated by a hyphen.
- // See https://tools.ietf.org/html/bcp47#section-2.1
- langtag lang; // LANG_SIZE chars, e.g. "en-US".
- char *datatype; // Datatype string; the serialization examples show it on typed literals.
- char *data; // Term content, e.g. the URI text or the literal's lexical value.
- } LSUP_Term; // NOTE(review): ownership of `datatype` and `data` is not visible from this header.
- /**
-  * Initialize a pre-allocated term structure.
-  *
-  * The structure may be a term that was already initialized; it can be
-  * reused without freeing it first.
-  *
-  * @param term     Caller-provided term structure to initialize.
-  * @param type     Term type.
-  * @param data     Term data string.
-  * @param datatype Datatype string.
-  * @param lang     Language tag string.
-  * @return         Integer status code. The possible values are not visible
-  *                 from this header (presumably 0 on success; TODO confirm).
-  *
-  * NOTE(review): whether `datatype` and `lang` may be NULL, and whether the
-  * strings are copied or borrowed, is not stated here; confirm against the
-  * implementation.
-  */
- int
- LSUP_term_init(
-         LSUP_Term *term,
-         LSUP_term_type type,
-         char *data,
-         char *datatype,
-         char *lang);
- /**
-  * Create a term from its components.
-  *
-  * Takes the same type / data / datatype / lang arguments as
-  * LSUP_term_init(), but hands back a pointer to a term instead of filling in
-  * a caller-provided structure.
-  *
-  * @param type     Term type.
-  * @param data     Term data string.
-  * @param datatype Datatype string.
-  * @param lang     Language tag string.
-  * @return         Pointer to the resulting term.
-  *
-  * NOTE(review): presumably the term is heap-allocated and must be released
-  * with LSUP_term_free(), and NULL is returned on failure; confirm against
-  * the implementation.
-  */
- LSUP_Term *
- LSUP_term_new(
-         LSUP_term_type type, char *data, char *datatype, char *lang);
- /**
-  * Simple ad-hoc serialization function.
-  *
-  * This function allocates, and stores in `sterm`, the following byte
-  * sequence:
-  *
-  * - `sizeof(char)` bytes for the term type;
-  * - `LANG_SIZE` bytes for the language tag;
-  * - Arbitrary bytes with NUL-terminated strings for data and datatype.
-  *
-  * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
-  * index for `datatype` is found by the terminating NUL of `data`.
-  *
-  * NOTE(review): the layout diagrams below disagree with the list above. They
-  * put `data` at offset 1, directly after the type byte, and the language tag
-  * (when there is one) at the very end. Confirm against the implementation
-  * which of the two descriptions is current.
-  *
-  * Serialized representations of some RDF terms:
-  *
-  * <http://hello.org>
-  *
-  *   0      1                      size=18
-  *   | \x01 | http://hello.org\x00 |
-  *     type   data
-  *
-  * "hello"
-  *
-  *   0      1           size=7
-  *   | \x03 | hello\x00 |
-  *     type   data
-  *
-  * "hello"^^xsd:string
-  *
-  *   0      1           7                size=18
-  *   | \x03 | hello\x00 | xsd:string\x00 |
-  *     type   data        datatype
-  *
-  * (note: the "xsd:" prefix is used for simplification here; it would
-  * normally be a fully qualified URI)
-  *
-  * "hello"@en-US
-  *
-  *   0      1           7                18                  size=26
-  *   | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
-  *     type   data        datatype         lang
-  *
-  * @param term  Term to serialize.
-  * @param sterm Buffer that receives the serialized bytes.
-  * @return      Integer status code. The possible values are not visible
-  *              from this header (TODO confirm).
-  */
- int
- LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
- /**
-  * Rebuild a term from its serialized form.
-  *
-  * Counterpart of LSUP_term_serialize(): `sterm` is the read-only input and
-  * `term` is the structure that gets written.
-  *
-  * @param sterm Serialized term to read.
-  * @param term  Term structure to populate.
-  * @return      Integer status code. The possible values are not visible
-  *              from this header (TODO confirm).
-  *
-  * NOTE(review): whether `term` must already be initialized, and who owns the
-  * strings it ends up pointing to, cannot be told from here; confirm against
-  * the implementation.
-  */
- int
- LSUP_term_deserialize(
-         const LSUP_Buffer *sterm, LSUP_Term *term);
- /**
-  * Hash a serialized term into a key.
-  *
-  * Feeds `sterm->size` bytes starting at `sterm->addr` to XXH64(), seeded
-  * with SEED, and converts the result to LSUP_Key.
-  *
-  * @param sterm Serialized term. It is dereferenced unconditionally, so it
-  *              must not be NULL.
-  * @return      Key derived from the serialized bytes.
-  *
-  * NOTE(review): this is a plain `inline` definition (neither `static` nor
-  * `extern`) in a header. Under C99/C11 inline rules one translation unit
-  * has to supply the external definition; confirm that one does.
-  */
- inline LSUP_Key
- LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
- {
-     return (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
- }
- /**
-  * Hash a term into a key.
-  *
-  * A non-NULL term is serialized into a temporary buffer with
-  * LSUP_term_serialize(), the buffer is hashed with LSUP_sterm_to_key(), and
-  * the buffer is then handed to LSUP_buffer_done().
-  *
-  * @param term Term to hash; may be NULL.
-  * @return     Key for the term, or NULL_KEY when `term` is NULL.
-  *
-  * NOTE(review): the status returned by LSUP_term_serialize() is ignored and
-  * the temporary buffer is not initialized before the call. If serialization
-  * can fail, or expects an initialized buffer, the hash is computed over
-  * indeterminate data; confirm against the implementation.
-  */
- inline LSUP_Key
- LSUP_term_to_key(const LSUP_Term *term)
- {
-     LSUP_Key key = NULL_KEY;
-     if (term != NULL) {
-         LSUP_Buffer sterm;
-         LSUP_term_serialize(term, &sterm);
-         key = LSUP_sterm_to_key(&sterm);
-         LSUP_buffer_done(&sterm);
-     }
-     return key;
- }
- /**
-  * Compare two terms.
-  *
-  * @param term1 First term.
-  * @param term2 Second term.
-  * @return      Boolean result of the comparison (presumably true when the
-  *              terms are equal; TODO confirm against the implementation).
-  *
-  * NOTE(review): which fields take part in the comparison, and whether NULL
-  * arguments are accepted, cannot be told from this header.
-  */
- bool
- LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
- /*
- // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
- XXH128_hash_t
- LSUP_term_hash128(const LSUP_Term *term);
- */
- /**
-  * Finish using a term.
-  *
-  * @param term Term to operate on.
-  *
-  * NOTE(review): judging by the name and by the LSUP_term_init() /
-  * LSUP_term_new() pair, this presumably releases what the term holds
-  * without freeing the structure itself, which would make it the counterpart
-  * of LSUP_term_init(). Confirm against the implementation.
-  */
- void
- LSUP_term_done(LSUP_Term *term);
- /**
-  * Free a term.
-  *
-  * @param term Term to free.
-  *
-  * NOTE(review): presumably the counterpart of LSUP_term_new(), releasing
-  * both what the term holds and the structure itself. Whether NULL is
-  * accepted is not visible here. Confirm against the implementation.
-  */
- void
- LSUP_term_free(LSUP_Term *term);
- #endif
|