123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304 |
- #include "term.h"
/* URI parsing regular expression. Conforms to RFC3986. */
#define URI_REGEX_STR \
    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

/*
 * NULL-safe string length: 0 for a NULL pointer, strlen() otherwise.
 *
 * The whole expansion is parenthesized so that the macro can be embedded in
 * a larger expression (e.g. `NLEN (s) + 1`) without the conditional operator
 * swallowing the rest of it. Note that `str` is evaluated twice.
 */
#define NLEN(str) ((str) == NULL ? 0 : strlen ((str)))

/* Characters that are rejected outright in URI term data. */
#define INVALID_URI_CHARS "<>\" {}|\\^`"

/* Compiled form of URI_REGEX_STR. Only valid while `ptn_init` is true. */
static regex_t ptn;

/* Whether `ptn` currently holds a compiled pattern. */
static bool ptn_init = false;
- /* Global inline prototypes. */
- LSUP_Term *LSUP_uri_new (const char *data);
- LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
- /**
- * Free global regex struct. Register with atexit().
- */
- void term_cleanup() { if (ptn_init) regfree (&ptn); }
- LSUP_Term *
- LSUP_term_new (
- LSUP_term_type type, const char *data, char *datatype, char *lang)
- {
- LSUP_Term *term;
- CALLOC_GUARD (term, NULL);
- // If undefined, just set the type.
- if (type == LSUP_TERM_UNDEFINED) term->type = type;
- else if (UNLIKELY (LSUP_term_init (
- term, type, data, datatype, lang) != LSUP_OK)) {
- free (term);
- return NULL;
- }
- return term;
- }
- LSUP_Term *
- LSUP_term_new_from_buffer (const LSUP_Buffer *sterm)
- {
- LSUP_Term *term;
- MALLOC_GUARD (term, NULL);
- if (UNLIKELY (LSUP_term_deserialize (sterm, term) != LSUP_OK)) {
- free (term);
- return NULL;
- }
- return term;
- }
- LSUP_Buffer *
- LSUP_buffer_new_from_term (const LSUP_Term *term)
- {
- LSUP_Buffer *sterm;
- MALLOC_GUARD (sterm, NULL);
- sterm->addr = NULL;
- if (LSUP_term_serialize (term, sterm) != LSUP_OK) {
- free (sterm);
- return NULL;
- }
- return sterm;
- }
- LSUP_rc
- LSUP_term_init(
- LSUP_Term *term, LSUP_term_type type,
- const char *data, char *datatype, char *lang)
- {
- // This can never be LSUP_TERM_UNDEFINED.
- if (!data) return LSUP_VALUE_ERR;
- term->type = type;
- // Validate URI.
- if (term->type == LSUP_TERM_URI) {
- // TODO Cheap fix. Should url-encode all invalid chars.
- if (strpbrk (data, INVALID_URI_CHARS) != NULL) {
- fprintf (
- stderr, "Characters %s are not allowed.\n",
- INVALID_URI_CHARS);
- return LSUP_VALUE_ERR;
- }
- if (UNLIKELY (!ptn_init)) {
- int rc = regcomp (&ptn, URI_REGEX_STR, REG_EXTENDED);
- if (rc != 0) return LSUP_ERROR;
- ptn_init = true;
- atexit (term_cleanup);
- }
- if (regexec (&ptn, data, 0, NULL, 0) != 0) {
- fprintf (stderr, "Error matching URI pattern.\n");
- return LSUP_VALUE_ERR;
- }
- }
- char *data_tmp = realloc (term->data, strlen (data) + 1);
- if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
- term->data = data_tmp;
- strcpy (term->data, data);
- if (datatype) {
- data_tmp = realloc (term->datatype, strlen (datatype) + 1);
- if (UNLIKELY (!data_tmp)) return LSUP_MEM_ERR;
- term->datatype = data_tmp;
- strcpy (term->datatype, datatype);
- } else {
- free (term->datatype);
- term->datatype = NULL;
- }
- if (lang) {
- // TODO validate language and country code
- //char lsize = 5 ? lang[2] == "-" : 2;
- memcpy (term->lang, lang, LANG_SIZE);
- } else {
- memset (term->lang, 0, LANG_SIZE);
- }
- return LSUP_OK;
- }
- /*
- * This function allocates and returns the following byte sequence:
- *
- * - `sizeof (char)` bytes for the term type;
- * - `LANG_SIZE` bytes for the language tag;
- * - Arbitrary bytes with NUL-terminated strings for data and datatype.
- *
- * The index for `data` is consistently `LANG_SIZE + sizeof (char)`. The
- * index for `datatype` is found by the terminating NULL for `data`.
- *
- * Serialized representations of some RDF terms:
- *
- * <http://hello.org>
- *
- * 0 1 size=19
- * | \x01 | http://hello.org\x00 |
- * type data
- *
- * "hello"
- *
- * 0 1 size=7
- * | \x03 | hello\x00 |
- * type data
- *
- * "hello"^^xsd:string
- *
- * 0 1 7 size=18
- * | \x03 | hello\x00 | xsd:string\x00 |
- * type data datatype
- *
- * (note: the "xsd:" prefix is used for simplification here, it would be
- * normally be a fully qualified URI)
- *
- * "hello"@en-US
- *
- * 0 1 7 18 size=24
- * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00 |
- * type data datatype lang
- */
- LSUP_rc
- LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm)
- {
- size_t size, data_len, datatype_len = 0,
- data_idx = 1, datatype_idx = 0, lang_idx = 0;
- if (UNLIKELY (term == NULL)) return LSUP_NOACTION;
- data_len = strlen (term->data) + 1;
- size = data_idx + data_len;
- if (term->datatype != NULL) {
- datatype_idx = size;
- datatype_len = strlen (term->datatype) + 1;
- size += datatype_len;
- if (strlen (term->lang) > 0) {
- lang_idx = size;
- size += strlen (term->lang) + 1;
- }
- }
- //TRACE ("Serialized term size: %lu", size);
- LSUP_buffer_init (sterm, size, NULL);
- // Copy type.
- memcpy (sterm->addr, &term->type, 1);
- // Copy data.
- memcpy (sterm->addr + data_idx, term->data, data_len);
- if (term->datatype != NULL) {
- // Copy data type.
- memcpy (sterm->addr + datatype_idx, term->datatype, datatype_len);
- // Copy lang tag.
- if (strlen (term->lang) > 0)
- strcpy (sterm->addr + lang_idx, term->lang);
- }
- return LSUP_OK;
- }
- LSUP_rc
- LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term)
- {
- size_t cur;
- char *data, *datatype = NULL;
- langtag lang = "\00";
- char type = ((char*)(sterm->addr))[0];
- cur = 1;
- data = (char*)sterm->addr + cur;
- cur += strlen (data) + 1;
- if (type == LSUP_TERM_LITERAL && cur < sterm->size) {
- datatype = (char*)sterm->addr + cur;
- cur += strlen (datatype) + 1;
- if (strlen (datatype) == 0)
- datatype = NULL;
- if (cur < sterm->size)
- strcpy (lang, sterm->addr + cur);
- }
- return LSUP_term_init (term, type, data, datatype, lang);
- }
- bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
- {
- if (term1->type != term2->type)
- return false;
- if (strcmp (term1->data, term2->data) != 0)
- return false;
- if (term1->type == LSUP_TERM_LITERAL) {
- if ((term1->datatype == NULL) != (term2->datatype == NULL)) // XOR
- return false;
- if (
- term1->datatype != NULL &&
- strcmp (term1->datatype, term2->datatype) != 0)
- return false;
- if ((term1->lang == NULL) != (term2->lang == NULL)) // XOR
- return false;
- if (
- term1->lang != NULL &&
- strcmp (term1->lang, term2->lang) != 0)
- return false;
- }
- return true;
- }
- void LSUP_term_done (LSUP_Term *term)
- {
- if (LIKELY (term->data != NULL)) {
- free (term->data);
- term->data = NULL;
- }
- if (term->datatype != NULL) {
- free (term->datatype);
- term->datatype = NULL;
- }
- }
- void LSUP_term_free (LSUP_Term *term)
- {
- if (LIKELY (term != NULL)) {
- LSUP_term_done (term);
- free (term);
- term = NULL;
- }
- }
- // Extern inline functions.
- LSUP_Key LSUP_term_hash (const LSUP_Term *term);
|