term.h 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "xxhash.h"
  6. #include "buffer.h"
  /* Regular expression used to parse a URI. Conforms to RFC 3986. */
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  /* Seed passed to the hash function. TODO Make configurable. */
  #define SEED 0

  /* Size, in chars, of a language tag. */
  #define LANG_SIZE 8

  /*
   * "NULL" key: a value that is never user-provided. It marks special
   * values (e.g. deleted records).
   */
  #define NULL_KEY 0

  /*
   * "NULL" triple: a value that is never user-provided. It fills the slots
   * of deleted triples in a keyset.
   */
  #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
  18. typedef LSUP_Buffer LSUP_SerTerm;
  19. typedef XXH64_hash_t LSUP_TermHash64;
  20. typedef char langtag[LANG_SIZE];
  /*
   * Kind of a term.
   *
   * The numeric values are written out explicitly; they are the same values
   * the compiler would assign implicitly.
   */
  typedef enum LSUP_term_type {
      LSUP_TERM_UNDEFINED = 0,
      LSUP_TERM_URI       = 1,
      LSUP_TERM_BNODE     = 2,
      LSUP_TERM_LITERAL   = 3
  } LSUP_term_type;
  27. typedef struct LSUP_Term {
  28. LSUP_term_type type;
  29. // This language variable currently supports a 2-digit ISO 639 language
  30. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  31. // See https://tools.ietf.org/html/bcp47#section-2.1
  32. langtag lang;
  33. char *datatype;
  34. char *data;
  35. } LSUP_Term;
  36. /*
  37. * Initialize a pre-allocated term structure.
  38. *
  39. * the structure can be an already initialized term, and can be reused
  40. * without freeing it.
  41. */
  42. LSUP_rc
  43. LSUP_term_init(
  44. LSUP_Term *term, LSUP_term_type type,
  45. const char *data, char *datatype, char *lang);
  46. /**
  47. * @brief Shortcut to initialize a URI.
  48. */
  49. inline LSUP_rc
  50. LSUP_uri_init(LSUP_Term *term, const char *data)
  51. { return LSUP_term_init(term, LSUP_TERM_URI, data, NULL, NULL); }
  52. LSUP_Term *
  53. LSUP_term_new(LSUP_term_type type, const char *data, char *datatype, char *lang);
  54. /**
  55. * @brief Shortcut to create a URI.
  56. */
  57. inline LSUP_Term *
  58. LSUP_uri_new(const char *data)
  59. { return LSUP_term_new(LSUP_TERM_URI, data, NULL, NULL); }
  /**
   * @brief Generate a random URN with the format: `urn:lsup:<uuid4>`.
   *
   * The parameter list is spelled `(void)` so that this is a real prototype:
   * before C23 an empty `()` declares a function with unspecified
   * parameters, and calls with stray arguments would not be diagnosed.
   *
   * @return The generated string.
   *         NOTE(review): ownership is not visible from this header --
   *         presumably the caller frees the result; confirm.
   */
  char *
  LSUP_term_gen_random_str(void);
  65. /** Simple ad-hoc serialization function.
  66. *
  67. * This function allocates and returns the following byte sequence:
  68. *
  69. * - `sizeof(char)` bytes for the term type;
  70. * - `LANG_SIZE` bytes for the language tag;
  71. * - Arbitrary bytes with NUL-terminated strings for data and datatype.
  72. *
  73. * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
  74. * index for `datatype` is found by the terminating NULL for `data`.
  75. *
  76. * Serialized representations of some RDF terms:
  77. *
  78. * <http://hello.org>
  79. *
  80. * 0 1 size=19
  81. * | \x01 | http://hello.org\x00 |
  82. * type data
  83. *
  84. * "hello"
  85. *
  86. * 0 1 size=7
  87. * | \x03 | hello\x00 |
  88. * type data
  89. *
  90. * "hello"^^xsd:string
  91. *
  92. * 0 1 7 size=18
  93. * | \x03 | hello\x00 | xsd:string\x00 |
  94. * type data datatype
  95. *
  96. * (note: the "xsd:" prefix is used for simplification here, it would be
  97. * normally be a fully qualified URI)
  98. *
  99. * "hello"@en-US
  100. *
  101. * 0 1 7 18 size=26
  102. * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
  103. * type data datatype lang
  104. */
  105. LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
  106. LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
  107. inline LSUP_Key
  108. LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
  109. {
  110. if (sterm == NULL) return NULL_KEY;
  111. return (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
  112. }
  113. /**
  114. * Hash a term into a key. If NULL is passed, the result is NULL_KEY.
  115. */
  116. inline LSUP_Key
  117. LSUP_term_to_key(const LSUP_Term *term)
  118. {
  119. if (term == NULL) return NULL_KEY;
  120. LSUP_Buffer sterm_s;
  121. LSUP_Buffer *sterm = &sterm_s;
  122. LSUP_term_serialize(term, sterm);
  123. LSUP_Key key = LSUP_sterm_to_key(sterm);
  124. LSUP_buffer_done(sterm);
  125. return key;
  126. }
  127. /**
  128. * Compare two terms.
  129. */
  130. bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
  /*
   * TODO Implement when xxhash v0.8 is released with a stable xxhash128
   * function. Planned declaration:
   *
   *     inline XXH128_hash_t
   *     LSUP_term_hash128(const LSUP_Term *term);
   */
  136. void
  137. LSUP_term_done(LSUP_Term *term);
  138. void
  139. LSUP_term_free(LSUP_Term *term);
  140. #endif