term.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "xxhash.h"
  6. #include "buffer.h"
  /** @brief URI parsing regular expression. Conforms to RFC3986. */
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  /** @brief Seed passed to the XXH64 hash function. */
  #define SEED 0 // TODO Make configurable.

  /** @brief Size in chars of a language tag (see the langtag type). */
  #define LANG_SIZE 8

  /** @brief "NULL" key, a value that is never user-provided.
   *
   * Used to mark special values (e.g. deleted records).
   */
  #define NULL_KEY 0

  /** @brief "NULL" triple, a value that is never user-provided.
   *
   * Used to fill deleted triples in a keyset.
   */
  #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}

  /** @brief Buffer size for a "urn:uuid4:<uuid>" string.
   *
   * 10 bytes for the "urn:uuid4:" prefix plus UUIDSTR_SIZE for the UUID
   * string (UUIDSTR_SIZE is assumed to include the terminating NUL; confirm
   * against its definition). The expansion is parenthesized so that the macro
   * keeps its value inside larger expressions, e.g. `UUID4_URN_SIZE * 2`.
   */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
  19. typedef XXH64_hash_t LSUP_TermHash64;
  20. typedef char langtag[LANG_SIZE];
  /** @brief Term type table.
   *
   * Each row is expanded through a caller-defined ENTRY (name, value) macro.
   */
  #define TTYPE_TABLE \
      ENTRY (UNDEFINED,   0) \
      ENTRY (URI,         1) \
      ENTRY (BNODE,       2) \
      ENTRY (LITERAL,     3)

  /** @brief Term types, generated from #TTYPE_TABLE as LSUP_TERM_<name>. */
  typedef enum LSUP_term_type {
  #define ENTRY(name, value) LSUP_TERM_##name = value,
      TTYPE_TABLE
  #undef ENTRY
  } LSUP_term_type;
  31. typedef struct LSUP_Term {
  32. LSUP_term_type type;
  33. // This language variable currently supports a 2-digit ISO 639 language
  34. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  35. // See https://tools.ietf.org/html/bcp47#section-2.1
  36. langtag lang;
  37. char *datatype;
  38. char *data;
  39. } LSUP_Term;
  40. /** @brief Create a new term.
  41. *
  42. * @param type[in] Term type. One of #LSUP_term_type.
  43. *
  44. * @param data[in] Term data: textual URI, literal value without data type
  45. * or langtag, etc.
  46. *
  47. * @param datatype[in]: data type for literals.
  48. *
  49. * @param lang[in]: language tag for string literals.
  50. *
  51. * @param term[out] Pointer to a new term, which must be freed with
  52. * #LSUP_term_free after use.
  53. *
  54. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  55. */
  56. LSUP_Term *
  57. LSUP_term_new(
  58. LSUP_term_type type, const char *data, char *datatype, char *lang);
  /** @brief Placeholder term: a new term of type LSUP_TERM_UNDEFINED with no
   * data, data type or language tag.
   *
   * Each expansion calls #LSUP_term_new, so every use creates a separate term.
   *
   * NOTE(review): this was documented as being for use with LSUP_term_reset,
   * which is not declared in this header; #LSUP_term_init looks like the
   * current counterpart (confirm).
   */
  #define TERM_DUMMY (LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL, NULL))
  62. /** @brief Shortcut to create a URI.
  63. *
  64. * Must be freed with #LSUP_term_free.
  65. *
  66. * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
  67. *
  68. * @param uri[out] The URI to be created.
  69. *
  70. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  71. */
  72. inline LSUP_Term *
  73. LSUP_uri_new (const char *data)
  74. {
  75. if (!data) {
  76. uuid_t uuid;
  77. uuid_generate_random (uuid);
  78. uuid_str_t uuid_str;
  79. uuid_unparse_lower (uuid, uuid_str);
  80. char uri[UUID4_URN_SIZE];
  81. snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
  82. data = uri;
  83. }
  84. return LSUP_term_new (LSUP_TERM_URI, data, NULL, NULL);
  85. }
  86. /* @brief Reuse a pre-allocated term structure.
  87. *
  88. * The structure must have been previously created with #LSUP_term_new. It can
  89. * be reinitialized multiple times without freeing it. It must be eventually
  90. * freed with #LSUP_term_free.
  91. */
  92. LSUP_rc
  93. LSUP_term_init(
  94. LSUP_Term *term, LSUP_term_type type,
  95. const char *data, char *datatype, char *lang);
  96. LSUP_Term *
  97. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  98. LSUP_Buffer *
  99. LSUP_buffer_new_from_term (const LSUP_Term *term);
  100. /**
  101. * @brief Shortcut to initialize a URI.
  102. */
  103. inline LSUP_rc
  104. LSUP_uri_init (LSUP_Term *term, const char *data)
  105. {
  106. if (!data) {
  107. uuid_t uuid;
  108. uuid_generate_random (uuid);
  109. uuid_str_t uuid_str;
  110. uuid_unparse_lower (uuid, uuid_str);
  111. char uri[UUIDSTR_SIZE + 10];
  112. sprintf (uri, "urn:uuid4:%s", uuid_str);
  113. data = uri;
  114. }
  115. return LSUP_term_init (term, LSUP_TERM_URI, data, NULL, NULL);
  116. }
  117. /** @brief Simple ad-hoc serialization function.
  118. *
  119. * The resulting term must be freed with #LSUP_term_free after use.
  120. */
  121. LSUP_rc
  122. LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm);
  123. /** @brief Deserialize a buffer into a term.
  124. *
  125. * The buffer must be a well-formed serialization of a term, e.g. as obtained
  126. * by #LSUP_term_serialize.
  127. */
  128. LSUP_rc
  129. LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term);
  130. inline LSUP_Key
  131. LSUP_sterm_to_key (const LSUP_Buffer *sterm)
  132. {
  133. if (UNLIKELY (sterm == NULL)) return NULL_KEY;
  134. return XXH64(sterm->addr, sterm->size, SEED);
  135. }
  136. /** @brief Hash a term into a key.
  137. *
  138. * If NULL is passed, the result is NULL_KEY.
  139. */
  140. inline LSUP_Key
  141. LSUP_term_to_key (const LSUP_Term *term)
  142. {
  143. if (UNLIKELY (term == NULL)) return NULL_KEY;
  144. LSUP_Buffer *sterm = LSUP_buffer_new_from_term (term);
  145. LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED);
  146. LSUP_buffer_free (sterm);
  147. return key;
  148. }
  149. /**
  150. * Compare two terms.
  151. */
  152. bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
  /*
   * TODO Implement once the project can rely on xxhash v0.8 or later, which
   * provides a stable 128-bit hash function:
   *
   * inline XXH128_hash_t
   * LSUP_term_hash128 (const LSUP_Term *term);
   */
  158. void
  159. LSUP_term_done (LSUP_Term *term);
  160. void
  161. LSUP_term_free (LSUP_Term *term);
  162. #endif