term.h 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "xxhash.h"
  6. #include "buffer.h"
  /**
   * Regular expression string used to parse URIs. Conforms to RFC3986.
   */
  #define URI_REGEX_STR \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

  /** Seed passed to the hashing function. TODO Make configurable. */
  #define SEED 0

  /** Size, in chars, of a language tag. */
  #define LANG_SIZE 8

  /**
   * "NULL" key: a value that is never user-provided. It is used to mark
   * special values (e.g. deleted records).
   */
  #define NULL_KEY 0

  /**
   * "NULL" triple: a value that is never user-provided. It is used to fill
   * deleted triples in a keyset.
   */
  #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
  18. typedef XXH64_hash_t LSUP_TermHash64;
  19. typedef char langtag[LANG_SIZE];
  /** @brief Kinds of term that a #LSUP_Term can represent. */
  typedef enum LSUP_term_type {
      LSUP_TERM_UNDEFINED = 0,    /**< Type not set. */
      LSUP_TERM_URI       = 1,    /**< URI. */
      LSUP_TERM_BNODE     = 2,    /**< Blank node (bnode). */
      LSUP_TERM_LITERAL   = 3     /**< Literal value. */
  } LSUP_term_type;
  26. typedef struct LSUP_Term {
  27. LSUP_term_type type;
  28. // This language variable currently supports a 2-digit ISO 639 language
  29. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  30. // See https://tools.ietf.org/html/bcp47#section-2.1
  31. langtag lang;
  32. char *datatype;
  33. char *data;
  34. } LSUP_Term;
  35. /** @brief Create a new term.
  36. *
  37. * @param type[in] Term type. One of #LSUP_term_type.
  38. *
  39. * @param data[in] Term data: textual URI, literal value without data type
  40. * or langtag, etc.
  41. *
  42. * @param datatype[in]: data type for literals.
  43. *
  44. * @param lang[in]: language tag for string literals.
  45. *
  46. * @param term[out] Pointer to a new term, which must be freed with
  47. * #LSUP_term_free after use.
  48. *
  49. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  50. */
  51. LSUP_rc
  52. LSUP_term_new(
  53. LSUP_term_type type, const char *data, char *datatype, char *lang,
  54. LSUP_Term **term);
  55. /** @brief Shortcut to create a URI.
  56. *
  57. * Must be freed with #LSUP_term_free.
  58. *
  59. * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
  60. *
  61. * @param uri[out] The URI to be created.
  62. *
  63. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  64. */
  65. inline LSUP_rc
  66. LSUP_uri_new(const char *data, LSUP_Term **uri)
  67. {
  68. if (!data) {
  69. uuid_t uuid;
  70. uuid_generate_random(uuid);
  71. uuid_str_t uuid_str;
  72. uuid_unparse_lower(uuid, uuid_str);
  73. char uri[UUIDSTR_SIZE + 10];
  74. sprintf(uri, "urn:uuid4:%s", uuid_str);
  75. data = uri;
  76. }
  77. return LSUP_term_new(LSUP_TERM_URI, data, NULL, NULL, uri);
  78. }
  79. /* @brief Reuse a pre-allocated term structure.
  80. *
  81. * The structure must have been previously created with #LSUP_term_new. It can
  82. * be reinitialized multiple times without freeing it. It must be eventually
  83. * freed with #LSUP_term_free.
  84. */
  85. LSUP_rc
  86. LSUP_term_reset(
  87. LSUP_Term *term, LSUP_term_type type,
  88. const char *data, char *datatype, char *lang);
  89. /**
  90. * @brief Shortcut to initialize a URI.
  91. */
  92. inline LSUP_rc
  93. LSUP_uri_reset(LSUP_Term *term, const char *data)
  94. { return LSUP_term_reset(term, LSUP_TERM_URI, data, NULL, NULL); }
  95. /** @brief Simple ad-hoc serialization function.
  96. *
  97. * The resulting term must be freed with #LSUP_term_free after use.
  98. */
  99. LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer **sterm);
  100. LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term **term);
  101. inline LSUP_Key
  102. LSUP_sterm_to_key(const LSUP_Buffer *sterm)
  103. {
  104. if (UNLIKELY (sterm == NULL)) return NULL_KEY;
  105. return XXH64(sterm->addr, sterm->size, SEED);
  106. }
  107. /** @brief Hash a term into a key.
  108. *
  109. * If NULL is passed, the result is NULL_KEY.
  110. */
  111. inline LSUP_Key
  112. LSUP_term_to_key(const LSUP_Term *term)
  113. {
  114. if (UNLIKELY (term == NULL)) return NULL_KEY;
  115. LSUP_Buffer *sterm;
  116. LSUP_term_serialize(term, &sterm);
  117. LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED);
  118. LSUP_buffer_free(sterm);
  119. return key;
  120. }
  121. /**
  122. * Compare two terms.
  123. */
  124. bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
  125. /*
  126. // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
  127. inline XXH128_hash_t
  128. LSUP_term_hash128(const LSUP_Term *term);
  129. */
  130. void
  131. LSUP_term_free(LSUP_Term *term);
  132. #endif