// term.h
  1. #ifndef LSUP_TERM_H
  2. #define LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "buffer.h"
  6. // URI parsing regular expression. Conforms to RFC3986.
  7. #define URI_REGEX_STR \
  8. "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
  9. #define LANG_SIZE 8 // Size in chars of lang tag
  10. // "NULL" triple, a value that is never user-provided. Used to fill deleted
  11. // triples in a keyset.
  12. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
  13. #define UUID4_URN_SIZE UUIDSTR_SIZE + 10
  14. typedef XXH64_hash_t LSUP_TermHash64;
  15. typedef char langtag[LANG_SIZE];
  16. #define TTYPE_TBL \
  17. ENTRY (UNDEFINED, 0) \
  18. ENTRY (URI, 1) \
  19. ENTRY (BNODE, 2) \
  20. ENTRY (LITERAL, 3)
  21. typedef enum LSUP_term_type {
  22. #define ENTRY(a, b) LSUP_TERM_##a = b,
  23. TTYPE_TBL
  24. #undef ENTRY
  25. } LSUP_term_type;
  26. typedef struct LSUP_Term {
  27. LSUP_term_type type;
  28. // This language variable currently supports a 2-digit ISO 639 language
  29. // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
  30. // See https://tools.ietf.org/html/bcp47#section-2.1
  31. langtag lang;
  32. char *datatype;
  33. char *data;
  34. } LSUP_Term;
  35. /** @brief Create a new term.
  36. *
  37. * @param type[in] Term type. One of #LSUP_term_type.
  38. *
  39. * @param data[in] Term data: textual URI, literal value without data type
  40. * or langtag, etc.
  41. *
  42. * @param datatype[in]: data type for literals.
  43. *
  44. * @param lang[in]: language tag for string literals.
  45. *
  46. * @param term[out] Pointer to a new term, which must be freed with
  47. * #LSUP_term_free after use.
  48. *
  49. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  50. */
  51. LSUP_Term *
  52. LSUP_term_new(
  53. LSUP_term_type type, const char *data, char *datatype, char *lang);
  54. /** @brief Placeholder term to use with LSUP_term_reset.
  55. */
  56. #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL, NULL)
  57. /** @brief Shortcut to create a URI.
  58. *
  59. * Must be freed with #LSUP_term_free.
  60. *
  61. * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
  62. *
  63. * @param uri[out] The URI to be created.
  64. *
  65. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  66. */
  67. inline LSUP_Term *
  68. LSUP_uri_new (const char *data)
  69. {
  70. if (!data) {
  71. uuid_t uuid;
  72. uuid_generate_random (uuid);
  73. uuid_str_t uuid_str;
  74. uuid_unparse_lower (uuid, uuid_str);
  75. char uri[UUID4_URN_SIZE];
  76. snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
  77. data = uri;
  78. }
  79. return LSUP_term_new (LSUP_TERM_URI, data, NULL, NULL);
  80. }
  81. /* @brief Reuse a pre-allocated term structure.
  82. *
  83. * The structure must have been previously created with #LSUP_term_new. It can
  84. * be reinitialized multiple times without freeing it. It must be eventually
  85. * freed with #LSUP_term_free.
  86. */
  87. LSUP_rc
  88. LSUP_term_init(
  89. LSUP_Term *term, LSUP_term_type type,
  90. const char *data, char *datatype, char *lang);
  91. LSUP_Term *
  92. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  93. LSUP_Buffer *
  94. LSUP_buffer_new_from_term (const LSUP_Term *term);
  95. /**
  96. * @brief Shortcut to initialize a URI.
  97. */
  98. inline LSUP_rc
  99. LSUP_uri_init (LSUP_Term *term, const char *data)
  100. {
  101. if (!data) {
  102. uuid_t uuid;
  103. uuid_generate_random (uuid);
  104. uuid_str_t uuid_str;
  105. uuid_unparse_lower (uuid, uuid_str);
  106. char uri[UUIDSTR_SIZE + 10];
  107. sprintf (uri, "urn:uuid4:%s", uuid_str);
  108. data = uri;
  109. }
  110. return LSUP_term_init (term, LSUP_TERM_URI, data, NULL, NULL);
  111. }
  112. /** @brief Simple ad-hoc serialization function.
  113. *
  114. * The resulting term must be freed with #LSUP_term_free after use.
  115. */
  116. LSUP_rc
  117. LSUP_term_serialize (const LSUP_Term *term, LSUP_Buffer *sterm);
  118. /** @brief Deserialize a buffer into a term.
  119. *
  120. * The buffer must be a well-formed serialization of a term, e.g. as obtained
  121. * by #LSUP_term_serialize.
  122. */
  123. LSUP_rc
  124. LSUP_term_deserialize (const LSUP_Buffer *sterm, LSUP_Term *term);
  125. /** @brief Hash a buffer.
  126. */
  127. inline LSUP_Key
  128. LSUP_term_hash (const LSUP_Term *term)
  129. {
  130. LSUP_Buffer *buf;
  131. if (UNLIKELY (!term)) buf = BUF_DUMMY;
  132. else buf = LSUP_buffer_new_from_term (term);
  133. LSUP_Key key = LSUP_buffer_hash (buf);
  134. LSUP_buffer_free (buf);
  135. return key;
  136. }
  137. /**
  138. * Compare two terms.
  139. */
  140. bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
  141. /*
  142. // TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
  143. inline XXH128_hash_t
  144. LSUP_term_hash128(const LSUP_Term *term);
  145. */
  146. void
  147. LSUP_term_done (LSUP_Term *term);
  148. void
  149. LSUP_term_free (LSUP_Term *term);
  150. #endif