term.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "uthash.h"
  6. #include "buffer.h"
  7. #include "namespace.h"
  8. // "NULL" triple, a value that is never user-provided. Used to fill deleted
  9. // triples in a keyset.
  10. #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
  11. #define UUID4_URN_SIZE UUIDSTR_SIZE + 10
/*
 * Term types.
 *
 * Values for LSUP_TermType, as stored in the `type` member of LSUP_Term.
 */

/* Undefined placeholder or result of an error. Invalid for most operations. */
#define LSUP_TERM_UNDEFINED 0

/* IRI reference. */
#define LSUP_TERM_IRIREF 1

/* Namespace-prefixed IRI reference. Uses the `nsm` member of LSUP_Term. */
#define LSUP_TERM_NS_IRIREF 2

/* Literal without language tag. Uses the `datatype` member of LSUP_Term. */
#define LSUP_TERM_LITERAL 3

/* Language-tagged string literal. Uses the `lang` member of LSUP_Term. */
#define LSUP_TERM_LT_LITERAL 4

/* Blank node. */
#define LSUP_TERM_BNODE 5

/*
 * In-term identifier types.
 *
 * These values do not overlap with the term types above.
 */

/* Namespace prefix string. */
#define LSUP_ID_NS 10

/* Data type IRI. */
#define LSUP_ID_DATATYPE 11

/* Language tag string. */
#define LSUP_ID_LANG 12

/* Temporary blank node ID. TODO implement. */
#define LSUP_ID_BNODE 13
/** @brief Default data type for untyped literals.
 *
 * This is a fully qualified IRI string, not a namespace-prefixed one.
 */
#define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"

/** @brief URI parsing regular expression. Conforms to RFC3986.
 *
 * This is the expression given in RFC 3986, Appendix B. Its capture groups
 * are: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
 * Every component is optional, so the expression splits a URI reference into
 * parts rather than rejecting malformed input on its own.
 */
#define LSUP_URI_REGEX_STR \
    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
/*
 * Data types.
 */

/** @brief Hash type used by this module; alias of XXH64_hash_t. */
typedef XXH64_hash_t LSUP_Hash64;

/** @brief Term type tag. Holds one of the LSUP_TERM_* constants. */
typedef char LSUP_TermType;

/** @brief A single term: IRI reference, literal, or blank node.
 *
 * The anonymous union carries type-specific metadata; which member is
 * meaningful depends on the value of `type`. Only one member is valid at a
 * time since they share storage.
 */
typedef struct term_t {
    char * data; // URI, literal value, or BNode label.
    union {
        uint32_t datatype; // Data type hash for LSUP_TERM_LITERAL.
        uint32_t lang; // Lang tag hash for LSUP_TERM_LT_LITERAL.
        uint32_t bnode_id; // Blank node ID. TODO implement.
        LSUP_NSMap * nsm; // NSM handle for prefixed IRI.
    };
    LSUP_TermType type; // Term type.
} LSUP_Term;
/** @brief Hash cache for lang tags and data types.
 *
 * Each entry maps a 32-bit key to an identifier string. Entries are stored in
 * a uthash table through the `hh` handle.
 */
typedef struct id_cache_t {
    uint32_t key; // 32-bit key identifying the entry.
    char * data; // Identifier string associated with the key.
    UT_hash_handle hh; // uthash bookkeeping handle.
} IDCache;
/*
 * Extern variables.
 *
 * These are declarations only; the definitions live outside this header.
 */

/** @brief Global ID cache.
 *
 * Map of internal term identifiers, such as literal data types, language tags
 * and BNode identifiers.
 */
extern IDCache *LSUP_id_cache;

/** @brief Compiled hash of default literal data type.
 *
 * NOTE(review): presumably the 32-bit key of DEFAULT_DTYPE in the ID cache —
 * confirm against the implementation.
 */
extern uint32_t LSUP_default_dtype_key;

/** @brief URI validation pattern, compiled in #LSUP_init().
 *
 * NOTE(review): presumably compiled from LSUP_URI_REGEX_STR — confirm.
 */
extern regex_t *LSUP_uri_ptn;
/*
 * Function prototypes.
 */

/** @brief Create a new term.
 *
 * @param[in] type Term type. One of the LSUP_TERM_* constants.
 *
 * @param[in] data Term data: textual URI, literal value without data type
 *  or langtag, etc.
 *
 * @param[in] metadata Language tag for language-tagged literals or data type
 *  for other literals.
 *
 * @return Pointer to a new term, which must be freed with #LSUP_term_free
 *  after use. NOTE(review): the failure value is not visible in this header;
 *  presumably NULL if validation fails — confirm against the implementation.
 */
LSUP_Term *
LSUP_term_new (LSUP_TermType type, const char *data, const char *metadata);
/** @brief Placeholder term to use with LSUP_term_reset.
 *
 * Every expansion calls #LSUP_term_new and therefore creates a new term of
 * type LSUP_TERM_UNDEFINED that must be freed with #LSUP_term_free.
 *
 * NOTE(review): no LSUP_term_reset is declared in this header;
 * #LSUP_term_init looks like the intended function — confirm.
 */
#define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
  105. /** @brief Shortcut to create a URI.
  106. *
  107. * Must be freed with #LSUP_term_free.
  108. *
  109. * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
  110. *
  111. * @param uri[out] The URI to be created.
  112. *
  113. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  114. */
  115. inline LSUP_Term *
  116. LSUP_uri_new (const char *data)
  117. {
  118. if (!data) {
  119. uuid_t uuid;
  120. uuid_generate_random (uuid);
  121. uuid_str_t uuid_str;
  122. uuid_unparse_lower (uuid, uuid_str);
  123. char uri[UUID4_URN_SIZE];
  124. snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
  125. data = uri;
  126. }
  127. return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL);
  128. }
/** @brief Initialize or reuse a pre-allocated term structure.
 *
 * The structure must have been previously created with #LSUP_term_new. It can
 * be reinitialized multiple times without freeing it. It must be eventually
 * freed with #LSUP_term_free.
 *
 * @param[in,out] term Term handle to (re)initialize.
 *
 * @param[in] type Term type. One of the LSUP_TERM_* constants.
 *
 * @param[in] data Term data: textual URI, literal value, or BNode label.
 *
 * @param[in] metadata Language tag or data type, as for #LSUP_term_new.
 *
 * @return NOTE(review): return codes are not visible in this header;
 *  presumably LSUP_OK on success and LSUP_VALUE_ERR if validation fails —
 *  confirm against the implementation.
 */
LSUP_rc
LSUP_term_init(
        LSUP_Term *term, LSUP_TermType type,
        const char *data, const char *metadata);
/** @brief Create a new term from a buffer.
 *
 * @param[in] sterm Source buffer. NOTE(review): presumably a serialized term
 *  as produced by #LSUP_buffer_new_from_term — confirm.
 *
 * @return Pointer to a term. NOTE(review): presumably must be freed with
 *  #LSUP_term_free — confirm.
 */
LSUP_Term *
LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);

/** @brief Create a new buffer from a term.
 *
 * @param[in] term Source term.
 *
 * @return Pointer to a buffer. #LSUP_term_hash releases the result with
 *  LSUP_buffer_free, so callers are expected to do the same.
 */
LSUP_Buffer *
LSUP_buffer_new_from_term (const LSUP_Term *term);
/**
 * @brief Shortcut to initialize a URI.
 *
 * @param[in,out] term Term handle to (re)initialize.
 *
 * @param[in] data The URI string.
 *
 * @return NOTE(review): presumably the same return codes as #LSUP_term_init
 *  called with LSUP_TERM_IRIREF — confirm against the implementation.
 */
LSUP_rc
LSUP_uri_init (LSUP_Term *term, const char *data);
  148. /** @brief Hash a buffer.
  149. */
  150. inline LSUP_Key
  151. LSUP_term_hash (const LSUP_Term *term)
  152. {
  153. LSUP_Buffer *buf;
  154. if (UNLIKELY (!term)) buf = BUF_DUMMY;
  155. else buf = LSUP_buffer_new_from_term (term);
  156. LSUP_Key key = LSUP_buffer_hash (buf);
  157. LSUP_buffer_free (buf);
  158. return key;
  159. }
/** @brief Compare two terms.
 *
 * @param[in] term1 First term.
 *
 * @param[in] term2 Second term.
 *
 * @return Whether the two terms are equal.
 */
bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);

/** @brief Finalize a term.
 *
 * NOTE(review): presumably releases the term's internal resources without
 * freeing the structure itself, as opposed to #LSUP_term_free — confirm
 * against the implementation.
 *
 * @param[in] term Term handle.
 */
void
LSUP_term_done (LSUP_Term *term);

/** @brief Free a term created with #LSUP_term_new.
 *
 * @param[in] term Term handle to free.
 */
void
LSUP_term_free (LSUP_Term *term);
/** @brief Add an identifier to the term cache.
 *
 * @param[in] key 32-bit hash of the inserted term.
 *
 * @param[in] data Term to insert.
 *
 * @return NOTE(review): return codes are not visible in this header;
 *  presumably LSUP_OK on success — confirm against the implementation.
 */
LSUP_rc
LSUP_tcache_add_id (const uint32_t key, const char *data);

/** @brief Get an identifier from the cache.
 *
 * @param[in] key Key for the queried term.
 *
 * @return The retrieved term if found, or NULL. The string must not be
 *  modified or freed.
 */
const char *
LSUP_tcache_get_id (const uint32_t key);
  185. #endif