term.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. #include <assert.h>
  4. #include <regex.h>
  5. #include "uthash.h"
  6. #include "buffer.h"
  7. #include "namespace.h"
  /* Buffer size for a UUID4-based URN: UUIDSTR_SIZE plus the 10 characters of
   * the "urn:uuid4:" prefix. The sum is parenthesized so that it expands as a
   * single operand inside larger expressions (e.g. `2 * UUID4_URN_SIZE`). */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
  9. /*
  10. * Term types.
  11. */
  /** Undefined placeholder, or the result of an error. Not valid for most
   *  operations. */
  #define LSUP_TERM_UNDEFINED     0
  /** IRI reference. */
  #define LSUP_TERM_IRIREF        1
  /** IRI reference that uses a namespace prefix. */
  #define LSUP_TERM_NS_IRIREF     2
  /** Literal carrying no language tag. */
  #define LSUP_TERM_LITERAL       3
  /** String literal carrying a language tag. */
  #define LSUP_TERM_LT_LITERAL    4
  /** Blank node. */
  #define LSUP_TERM_BNODE         5
  /** @brief Data type IRI used for literals that have no explicit data type.
   *
   * The value is a fully qualified (absolute) IRI string, not a
   * namespace-prefixed one.
   */
  #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
  /** @brief Regular expression source string for splitting a URI reference.
   *
   * This is the expression given in RFC 3986, Appendix B. Its capture groups
   * are: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
   */
  #define LSUP_URI_REGEX_STR "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
  31. /*
  32. * Data types.
  33. */
  34. typedef XXH64_hash_t LSUP_Hash64;
  35. typedef char LSUP_TermType;
  36. typedef char LSUP_LangTag[8];
  37. typedef struct term_t {
  38. char * data; // URI, literal value, or BNode label.
  39. union {
  40. LSUP_Key datatype; // Data type key for LSUP_TERM_LITERAL.
  41. LSUP_LangTag lang; // Lang tag for LSUP_TERM_LT_LITERAL.
  42. LSUP_Key bnode_id; // BNode ID for comparison & skolemization.
  43. LSUP_NSMap * nsm; // NSM handle for prefixed IRI.
  44. };
  45. LSUP_TermType type; // Term type.
  46. } LSUP_Term;
  47. /** @brief Hash cache for data types.
  48. */
  49. struct term_cache_t {
  50. LSUP_Key key;
  51. LSUP_Term * term;
  52. UT_hash_handle hh;
  53. };
  54. typedef struct triple_t {
  55. LSUP_Term *s;
  56. LSUP_Term *p;
  57. LSUP_Term *o;
  58. } LSUP_Triple;
  59. /*
  60. * Extern variables.
  61. */
  62. /** @brief Global term cache.
  63. *
  64. * Stores frequently used terms, e.g. data type URIs.
  65. */
  66. extern struct term_cache_t *LSUP_term_cache;
  67. /** @brief Compiled hash of default literal data type.
  68. */
  69. extern uint32_t LSUP_default_dtype_key;
  70. /** @brief URI validation pattern, compiled in #LSUP_init().
  71. */
  72. extern regex_t *LSUP_uri_ptn;
  73. /** @brief Default literal data type URI.
  74. *
  75. * Literal terms created with undefined data type will have it set to this
  76. * URI implicitly.
  77. */
  78. extern LSUP_Term *LSUP_default_datatype;
  79. /*
  80. * Function prototypes.
  81. */
  82. /** @brief Create a new term.
  83. *
  84. * @param type[in] Term type. One of #LSUP_TermType.
  85. *
  86. * @param data[in] Term data: textual URI, literal value without data type
  87. * or langtag, etc.
  88. *
  89. * @param metadata[in]: language tag (LSUP_LangTag) for language-tagged
  90. * literals; or data type (LSUP_Term *) for other literals. It may be NULL.
  91. *
  92. * @return New term, which must be freed with #LSUP_term_free after use; or
  93. * NULL on error.
  94. */
  95. LSUP_Term *
  96. LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
  /** @brief Placeholder term, meant to be used with LSUP_term_reset.
   *
   * Every expansion calls #LSUP_term_new with an undefined type and no data,
   * so each use produces a separate term.
   *
   * NOTE(review): LSUP_term_reset is not declared in this header;
   * #LSUP_term_init looks like the function intended here. Confirm.
   */
  #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
  100. /** @brief Shortcut to create a URI.
  101. *
  102. * Must be freed with #LSUP_term_free.
  103. *
  104. * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
  105. *
  106. * @param uri[out] The URI to be created.
  107. *
  108. * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
  109. */
  110. inline LSUP_Term *
  111. LSUP_uri_new (const char *data)
  112. {
  113. if (!data) {
  114. uuid_t uuid;
  115. uuid_generate_random (uuid);
  116. uuid_str_t uuid_str;
  117. uuid_unparse_lower (uuid, uuid_str);
  118. char uri[UUID4_URN_SIZE];
  119. snprintf (uri, UUID4_URN_SIZE, "urn:uuid4:%s", uuid_str);
  120. data = uri;
  121. }
  122. return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL);
  123. }
  124. /* @brief Initialize or reuse a pre-allocated term structure.
  125. *
  126. * The structure must have been previously created with #LSUP_term_new. It can
  127. * be reinitialized multiple times without freeing it. It must be eventually
  128. * freed with #LSUP_term_free.
  129. */
  130. LSUP_rc
  131. LSUP_term_init(
  132. LSUP_Term *term, LSUP_TermType type,
  133. const char *data, void *metadata);
  134. /** @brief Deserialize a buffer into a term.
  135. *
  136. * @param[in] sterm Buffer to convert into a term. It must be a valid
  137. * serialized term from store or obtained with #LSUP_term_serialize().
  138. *
  139. * @return New term handle. It must be freed with #LSUP_term_free().
  140. */
  141. LSUP_Term *
  142. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  143. /** @brief Serialize a term into a buffer.
  144. *
  145. * @param[in] sterm Term to convert into a buffer.
  146. *
  147. * @return New buffer handle. It must be freed with #LSUP_buffer_free().
  148. */
  149. LSUP_Buffer *
  150. LSUP_term_serialize (const LSUP_Term *term);
  151. /**
  152. * @brief Shortcut to initialize a URI.
  153. */
  154. LSUP_rc
  155. LSUP_uri_init (LSUP_Term *term, const char *data);
  156. /** @brief Hash a buffer.
  157. */
  158. LSUP_Key
  159. LSUP_term_hash (const LSUP_Term *term);
  160. /**
  161. * Compare two terms.
  162. */
  163. bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2);
  164. void
  165. LSUP_term_done (LSUP_Term *term);
  166. void
  167. LSUP_term_free (LSUP_Term *term);
  168. /** @brief Create a new triple from three terms.
  169. *
  170. * TODO Term types are not validated at the moment.
  171. *
  172. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  173. *
  174. * @param[in] p Triple predicate. It must be an IRIRef.
  175. *
  176. * @param[in] o Triple object.
  177. *
  178. */
  179. LSUP_Triple *
  180. LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  /** @brief Dummy triple whose three slots are NULL.
   *
   * The result is not a valid triple. Every expansion calls #LSUP_triple_new.
   */
  #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
  184. LSUP_Triple *
  185. LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
  186. LSUP_BufferTriple *
  187. LSUP_triple_serialize (const LSUP_Triple *spo);
  188. /** @brief Initialize internal term pointers in a heap-allocated triple.
  189. *
  190. * NOTE: the term structures are not copied. If the triple is freed with
  191. * #LSUP_triple_free(), the originally provided terms are freed too.
  192. *
  193. * @param spo[in] Triple pointer to initialize.
  194. */
  195. LSUP_rc
  196. LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  197. /** @brief Free the internal pointers of a triple.
  198. *
  199. * @param spo[in] Triple to be freed.
  200. */
  201. void
  202. LSUP_triple_done (LSUP_Triple *spo);
  203. /** @brief Free a triple and all its internal pointers.
  204. *
  205. * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
  206. * back end), use a simple free(spo) instead of this.
  207. *
  208. * @param spo[in] Triple to be freed.
  209. */
  210. void
  211. LSUP_triple_free (LSUP_Triple *spo);
  212. /** @brief Get triple by term position.
  213. *
  214. * Useful for looping over all terms.
  215. *
  216. * @param trp[in] Triple pointer.
  217. *
  218. * @param n[in] A number between 0÷2.
  219. *
  220. * @return Corresponding triple term or NULL if n is out of range.
  221. */
  222. inline LSUP_Term *
  223. LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n)
  224. {
  225. if (n == TRP_POS_S) return trp->s;
  226. if (n == TRP_POS_P) return trp->p;
  227. if (n == TRP_POS_O) return trp->o;
  228. return NULL;
  229. }
  230. /** @brief Hash a triple.
  231. *
  232. * TODO This doesn't handle blank nodes correctly.
  233. */
  234. inline LSUP_Key
  235. LSUP_triple_hash (const LSUP_Triple *trp)
  236. {
  237. LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
  238. LSUP_Key hash = LSUP_btriple_hash (strp);
  239. LSUP_btriple_free (strp);
  240. return hash;
  241. }
  242. /** @brief Add an identifier to the term cache.
  243. *
  244. * @param[in] key Hash of the inserted term.
  245. *
  246. * @param[in] term Term to insert.
  247. */
  248. LSUP_rc
  249. LSUP_tcache_add (const LSUP_Key key, LSUP_Term *term);
  250. /** @brief Get an identifier from the cache.
  251. *
  252. * @param[in] key Key for the queried term.
  253. *
  254. * @return The retrieved term if found, or NULL. The string must not be
  255. * modified or freed.
  256. */
  257. const LSUP_Term *
  258. LSUP_tcache_get (const LSUP_Key key);
  259. #endif