term.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. #include <assert.h>
  4. #include "buffer.h"
  5. #include "namespace.h"
  /** @brief Size of a buffer holding a UUID4-based URN string.
   *
   * The replacement list is parenthesized so that the macro expands correctly
   * inside larger expressions (e.g. `2 * UUID4_URN_SIZE`).
   *
   * NOTE(review): the extra 10 bytes presumably make room for a `urn:uuid:`
   * prefix on top of `UUIDSTR_SIZE` — confirm against the implementation.
   */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
  7. // Some common RDF term values (fully qualified IRI and namespace-prefixed form).
  8. #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
  9. #define LSUP_RDF_TYPE_NS "rdf:type"
  10. /// Default data type for untyped literals (fully qualified IRI).
  11. #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
  12. #define DEFAULT_DTYPE_NS "xsd:string" ///< Default data type, namespace-prefixed form.
  13. /*
  14. * Data types.
  15. */
  16. /// Language tag, currently restricted to 7 characters (8-byte array; the extra byte presumably holds the NUL terminator).
  17. typedef char LSUP_LangTag[8];
  18. /// Term type. Determines which member of the #LSUP_Term union is meaningful.
  19. typedef enum {
  20. LSUP_TERM_UNDEFINED = 0,/**<
  21. * Undefined placeholder or result of an error.
  22. * Invalid for most operations.
  23. */
  24. LSUP_TERM_IRIREF, ///< IRI reference.
  25. LSUP_TERM_NS_IRIREF, ///< Namespace-prefixed IRI reference.
  26. LSUP_TERM_LITERAL, ///< Literal without language tag (carries a data type).
  27. LSUP_TERM_LT_LITERAL, ///< Language-tagged string literal.
  28. LSUP_TERM_BNODE, ///< Blank node.
  29. } LSUP_TermType;
  30. /** @brief IRI information.
  31. *
  32. * Only forward-declared in this header. See the regex matching groups of #LSUP_URI_REGEX_STR for more information.
  33. */
  34. typedef struct iri_info_t LSUP_IRIInfo;
  35. typedef struct link_map_iter LSUP_LinkMapIterator; ///< Link map iterator handle; see #LSUP_link_map_iter_new().
  36. /// RDF term. Which union member is meaningful depends on `type`.
  37. typedef struct term_t {
  38. char * data; ///< URI, literal value, or BNode label.
  39. union {
  40. struct term_t * datatype; ///< Data type IRI for LSUP_TERM_LITERAL.
  41. LSUP_LangTag lang; ///< Lang tag for LSUP_TERM_LT_LITERAL.
  42. LSUP_Key bnode_id; ///< BNode ID for comparison & skolemization.
  43. LSUP_IRIInfo * iri_info; ///< IRI information structure.
  44. };
  45. LSUP_TermType type; ///< Term type.
  46. } LSUP_Term;
  47. /** @brief Shorthand to test if a term is an IRI of any kind.
  48. * @note `term` is expanded twice; pass an expression without side effects. */
  49. #define LSUP_IS_IRI(term) \
  50. ((term)->type == LSUP_TERM_IRIREF || (term)->type == LSUP_TERM_NS_IRIREF)
  51. /** @brief Shorthand to test if a term is a literal of any kind (with or without language tag).
  52. * @note `term` is expanded twice; pass an expression without side effects. */
  53. #define LSUP_IS_LITERAL(term) \
  54. ((term)->type == LSUP_TERM_LITERAL || (term)->type == LSUP_TERM_LT_LITERAL)
  55. /** @brief Whether the environment is already initialized.
  56. * Evaluates to true when #LSUP_default_datatype is non-NULL. */
  57. #define LSUP_IS_INIT (LSUP_default_datatype != NULL)
  58. /** @brief RDF triple.
  59. *
  60. * This represents a complete RDF statement. Triple terms can be accessed
  61. * directly via the `s`, `p`, `o` members or by position via
  62. * #LSUP_triple_pos(). The struct only holds pointers to its terms.
  63. */
  64. typedef struct triple_t {
  65. LSUP_Term *s; ///< Subject.
  66. LSUP_Term *p; ///< Predicate.
  67. LSUP_Term *o; ///< Object.
  68. } LSUP_Triple;
  69. /// Link type of a #LSUP_LinkMap.
  70. typedef enum {
  71. LSUP_LINK_INBOUND, ///< Inbound link (sp).
  72. LSUP_LINK_OUTBOUND, ///< Outbound link (po).
  73. LSUP_LINK_EDGE, ///< Edge link (so).
  74. } LSUP_LinkType;
  75. /** @brief The immediate neighborhood of terms connected to a term.
  76. *
  77. * This is a hash map in which each key term is related to a set of one or more
  78. * other terms. The hash map is inside an opaque handle and is manipulated via
  79. * the `LSUP_link_map_*` functions.
  80. *
  81. * If the type of the link map is `LSUP_LINK_INBOUND`, the map keys
  82. * represent predicates and the sets related to them are the subjects, and the
  83. * term associated to the link map is the object; if
  84. * `LSUP_LINK_OUTBOUND`, the keys represent predicates, the related sets
  85. * objects, and the associated term is the subject. If `LSUP_LINK_EDGE`, the
  86. * keys represent subjects and the related sets objects, and the associated
  87. * term is the predicate.
  88. */
  89. typedef struct link_map LSUP_LinkMap;
  90. /** @brief A set of unique terms.
  91. *
  92. * This is used to bulk-add terms to a link map and for the global term cache (#LSUP_term_cache).
  93. */
  94. typedef struct hashmap LSUP_TermSet;
  95. /*
  96. * External variables.
  97. */
  98. /** @brief Compiled hash of default literal data type.
  99. * NOTE(review): typed uint32_t while term hashes in this header use LSUP_Key — confirm the two are compatible. */
  100. extern uint32_t LSUP_default_dtype_key;
  101. /** @brief Default literal data type URI.
  102. *
  103. * Literal terms created with undefined data type will have it set to this
  104. * URI implicitly. #LSUP_IS_INIT tests this pointer against NULL.
  105. */
  106. extern LSUP_Term *LSUP_default_datatype;
  107. /** @brief Global term cache.
  108. *
  109. * Stores frequently used terms, e.g. data type URIs.
  110. */
  111. extern LSUP_TermSet *LSUP_term_cache;
  112. /*
  113. * API functions.
  114. */
  115. /** @brief Create a new term.
  116. *
  117. * This is a generic function; it is recommended to use specialized functions
  118. * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict type
  119. * checks for the metadata parameter.
  120. *
  121. * @param[in] type Term type. One of #LSUP_TermType.
  122. *
  123. * @param[in] data Term data: textual URI, literal value without data type
  124. * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
  125. * random identifier is generated.
  126. *
  127. * @param[in] metadata Namespace map (LSUP_NSMap *) for IRI refs; language tag
  128. * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
  129. * for other literals. It may be NULL.
  130. *
  131. * @return New term, which must be freed with #LSUP_term_free() after use; or
  132. * NULL on error.
  133. */
  134. LSUP_Term *
  135. LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
  136. /** @brief Placeholder term to use with LSUP_term_reset.
  137. * Each expansion calls #LSUP_term_new(), so every result must be freed with #LSUP_term_free(). */
  138. #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
  139. /** @brief Shortcut to create an IRI reference.
  140. *
  141. * Must be freed with #LSUP_term_free.
  142. *
  143. * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
  144. * This cannot be NULL if the nsm parameter is not NULL.
  145. *
  146. * @param nsm[in] Namespace map. If not NULL, a namespace-prefixed
  147. * (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one
  148. * (#LSUP_TERM_IRIREF).
  149. *
  150. * @return same as #LSUP_term_new().
  151. */
  152. inline LSUP_Term *
  153. LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
  154. {
  155. return (
  156. nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) :
  157. LSUP_term_new (LSUP_TERM_IRIREF, data, NULL));
  158. }
  159. /** @brief Create a new absolute IRI from a path relative to a root IRI.
  160. *
  161. * The term is always of type LSUP_TERM_IRIREF (i.e. not namespace-prefixed).
  162. *
  163. * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix)
  164. * the result is semantically identical to the input.
  165. *
  166. * If the relative IRI begins with a '/', the resulting IRI is relative to the
  167. * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
  168. * it is ignored.
  169. *
  170. * Otherwise, the resulting IRI is relative to the full root string.
  171. *
  172. * @param[in] root Root IRI that the new IRI should be relative to.
  173. *
  174. * @param[in] iri Term with the IRI to make absolute; see above for how it is resolved.
  175. *
  176. * @return New absolute IRI, or NULL if either term is not an IRI.
  177. */
  178. LSUP_Term *
  179. LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri);
  180. /** @brief Create a new relative IRI from an absolute IRI and a web root IRI.
  181. *
  182. * This works with namespace-prefixed IRIs and returns a term of the same type
  183. * as the input.
  184. *
  185. * @param[in] root Root IRI that the new IRI should be relative to.
  186. *
  187. * @param[in] iri Full (absolute) IRI to make relative.
  188. *
  189. * @return New IRI, or NULL if either term is not an IRI. If the input IRI is
  190. * not a path under the root IRI, the result will be identical to the input.
  191. */
  192. LSUP_Term *
  193. LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri);
  194. /** @brief Shortcut to create a literal term.
  195. *
  196. * Must be freed with #LSUP_term_free.
  197. *
  198. * @param data[in] The literal string.
  199. *
  200. * @param datatype[in] Data type URI string. If NULL, the default data type
  201. * (xsd:string) is used. The new term takes ownership of the pointer.
  202. *
  203. * @return same as #LSUP_term_new().
  204. */
  205. inline LSUP_Term *
  206. LSUP_literal_new (const char *data, LSUP_Term *datatype)
  207. { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
  208. /** @brief Shortcut to create a language-tagged literal term.
  209. *
  210. * Must be freed with #LSUP_term_free.
  211. *
  212. * @param data[in] The literal string.
  213. *
  214. * @param lang[in] Language tag string.
  215. *
  216. * @return same as #LSUP_term_new().
  217. */
  218. inline LSUP_Term *
  219. LSUP_lt_literal_new (const char *data, char *lang)
  220. { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
  221. /** @brief Shortcut to create a blank node.
  222. *
  223. * Must be freed with #LSUP_term_free.
  224. *
  225. * @param data[in] The BNode identifier.
  226. *
  227. * @return same as #LSUP_term_new().
  228. */
  229. inline LSUP_Term *
  230. LSUP_bnode_new (const char *data)
  231. { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
  232. /** @brief Copy a term.
  233. *
  234. * @param[in] src The term to copy.
  235. *
  236. * @return A new duplicate term handle. NOTE(review): presumably to be freed with #LSUP_term_free() — confirm.
  237. */
  238. LSUP_Term *
  239. LSUP_term_copy (const LSUP_Term *src);
  240. /** @brief Deserialize a buffer into a term.
  241. *
  242. * @param[in] sterm Buffer to convert into a term. It must be a valid
  243. * serialized term, either read from a store or obtained with #LSUP_term_serialize().
  244. *
  245. * @return New term handle. It must be freed with #LSUP_term_free().
  246. */
  247. LSUP_Term *
  248. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  249. /** @brief Serialize a term into a buffer.
  250. *
  251. * @param[in] term Term to convert into a buffer.
  252. *
  253. * @return New buffer handle. It must be freed with #LSUP_buffer_free().
  254. */
  255. LSUP_Buffer *
  256. LSUP_term_serialize (const LSUP_Term *term);
  257. /** @brief Hash a term.
  258. * @param[in] term Term to hash. @return Hash key; #LSUP_term_equals() compares terms by this value. */
  259. LSUP_Key
  260. LSUP_term_hash (const LSUP_Term *term);
  261. /** @brief Compare two terms.
  262. *
  263. * The terms evaluate as equal if their hashes are equal—i.e. if they are
  264. * semantically equivalent.
  265. */
  266. inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  267. { return LSUP_term_hash (term1) == LSUP_term_hash (term2); }
  /// Free a term obtained from #LSUP_term_new() or one of its shortcut constructors.
  268. void
  269. LSUP_term_free (LSUP_Term *term);
  270. /** @brief Namespace map of an IRI ref.
  271. *
  272. * @param[in] iri IRI reference handle.
  273. *
  274. * @return A pointer to the namespace map associated with the IRI. It is
  275. * freed at program shutdown, so the caller must not free it.
  276. */
  277. LSUP_NSMap *
  278. LSUP_iriref_nsm (const LSUP_Term *iri);
  279. /** @brief Get the prefix portion of an IRI ref.
  280. *
  281. * @param[in] iri IRI reference handle.
  282. *
  283. * @return String containing the protocol and domain name part of the IRI. It
  284. * should be freed by the caller after use.
  285. */
  286. char *
  287. LSUP_iriref_prefix (const LSUP_Term *iri);
  288. /** @brief Get the path portion of an IRI ref.
  289. *
  290. * @param[in] iri IRI reference handle.
  291. *
  292. * @return String containing the path of the IRI relative to the web root. For
  293. * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
  294. * be freed by the caller after use.
  295. */
  296. char *
  297. LSUP_iriref_path (const LSUP_Term *iri);
  298. /** @brief Get the fragment portion of an IRI ref.
  299. *
  300. * @param[in] iri IRI reference handle.
  301. *
  302. * @return String containing the fragment part of the IRI, or NULL if the IRI
  303. * contains no fragment. If not NULL, it should be freed by the caller after use.
  304. */
  305. char *
  306. LSUP_iriref_frag (const LSUP_Term *iri);
  307. /*
  308. * TRIPLES
  309. */
  310. /** @brief Create a new triple from three terms.
  311. *
  312. * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
  313. * To only free the triple, use free().
  314. *
  315. * TODO Term types are not validated at the moment.
  316. *
  317. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  318. *
  319. * @param[in] p Triple predicate. It must be an IRIRef.
  320. *
  321. * @param[in] o Triple object.
  322. * @return New triple handle. NOTE(review): presumably NULL on allocation failure — confirm.
  323. */
  324. LSUP_Triple *
  325. LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  326. /** @brief Dummy triple with NULL slots. It is not a valid triple.
  327. * Each expansion calls #LSUP_triple_new(), so every result is a separate triple handle. */
  328. #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
  /// Create a triple from a buffer triple. NOTE(review): presumably the inverse of #LSUP_triple_serialize() — confirm.
  329. LSUP_Triple *
  330. LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
  /// Serialize a triple into a buffer triple. #LSUP_triple_hash() releases the result with LSUP_btriple_free().
  331. LSUP_BufferTriple *
  332. LSUP_triple_serialize (const LSUP_Triple *spo);
  333. /** @brief Initialize internal term pointers in a heap-allocated triple.
  334. *
  335. * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
  336. * To only free the triple, use free().
  337. *
  338. * @param[in] spo Triple pointer to initialize.
  339. * @param[in] s,p,o Subject, predicate and object terms to store in the triple. */
  340. LSUP_rc
  341. LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  342. /** @brief Free the internal pointers of a triple.
  343. *
  344. * @param[in] spo Triple whose internal pointers are freed. NOTE(review): presumably the triple struct itself is left allocated — confirm.
  345. */
  346. void
  347. LSUP_triple_done (LSUP_Triple *spo);
  348. /** @brief Free a triple and all its internal pointers.
  349. *
  350. * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
  351. * back end), use a simple free(spo) instead of this.
  352. *
  353. * @param[in] spo Triple to be freed.
  354. */
  355. void
  356. LSUP_triple_free (LSUP_Triple *spo);
  357. /** @brief Get triple by term position.
  358. *
  359. * Useful for looping over all terms.
  360. *
  361. * @param trp[in] Triple pointer.
  362. *
  363. * @param n[in] A number between 0÷2.
  364. *
  365. * @return Corresponding triple term or NULL if n is out of range.
  366. */
  367. inline LSUP_Term *
  368. LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n)
  369. {
  370. if (n == TRP_POS_S) return trp->s;
  371. if (n == TRP_POS_P) return trp->p;
  372. if (n == TRP_POS_O) return trp->o;
  373. return NULL;
  374. }
  375. /** @brief Hash a triple.
  376. *
  377. * TODO This doesn't handle blank nodes correctly.
  378. */
  379. inline LSUP_Key
  380. LSUP_triple_hash (const LSUP_Triple *trp)
  381. {
  382. LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
  383. LSUP_Key hash = LSUP_btriple_hash (strp);
  384. LSUP_btriple_free (strp);
  385. return hash;
  386. }
  387. /** @brief Create a new term set.
  388. *
  389. * @return New empty term set, to be freed with #LSUP_term_set_free().
  390. */
  391. LSUP_TermSet *
  392. LSUP_term_set_new (void);
  393. /** @brief Free a term set, including the terms it took ownership of via #LSUP_term_set_add().
  394. *
  395. * @param[in] ts Term set handle.
  396. */
  397. void
  398. LSUP_term_set_free (LSUP_TermSet *ts);
  399. /** @brief Add term to a term set.
  400. *
  401. * If the same term is already in the set, it is not replaced, and the existing
  402. * term's handle is made available in the `existing` variable. In this case,
  403. * the caller may want to free the passed term which has not been added.
  404. *
  405. * @param[in] ts Term set to be added to.
  406. *
  407. * @param[in] term Term to be added to the set. The term set will take
  408. * ownership of the term and free it when it's freed with
  409. * #LSUP_term_set_free()—only if the return code is LSUP_OK.
  410. *
  411. * @param[out] existing If not NULL, and if the term being added is a
  412. * duplicate, this variable will be populated with the existing term handle.
  413. *
  414. * @return LSUP_OK on success; LSUP_NOACTION if the term is a duplicate;
  415. * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge
  416. * of freeing the `term` handle.
  417. */
  418. LSUP_rc
  419. LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing);
  420. /** @brief Get a term from a term set.
  421. *
  422. * @param[in] ts Term set handle.
  423. *
  424. * @param[in] key Key for the queried term. NOTE(review): presumably the value of #LSUP_term_hash() for that term — confirm.
  425. *
  426. * @return The retrieved term if found, or NULL. The term must not be
  427. * modified or freed.
  428. */
  429. const LSUP_Term *
  430. LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key);
  431. /** @brief Iterate through a term set.
  432. *
  433. * @param[in] ts Term set handle.
  434. *
  435. * @param[in,out] i Iterator to be initially set to 0.
  436. *
  437. * @param[out] term Pointer to be populated with the next term on success. It
  438. * may be NULL.
  439. *
  440. * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
  441. * set has been reached.
  442. */
  443. LSUP_rc
  444. LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);
  445. /** @brief New link map.
  446. *
  447. * The initial state of the returned list is: `{t: [NULL], tl: [NULL]}`
  448. *
  449. * Linked terms and term sets can be added with #LSUP_link_map_add, and terms
  450. * can be added to a term set with #LSUP_term_set_add.
  451. * @param[in] type Link type of the new map; one of #LSUP_LinkType.
  452. * @return A new empty link map, to be freed with #LSUP_link_map_free().
  453. */
  454. LSUP_LinkMap *
  455. LSUP_link_map_new (LSUP_LinkType type);
  456. /** @brief Free a link map.
  457. *
  458. * All arrays and term handles are recursively freed.
  459. *
  460. * @param[in] pol Link map handle obtained with #LSUP_link_map_new().
  461. */
  462. void
  463. LSUP_link_map_free (LSUP_LinkMap *pol);
  464. /// Return the link type (#LSUP_LinkType) of a link map.
  465. LSUP_LinkType
  466. LSUP_link_map_type (const LSUP_LinkMap *map);
  467. /** @brief Add a term - term set pair to a link map.
  468. *
  469. * If there is already a term set for the given term, items from the added term
  470. * set are added to the existing term set (if not duplicated). Otherwise, the term
  471. * set handle is linked to the new term.
  472. *
  473. * In any case, the caller should not directly use the term and term set after
  474. * passing them to this function.
  475. *
  476. * @param[in] cmap Link map handle obtained with #LSUP_link_map_new().
  477. *
  478. * @param[in] term Term to be associated with the given term set. The
  479. * link map structure takes ownership of the term.
  480. *
  481. * @param[in] tset Term set to be associated with the given term. The link
  482. * map structure takes ownership of the term set and the terms in it.
  483. *
  484. * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
  485. */
  486. LSUP_rc
  487. LSUP_link_map_add (
  488. LSUP_LinkMap *cmap, LSUP_Term *term, LSUP_TermSet *tset);
  489. /** @brief Create a new iterator to loop through a link map.
  490. *
  491. * @param[in] lmap Map handle to iterate.
  492. *
  493. * @param[in] ext External term to look for connections.
  494. * @return New iterator handle; see #LSUP_link_map_iter_free() for releasing it. */
  495. LSUP_LinkMapIterator *
  496. LSUP_link_map_iter_new (const LSUP_LinkMap *lmap, LSUP_Term *ext);
  497. /// Free a link map iterator obtained with #LSUP_link_map_iter_new().
  498. void
  499. LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);
  500. /** @brief Iterate through a link map.
  501. *
  502. * Each call to this function yields a linked term and the related term set.
  503. *
  504. * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
  505. *
  506. * @param[out] lt Populated with the linked term on success.
  507. *
  508. * @param[out] ts Populated with the term set related to `lt` on success.
  509. *
  510. * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
  511. * has been reached.
  512. */
  513. LSUP_rc
  514. LSUP_link_map_next (
  515. LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);
  516. /** @brief Iterate over a link map and generate triples.
  517. *
  518. * Calling this function repeatedly builds triples for all the linked terms and
  519. * term sets in the map, based on a given related term.
  520. *
  521. * @param[in] it Link map iterator handle, obtained with
  522. * #LSUP_link_map_iter_new().
  523. *
  524. * @note The related term is not a parameter here; presumably it is the `ext` term given to #LSUP_link_map_iter_new() — TODO confirm.
  525. *
  526. * @param[in,out] spo Result triple. The triple handle must be pre-allocated
  527. * (it may be TRP_DUMMY) and calls to this function will set its members
  528. * to term handles owned by the link map. If rc != LSUP_OK, the contents are
  529. * undefined.
  530. *
  531. * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
  532. * has been reached; <0 on error.
  533. */
  534. LSUP_rc
  535. LSUP_link_map_triples (
  536. LSUP_LinkMapIterator *it, LSUP_Triple *spo);
  537. #endif