term.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. #include <assert.h>
  4. #include "buffer.h"
  5. #include "namespace.h"
  6. /** @defgroup term RDF term and triple module
  7. * @ingroup public
  8. * @{
  9. */
  /** @brief Size of a UUID4-based URN string: `UUIDSTR_SIZE` plus 10.
   *
   * The expansion is parenthesized so that it behaves as a single operand in
   * any expression, e.g. `2 * UUID4_URN_SIZE` or `sizeof (char) * UUID4_URN_SIZE`.
   *
   * NOTE(review): the extra 10 bytes presumably make room for a URN prefix
   * such as `urn:uuid:` — confirm against the generator in the implementation.
   */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
  11. // Some common RDF term values.
  12. #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
  13. #define LSUP_RDF_TYPE_NS "rdf:type"
  14. /// Default data type for untyped literals (fully qualified IRI).
  15. #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
  16. #define DEFAULT_DTYPE_NS "xsd:string"
  17. /*
  18. * Data types.
  19. */
  20. /// Language tag, currently restricted to 7 characters.
  21. typedef char LSUP_LangTag[8];
  22. /// Term type.
  23. typedef enum {
  24. LSUP_TERM_UNDEFINED = 0,///< Undefined placeholder or result of an error.
  25. ///<
  26. ///< Invalid for most operations.
  27. LSUP_TERM_IRIREF, ///< IRI reference.
  28. LSUP_TERM_NS_IRIREF, ///< Namespace-prefixed IRI reference.
  29. LSUP_TERM_LITERAL, ///< Literal without language tag.
  30. LSUP_TERM_LT_LITERAL, ///< Language-tagged string literal.
  31. LSUP_TERM_BNODE, ///< Blank node.
  32. } LSUP_TermType;
  33. /// Opaque IRI information.
  34. typedef struct iri_info_t LSUP_IRIInfo;
  35. /// Opaque iterator for link maps.
  36. typedef struct link_map_iter LSUP_LinkMapIterator;
  37. /// RDF term.
  38. typedef struct term_t {
  39. char * data; ///< URI, literal value, or BNode label.
  40. union {
  41. struct term_t * datatype; ///< Data type IRI for LSUP_TERM_LITERAL.
  42. LSUP_LangTag lang; ///< Lang tag for LSUP_TERM_LT_LITERAL.
  43. LSUP_Key bnode_id; ///< BNode ID for comparison & skolemization.
  44. LSUP_IRIInfo * iri_info; ///< IRI information structure.
  45. };
  46. LSUP_TermType type; ///< Term type.
  47. } LSUP_Term;
  /** @brief Test whether a term is an IRI reference of either kind.
   *
   * Evaluates to non-zero when the term type is #LSUP_TERM_IRIREF or
   * #LSUP_TERM_NS_IRIREF, and to zero otherwise.
   *
   * @note `term` may be evaluated twice; do not pass an expression with side
   * effects.
   *
   * @param[in] term Pointer to the term to inspect.
   */
  #define LSUP_IS_IRI(term) (                                                  \
      (term)->type == LSUP_TERM_IRIREF ||                                      \
      (term)->type == LSUP_TERM_NS_IRIREF                                      \
  )
  /** @brief Test whether a term is a literal of either kind.
   *
   * Evaluates to non-zero when the term type is #LSUP_TERM_LITERAL or
   * #LSUP_TERM_LT_LITERAL, and to zero otherwise.
   *
   * @note `term` may be evaluated twice; do not pass an expression with side
   * effects.
   *
   * @param[in] term Pointer to the term to inspect.
   */
  #define LSUP_IS_LITERAL(term) (                                              \
      (term)->type == LSUP_TERM_LITERAL ||                                     \
      (term)->type == LSUP_TERM_LT_LITERAL                                     \
  )
  56. /** @brief Whether the environment is already initialized.
  57. */
  58. #define LSUP_IS_INIT (LSUP_default_datatype != NULL)
  59. /** @brief RDF triple.
  60. *
  61. * This represents a complete RDF statement. Triple terms can be accessed
  62. * directly via the `s`, `p`, `o` members or sequentially via
  63. * #LSUP_triple_pos().
  64. */
  65. typedef struct triple_t {
  66. LSUP_Term *s; ///< Subject.
  67. LSUP_Term *p; ///< Predicate.
  68. LSUP_Term *o; ///< Object.
  69. } LSUP_Triple;
  70. /// Link type.
  71. typedef enum {
  72. LSUP_LINK_INBOUND, ///< Inbound link (sp).
  73. LSUP_LINK_OUTBOUND, ///< Outbound link (po).
  74. LSUP_LINK_EDGE, ///< Edge link (so).
  75. } LSUP_LinkType;
  76. /** @brief The immediate neighborhood of terms connected to a term.
  77. *
  78. * This is a hash map whose each term is related to a set of one or more other
  79. * terms. The hash map is inside an opaque handle and is manipulated via the
  80. * `LSUP_link_map_*` functions.
  81. *
  82. * If the type of the link map is `LSUP_LINK_INBOUND`, the map keys
  83. * represent predicates and the sets related to them are the subjects, and the
  84. * term associated to the link map is the object.
  85. *
  86. * If the type is `LSUP_LINK_OUTBOUND`, the keys represent predicates, the
  87. * related sets objects, and the associated term is the subject.
  88. *
  89. * If the type is `LSUP_LINK_EDGE`, the keys represent subjects and the related
  90. * sets objects, and the associated term is the predicate.
  91. */
  92. typedef struct link_map LSUP_LinkMap;
  93. /** @brief a set of unique terms.
  94. *
  95. * This is used to bulk-add terms to a link map.
  96. */
  97. typedef struct hashmap LSUP_TermSet;
  98. /*
  99. * External variables.
  100. */
  101. /** @brief Compiled hash of default literal data type.
  102. */
  103. extern uint32_t LSUP_default_dtype_key;
  104. /** @brief Default literal data type URI.
  105. *
  106. * Literal terms created with undefined data type will have it set to this
  107. * URI implicitly.
  108. */
  109. extern LSUP_Term *LSUP_default_datatype;
  110. /** @brief Global term cache.
  111. *
  112. * Stores frequently used terms, e.g. data type URIs.
  113. */
  114. extern LSUP_TermSet *LSUP_term_cache;
  115. /*
  116. * API functions.
  117. */
  118. /** @brief Create a new term.
  119. *
  120. * This is a generic function; it is recommended to use specialized functions
  121. * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict
  122. * type checks for the metadata parameter.
  123. *
  124. * @param[in] type Term type. One of #LSUP_TermType.
  125. *
  126. * @param[in] data Term data: textual URI, literal value without data type
  127. * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
  128. * random identifier is generated.
  129. *
  130. * @param[in] metadata Namespace map (LSUP_NSMap *) for IRI refs; language tag
  131. * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
  132. * for other literals. It may be NULL.
  133. *
  134. * @return New term, which must be freed with #LSUP_term_free after use; or
  135. * NULL on error.
  136. */
  137. LSUP_Term *
  138. LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
  139. /** @brief Placeholder term to use with LSUP_term_reset.
  140. */
  141. #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
  142. /** @brief Shortcut to create an IRI reference.
  143. *
  144. * Must be freed with #LSUP_term_free.
  145. *
  146. * @param[in] data The URI string. If NULL, a UUID4-based URN is generated.
  147. * This cannot be NULL if the nsm parameter is not NULL.
  148. *
  149. * @param[in] nsm Namespace map. If not NULL, a namespace-prefixed
  150. * (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one
  151. * (#LSUP_TERM_IRIREF).
  152. *
  153. * @return same as #LSUP_term_new().
  154. */
  155. inline LSUP_Term *
  156. LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
  157. {
  158. return (
  159. nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) :
  160. LSUP_term_new (LSUP_TERM_IRIREF, data, NULL));
  161. }
  162. /** @brief Create a new absolute IRI from a path relative to a root IRI.
  163. *
  164. * The term is always of type LSUP_TERM_IRIREF (i.e. not namespace-prefixed).
  165. *
  166. * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix)
  167. * the result is semantically identical to the input.
  168. *
  169. * If the relative IRI begins with a '/', the resulting IRI is relative to the
  170. * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
  171. * it is ignored.
  172. *
  173. * Otherwise, the resulting IRI is relative to the full root string.
  174. *
  175. * @param[in] root Root IRI that the new IRI should be relative to.
  176. *
  177. * @param[in] iri Term with an IRI relative to the webroot.
  178. *
  179. * @return New absolute IRI, or NULL if either term is not an IRI.
  180. */
  181. LSUP_Term *
  182. LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri);
  183. /** @brief Create a new relative IRI from an absolute IRI and a web root IRI.
  184. *
  185. * This works with namespace-prefixed IRIs and returns a term of the same type
  186. * as the input.
  187. *
  188. * @param[in] root Root IRI that the new IRI should be relative to.
  189. *
  190. * @param[in] iri Full IRI.
  191. *
  192. * @return New IRI, or NULL if either term is not an IRI. If the input IRI is
  193. * not a path under the root IRI, the result will be identical to the input.
  194. */
  195. LSUP_Term *
  196. LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri);
  197. /** @brief Shortcut to create a literal term.
  198. *
  199. * Must be freed with #LSUP_term_free.
  200. *
  201. * @param[in] data The literal string.
  202. *
  203. * @param[in] datatype Data type URI string. If NULL, the default data type
  204. * (xsd:string) is used. The new term takes ownership of the pointer.
  205. *
  206. * @return same as #LSUP_term_new().
  207. */
  208. inline LSUP_Term *
  209. LSUP_literal_new (const char *data, LSUP_Term *datatype)
  210. { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
  211. /** @brief Shortcut to create a language-tagged literal term.
  212. *
  213. * Must be freed with #LSUP_term_free.
  214. *
  215. * @param[in] data The literal string.
  216. *
  217. * @param[in] lang Language tag string.
  218. *
  219. * @return same as #LSUP_term_new().
  220. */
  221. inline LSUP_Term *
  222. LSUP_lt_literal_new (const char *data, char *lang)
  223. { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
  224. /** @brief Shortcut to create a blank node.
  225. *
  226. * Must be freed with #LSUP_term_free.
  227. *
  228. * @param[in] data The BNode identifier. It can be NULL, in which case, a
  229. * random identifier is minted.
  230. *
  231. * @return same as #LSUP_term_new().
  232. */
  233. inline LSUP_Term *
  234. LSUP_bnode_new (const char *data)
  235. { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
  236. /** @brief Copy a term.
  237. *
  238. * @param[in] src The term to copy.
  239. *
  240. * @return A new duplicate term handle.
  241. */
  242. LSUP_Term *
  243. LSUP_term_copy (const LSUP_Term *src);
  244. /** @brief Deserialize a buffer into a term.
  245. *
  246. * @param[in] sterm Buffer to convert into a term. It must be a valid
  247. * serialized term from store or obtained with #LSUP_term_serialize().
  248. *
  249. * @return New term handle. It must be freed with #LSUP_term_free().
  250. */
  251. LSUP_Term *
  252. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  253. /** @brief Serialize a term into a buffer.
  254. *
  255. * @param[in] term Term to convert into a buffer.
  256. *
  257. * @return New buffer handle. It must be freed with #LSUP_buffer_free().
  258. */
  259. LSUP_Buffer *
  260. LSUP_term_serialize (const LSUP_Term *term);
  261. /** @brief Hash a term.
  262. */
  263. LSUP_Key
  264. LSUP_term_hash (const LSUP_Term *term);
  265. /** @brief Compare two terms.
  266. *
  267. * The terms evaluate as equal if their hashes are equal—i.e. if they are
  268. * semantically equivalent.
  269. */
  270. inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  271. { return term1 == term2 || LSUP_term_hash (term1) == LSUP_term_hash (term2); }
  272. void
  273. LSUP_term_free (LSUP_Term *term);
  274. /** @brief Namespace map of a IRI ref.
  275. *
  276. * @param[in] iri IRI reference handle.
  277. *
  278. * @return A pointer to the namespace map associated with the IRI. It is
  279. * freed at program shutdown.
  280. */
  281. LSUP_NSMap *
  282. LSUP_iriref_nsm (const LSUP_Term *iri);
  283. /** @brief Get the prefix portion of a IRI ref.
  284. *
  285. * @param[in] iri IRI reference handle.
  286. *
  287. * @return String containing the protocol and domain name part of the IRI. It
  288. * should be freed after use.
  289. */
  290. char *
  291. LSUP_iriref_prefix (const LSUP_Term *iri);
  292. /** @brief Get the path portion of a IRI ref.
  293. *
  294. * @param[in] iri IRI reference handle.
  295. *
  296. * @return String containing the path of the IRI relative to the web root. For
  297. * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
  298. * be freed after use.
  299. */
  300. char *
  301. LSUP_iriref_path (const LSUP_Term *iri);
  302. /** @brief Get the fragment portion of a IRI ref.
  303. *
  304. * @param[in] iri IRI reference handle.
  305. *
  306. * @return String containing the fragment part of the IRI, or NULL if the IRI
  307. * contains no fragment. It should be freed after use.
  308. */
  309. char *
  310. LSUP_iriref_frag (const LSUP_Term *iri);
  311. /*
  312. * TRIPLES
  313. */
  314. /** @brief Create a new triple from three terms.
  315. *
  316. * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
  317. * To only free the triple, use free().
  318. *
  319. * TODO Term types are not validated at the moment.
  320. *
  321. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  322. *
  323. * @param[in] p Triple predicate. It must be an IRIRef.
  324. *
  325. * @param[in] o Triple object.
  326. *
  327. */
  328. LSUP_Triple *
  329. LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  330. /** @brief Dummy triple with NULL slots. It is not a valid triple.
  331. */
  332. #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
  333. LSUP_Triple *
  334. LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
  335. LSUP_BufferTriple *
  336. LSUP_triple_serialize (const LSUP_Triple *spo);
  337. /** @brief Initialize internal term pointers in a heap-allocated triple.
  338. *
  339. * @sa #LSUP_triple_new()
  340. *
  341. * @param[in] spo Triple pointer to initialize.
  342. *
  343. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  344. *
  345. * @param[in] p Triple predicate. It must be an IRIRef.
  346. *
  347. * @param[in] o Triple object.
  348. */
  349. LSUP_rc
  350. LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  351. /** @brief Free the internal pointers of a triple.
  352. *
  353. * @param[in] spo Triple to be freed.
  354. */
  355. void
  356. LSUP_triple_done (LSUP_Triple *spo);
  357. /** @brief Free a triple and all its internal pointers.
  358. *
  359. * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
  360. * back end), use a simple free(spo) instead of this.
  361. *
  362. * @param[in] spo Triple to be freed.
  363. */
  364. void
  365. LSUP_triple_free (LSUP_Triple *spo);
  366. /** @brief Get triple by term position.
  367. *
  368. * Useful for looping over all terms.
  369. *
  370. * @param[in] trp Triple pointer.
  371. *
  372. * @param[in] n A number between 0÷2.
  373. *
  374. * @return Corresponding triple term or NULL if n is out of range.
  375. */
  376. inline LSUP_Term *
  377. LSUP_triple_pos (const LSUP_Triple *trp, const LSUP_TriplePos n)
  378. {
  379. if (n == TRP_POS_S) return trp->s;
  380. if (n == TRP_POS_P) return trp->p;
  381. if (n == TRP_POS_O) return trp->o;
  382. return NULL;
  383. }
  384. /** @brief Hash a triple.
  385. *
  386. * TODO This doesn't handle blank nodes correctly.
  387. */
  388. inline LSUP_Key
  389. LSUP_triple_hash (const LSUP_Triple *trp)
  390. {
  391. LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
  392. LSUP_Key hash = LSUP_btriple_hash (strp);
  393. LSUP_btriple_free (strp);
  394. return hash;
  395. }
  396. /** @brief Create a new term set.
  397. *
  398. * @return New empty term set.
  399. */
  400. LSUP_TermSet *
  401. LSUP_term_set_new (void);
  402. /** @brief Free a term set.
  403. *
  404. * @param[in] ts Term set handle.
  405. */
  406. void
  407. LSUP_term_set_free (LSUP_TermSet *ts);
  408. /** @brief Add term to a term set.
  409. *
  410. * If the same term is already in the set, it is not replaced, and the existing
  411. * term's handle is made available in the `existing` variable. In this case,
  412. * the caller may want to free the passed term which has not been added.
  413. *
  414. * @param[in] ts Term set to be added to.
  415. *
  416. * @param[in] term Term to be added to the list. The term set will take
  417. * ownership of the term and free it when it's freed with
  418. * #LSUP_term_set_free()—only if the return code is LSUP_OK.
  419. *
  420. * @param[out] existing If not NULL, and if the term being added is a
  421. * duplicate, this variable will be populated with the existing term handle.
  422. *
  423. * @return LSUP_OK on success; LSUP_NOACTION if the term is duplicate;
  424. * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge
  425. * of freeing the `term` handle.
  426. */
  427. LSUP_rc
  428. LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing);
  429. /** @brief Get a term from a term set.
  430. *
  431. * @param[in] ts Term set handle.
  432. *
  433. * @param[in] key Key for the queried term.
  434. *
  435. * @return The retrieved term if found, or NULL. The term must not be
  436. * modified or freed.
  437. */
  438. const LSUP_Term *
  439. LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key);
  440. /** @brief Iterate through a term set.
  441. *
  442. * @param[in] ts Term set handle.
  443. *
  444. * @param[in,out] i Iterator to be initially set to 0.
  445. *
  446. * @param[out] term Pointer to be populated with the next term on success. It
  447. * may be NULL.
  448. *
  449. * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
  450. * set has been reached.
  451. */
  452. LSUP_rc
  453. LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);
  454. /** @brief New link map.
  455. *
  456. * Term sets built with #LSUP_term_set_add() are added to a link map with #LSUP_link_map_add().
  457. *
  458. * @param[in] type Type of links that the link map shall contain.
  459. * @sa #LSUP_LinkType
  460. *
  461. * @return a new empty link map.
  462. */
  463. LSUP_LinkMap *
  464. LSUP_link_map_new (LSUP_LinkType type);
  465. /** @brief Free a link map.
  466. *
  467. * All arrays and term handles are recursively freed.
  468. *
  469. * @param[in] lm link map handle obtained with #LSUP_link_map_new().
  470. */
  471. void
  472. LSUP_link_map_free (LSUP_LinkMap *lm);
  473. /** @brief Return the link map type.
  474. *
  475. * @return Link type. @sa #LSUP_LinkType
  476. */
  477. LSUP_LinkType
  478. LSUP_link_map_type (const LSUP_LinkMap *map);
  479. /** @brief Add a term - term set pair to a link map.
  480. *
  481. * If there is already a term set for the given term, items from the added term
  482. * are added to the existing term set (if not duplicated). Otherwise, the term
  483. * set handle is linked to the new term.
  484. *
  485. * In any case, the caller should not directly use the term and term set after
  486. * passing them to this function.
  487. *
  488. * @param[in] lmap Link map handle obtained with #LSUP_link_map_new().
  489. *
  490. * @param[in] term Term to be associated with the given object list. The
  491. * link map structure takes ownership of the term.
  492. *
  493. * @param[in] tset term set to be associated with the given term. The link
  494. * map structure takes ownership of the term set and the terms in it.
  495. *
  496. * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
  497. */
  498. LSUP_rc
  499. LSUP_link_map_add (
  500. LSUP_LinkMap *lmap, LSUP_Term *term, LSUP_TermSet *tset);
  501. /** @brief Create a new iterator to loop through a link map.
  502. *
  503. * @param[in] lmap Map handle to iterate.
  504. */
  505. LSUP_LinkMapIterator *
  506. LSUP_link_map_iter_new (const LSUP_LinkMap *lmap, LSUP_Term *ext);
  507. /** @brief Iterate through a link map.
  508. *
  509. * Each call to this function yields a linked term and the related term set.
  510. *
  511. * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
  512. *
  513. * @param[out] lt Linked term returned.
  514. *
  515. * @param[out] ts Term set returned.
  516. *
  517. * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
  518. * has been reached.
  519. */
  520. LSUP_rc
  521. LSUP_link_map_next (
  522. LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);
  523. /// Free a link map iterator.
  524. void
  525. LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);
  526. /**@brief Iterate over a link map and generate triples.
  527. *
  528. * Calling this function repeatedly builds triples for all the linked terms and
  529. * term sets in the map, based on a given related term.
  530. *
  531. * @param[in] it Link map iterator handle, obtained with
  532. * #LSUP_link_map_iter_new().
  533. *
  534. * @param[in,out] spo Result triple. The triple handle must be pre-allocated
  535. * (it may be TRP_DUMMY); each call to this function sets its members
  536. * to term handles owned by the link map. If rc != LSUP_OK, the contents are
  537. * undefined.
  538. *
  539. * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
  540. * has been reached; <0 on error.
  541. */
  542. LSUP_rc
  543. LSUP_link_map_triples (LSUP_LinkMapIterator *it, LSUP_Triple *spo);
  544. ///@} END defgroup term
  545. #endif