term.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. #include <assert.h>
  4. #include "buffer.h"
  5. #include "namespace.h"
  6. /** @defgroup term RDF term and triple module
  7. * @ingroup public
  8. * @{
  9. */
  /** @brief Size of a UUID4-based URN: UUIDSTR_SIZE plus 10.
   *
   * NOTE(review): the 10 extra bytes presumably cover the URN scheme prefix;
   * confirm against the implementation.
   *
   * The expansion is parenthesized so that the macro can be used safely inside
   * larger expressions (e.g. `2 * UUID4_URN_SIZE`).
   */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)

  // Some common RDF term values.

  /// IRI of the `rdf:type` predicate (fully qualified).
  #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
  /// IRI of the `rdf:type` predicate (namespace-prefixed).
  #define LSUP_RDF_TYPE_NS "rdf:type"

  /// Default data type for untyped literals (fully qualified IRI).
  #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
  /// Default data type for untyped literals (namespace-prefixed IRI).
  #define DEFAULT_DTYPE_NS "xsd:string"
  17. /*
  18. * Data types.
  19. */
  20. /// Language tag, currently restricted to 7 characters.
  21. typedef char LSUP_LangTag[8];
  /// Term type.
  typedef enum {
      /** Undefined placeholder or result of an error.
       *
       * Invalid for most operations.
       */
      LSUP_TERM_UNDEFINED  = 0,
      LSUP_TERM_IRIREF     = 1,   ///< IRI reference.
      LSUP_TERM_NS_IRIREF  = 2,   ///< Namespace-prefixed IRI reference.
      LSUP_TERM_LITERAL    = 3,   ///< Literal without language tag.
      LSUP_TERM_LT_LITERAL = 4,   ///< Language-tagged string literal.
      LSUP_TERM_BNODE      = 5,   ///< Blank node.
  } LSUP_TermType;
  34. /// Opaque IRI information.
  35. typedef struct iri_info_t LSUP_IRIInfo;
  36. /// Opaque iterator for link maps.
  37. typedef struct link_map_iter LSUP_LinkMapIterator;
  38. /// RDF term.
  39. typedef struct term_t {
  40. char * data; // URI, literal value, or BNode label.
  41. union {
  42. struct term_t * datatype; // Data type IRI for LSUP_TERM_LITERAL.
  43. LSUP_LangTag lang; // Lang tag for LSUP_TERM_LT_LITERAL.
  44. LSUP_Key bnode_id; // BNode ID for comparison & skolemization.
  45. LSUP_IRIInfo * iri_info; // IRI information structure.
  46. };
  47. LSUP_TermType type; // Term type.
  48. } LSUP_Term;
  /** @brief Shorthand to test if a term is an IRI of any kind.
   *
   * True for both #LSUP_TERM_IRIREF and #LSUP_TERM_NS_IRIREF terms.
   *
   * Note that `term` appears twice in the expansion, so the argument should
   * be free of side effects.
   */
  #define LSUP_IS_IRI(term) \
      (LSUP_TERM_IRIREF == (term)->type || LSUP_TERM_NS_IRIREF == (term)->type)
  /** @brief Shorthand to test if a term is a literal of any kind.
   *
   * True for both #LSUP_TERM_LITERAL and #LSUP_TERM_LT_LITERAL terms.
   *
   * Note that `term` appears twice in the expansion, so the argument should
   * be free of side effects.
   */
  #define LSUP_IS_LITERAL(term) \
      (LSUP_TERM_LITERAL == (term)->type || LSUP_TERM_LT_LITERAL == (term)->type)
  /** @brief Whether the environment is already initialized.
   *
   * Evaluates to true once the `LSUP_default_datatype` global is non-NULL.
   */
  #define LSUP_IS_INIT (NULL != LSUP_default_datatype)
  60. /** @brief RDF triple.
  61. *
  62. * This represents a complete RDF statement. Triple terms can be accessed
  63. * directly via the `s`, `p`, `o` members or sequentially via
  64. * #LSUP_triple_pos().
  65. */
  66. typedef struct triple_t {
  67. LSUP_Term *s; ///< Subject.
  68. LSUP_Term *p; ///< Predicate.
  69. LSUP_Term *o; ///< Object.
  70. } LSUP_Triple;
  /// Link type.
  typedef enum {
      LSUP_LINK_INBOUND  = 0,   ///< Inbound link (sp).
      LSUP_LINK_OUTBOUND = 1,   ///< Outbound link (po).
      LSUP_LINK_EDGE     = 2,   ///< Edge link (so).
  } LSUP_LinkType;
  77. /** @brief The immediate neighborhood of terms connected to a term.
  78. *
  79. * This is a hash map whose each term is related to a set of one or more other
  80. * terms. The hash map is inside an opaque handle and is manipulated via the
  81. * `LSUP_link_map_*` functions.
  82. *
  83. * If the type of the link map is `LSUP_LINK_INBOUND`, the map keys
  84. * represent predicates and the sets related to them are the subjects, and the
  85. * term associated to the link map is the object; if
  86. * `LSUP_LINK_OUTBOUND`, the keys represent predicates, the related sets
  87. * objects, and the associated term is the subject. If `LSUP_LINK_EDGE`, the
  88. * keys represent subjects and the related sets objects, and the associated
  89. * term is the predicate.
  90. */
  91. typedef struct link_map LSUP_LinkMap;
  92. /** @brief a set of unique terms.
  93. *
  94. * This is used to bulk-add terms to a link map.
  95. */
  96. typedef struct hashmap LSUP_TermSet;
  97. /*
  98. * External variables.
  99. */
  100. /** @brief Compiled hash of default literal data type.
  101. */
  102. extern uint32_t LSUP_default_dtype_key;
  103. /** @brief Default literal data type URI.
  104. *
  105. * Literal terms created with undefined data type will have it set to this
  106. * URI implicitly.
  107. */
  108. extern LSUP_Term *LSUP_default_datatype;
  109. /** @brief Global term cache.
  110. *
  111. * Stores frequently used terms, e.g. data type URIs.
  112. */
  113. extern LSUP_TermSet *LSUP_term_cache;
  114. /*
  115. * API functions.
  116. */
  117. /** @brief Create a new term.
  118. *
  119. * This is a generic function; it is recommended to use specialized functions
  120. * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict
  121. * type checks for the metadata parameter.
  122. *
  123. * @param[in] type Term type. One of #LSUP_TermType.
  124. *
  125. * @param[in] data Term data: textual URI, literal value without data type
  126. * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
  127. * random identifier is generated.
  128. *
  129. * @param[in] metadata Namespace map (LSUP_NSMap *) for IRI refs; language tag
  130. * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
  131. * for other literals. It may be NULL.
  132. *
  133. * @return New term, which must be freed with #LSUP_term_free after use; or
  134. * NULL on error.
  135. */
  136. LSUP_Term *
  137. LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
  138. /** @brief Placeholder term to use with LSUP_term_reset.
  139. */
  140. #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
  141. /** @brief Shortcut to create an IRI reference.
  142. *
  143. * Must be freed with #LSUP_term_free.
  144. *
  145. * @param[in] data The URI string. If NULL, a UUID4-based URN is generated.
  146. * This cannot be NULL if the nsm parameter is not NULL.
  147. *
  148. * @param[in] nsm Namespace map. If not NULL, a namespace-prefixed
  149. * (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one
  150. * (#LSUP_TERM_IRIREF).
  151. *
  152. * @return same as #LSUP_term_new().
  153. */
  154. inline LSUP_Term *
  155. LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
  156. {
  157. return (
  158. nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) :
  159. LSUP_term_new (LSUP_TERM_IRIREF, data, NULL));
  160. }
  161. /** @brief Create a new absolute IRI from a path relative to a root IRI.
  162. *
  163. * The term is always of type LSUP_TERM_IRIREF (i.e. not namespace-prefixed).
  164. *
  165. * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix)
  166. * the result is semantically identical to the input.
  167. *
  168. * If the relative IRI begins with a '/', the resulting IRI is relative to the
  169. * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
  170. * it is ignored.
  171. *
  172. * Otherwise, the resulting IRI is relative to the full root string.
  173. *
  174. * @param[in] root Root IRI that the new IRI should be relative to.
  175. *
  176. * @param[in] iri Term with an IRI relative to the webroot.
  177. *
  178. * @return New absolute IRI, or NULL if either term is not an IRI.
  179. */
  180. LSUP_Term *
  181. LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri);
  182. /** @brief Create a new relative IRI from an absolute IRI and a web root IRI.
  183. *
  184. * This works with namespace-prefixed IRIs and returns a term of the same type
  185. * as the input.
  186. *
  187. * @param[in] root Root IRI that the new IRI should be relative to.
  188. *
  189. * @param[in] iri Full IRI.
  190. *
  191. * @return New IRI, or NULL if either term is not an IRI. If the input IRI is
  192. * not a path under the root IRI, the result will be identical to the input.
  193. */
  194. LSUP_Term *
  195. LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri);
  196. /** @brief Shortcut to create a literal term.
  197. *
  198. * Must be freed with #LSUP_term_free.
  199. *
  200. * @param[in] data The literal string.
  201. *
  202. * @param[in] datatype Data type URI string. If NULL, the default data type
  203. * (xsd:string) is used. The new term takes ownership of the pointer.
  204. *
  205. * @return same as #LSUP_term_new().
  206. */
  207. inline LSUP_Term *
  208. LSUP_literal_new (const char *data, LSUP_Term *datatype)
  209. { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
  210. /** @brief Shortcut to create a language-tagged literal term.
  211. *
  212. * Must be freed with #LSUP_term_free.
  213. *
  214. * @param[in] data The literal string.
  215. *
  216. * @param[in] lang Language tag string.
  217. *
  218. * @return same as #LSUP_term_new().
  219. */
  220. inline LSUP_Term *
  221. LSUP_lt_literal_new (const char *data, char *lang)
  222. { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
  223. /** @brief Shortcut to create a blank node.
  224. *
  225. * Must be freed with #LSUP_term_free.
  226. *
  227. * @param[in] data The BNode identifier. It can be NULL, in which case, a
  228. * random identifier is minted.
  229. *
  230. * @return same as #LSUP_term_new().
  231. */
  232. inline LSUP_Term *
  233. LSUP_bnode_new (const char *data)
  234. { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
  235. /** @brief Copy a term.
  236. *
  237. * @param[in] src The term to copy.
  238. *
  239. * @return A new duplicate term handle.
  240. */
  241. LSUP_Term *
  242. LSUP_term_copy (const LSUP_Term *src);
  243. /** @brief Deserialize a buffer into a term.
  244. *
  245. * @param[in] sterm Buffer to convert into a term. It must be a valid
  246. * serialized term from store or obtained with #LSUP_term_serialize().
  247. *
  248. * @return New term handle. It must be freed with #LSUP_term_free().
  249. */
  250. LSUP_Term *
  251. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  252. /** @brief Serialize a term into a buffer.
  253. *
  254. * @param[in] term Term to convert into a buffer.
  255. *
  256. * @return New buffer handle. It must be freed with #LSUP_buffer_free().
  257. */
  258. LSUP_Buffer *
  259. LSUP_term_serialize (const LSUP_Term *term);
  260. /** @brief Hash a term.
  261. */
  262. LSUP_Key
  263. LSUP_term_hash (const LSUP_Term *term);
  264. /** @brief Compare two terms.
  265. *
  266. * The terms evaluate as equal if their hashes are equal—i.e. if they are
  267. * semantically equivalent.
  268. */
  269. inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  270. { return term1 == term2 || LSUP_term_hash (term1) == LSUP_term_hash (term2); }
  271. void
  272. LSUP_term_free (LSUP_Term *term);
  273. /** @brief Namespace map of a IRI ref.
  274. *
  275. * @param[in] iri IRI reference handle.
  276. *
  277. * @return A pointer to the namespace map associated with the IRI. It is
  278. * freed at program shutdown.
  279. */
  280. LSUP_NSMap *
  281. LSUP_iriref_nsm (const LSUP_Term *iri);
  282. /** @brief Get the prefix portion of a IRI ref.
  283. *
  284. * @param[in] iri IRI reference handle.
  285. *
  286. * @return String containing the protocol and domain name part of the IRI. It
  287. * should be freed after use.
  288. */
  289. char *
  290. LSUP_iriref_prefix (const LSUP_Term *iri);
  291. /** @brief Get the path portion of a IRI ref.
  292. *
  293. * @param[in] iri IRI reference handle.
  294. *
  295. * @return String containing the path of the IRI relative to the web root. For
  296. * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
  297. * be freed after use.
  298. */
  299. char *
  300. LSUP_iriref_path (const LSUP_Term *iri);
  301. /** @brief Get the fragment portion of a IRI ref.
  302. *
  303. * @param[in] iri IRI reference handle.
  304. *
  305. * @return String containing the fragment part of the IRI, or NULL if the IRI
  306. * contains no fragment. It should be freed after use.
  307. */
  308. char *
  309. LSUP_iriref_frag (const LSUP_Term *iri);
  310. /*
  311. * TRIPLES
  312. */
  313. /** @brief Create a new triple from three terms.
  314. *
  315. * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
  316. * To only free the triple, use free().
  317. *
  318. * TODO Term types are not validated at the moment.
  319. *
  320. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  321. *
  322. * @param[in] p Triple predicate. It must be an IRIRef.
  323. *
  324. * @param[in] o Triple object.
  325. *
  326. */
  327. LSUP_Triple *
  328. LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  329. /** @brief Dummy triple with NULL slots. It is not a valid triple.
  330. */
  331. #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
  332. LSUP_Triple *
  333. LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
  334. LSUP_BufferTriple *
  335. LSUP_triple_serialize (const LSUP_Triple *spo);
  336. /** @brief Initialize internal term pointers in a heap-allocated triple.
  337. *
  338. * @sa #LSUP_triple_new()
  339. *
  340. * @param[in] spo Triple pointer to initialize.
  341. *
  342. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  343. *
  344. * @param[in] p Triple predicate. It must be an IRIRef.
  345. *
  346. * @param[in] o Triple object.
  347. */
  348. LSUP_rc
  349. LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  350. /** @brief Free the internal pointers of a triple.
  351. *
  352. * @param[in] spo Triple to be freed.
  353. */
  354. void
  355. LSUP_triple_done (LSUP_Triple *spo);
  356. /** @brief Free a triple and all its internal pointers.
  357. *
  358. * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
  359. * back end), use a simple free(spo) instead of this.
  360. *
  361. * @param[in] spo Triple to be freed.
  362. */
  363. void
  364. LSUP_triple_free (LSUP_Triple *spo);
  365. /** @brief Get triple by term position.
  366. *
  367. * Useful for looping over all terms.
  368. *
  369. * @param[in] trp Triple pointer.
  370. *
  371. * @param[in] n A number between 0÷2.
  372. *
  373. * @return Corresponding triple term or NULL if n is out of range.
  374. */
  375. inline LSUP_Term *
  376. LSUP_triple_pos (const LSUP_Triple *trp, const LSUP_TriplePos n)
  377. {
  378. if (n == TRP_POS_S) return trp->s;
  379. if (n == TRP_POS_P) return trp->p;
  380. if (n == TRP_POS_O) return trp->o;
  381. return NULL;
  382. }
  383. /** @brief Hash a triple.
  384. *
  385. * TODO This doesn't handle blank nodes correctly.
  386. */
  387. inline LSUP_Key
  388. LSUP_triple_hash (const LSUP_Triple *trp)
  389. {
  390. LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
  391. LSUP_Key hash = LSUP_btriple_hash (strp);
  392. LSUP_btriple_free (strp);
  393. return hash;
  394. }
  395. /** @brief Create a new term set.
  396. *
  397. * @return New empty term set.
  398. */
  399. LSUP_TermSet *
  400. LSUP_term_set_new (void);
  401. /** @brief Free a term set.
  402. *
  403. * @param[in] ts Term set handle.
  404. */
  405. void
  406. LSUP_term_set_free (LSUP_TermSet *ts);
  407. /** @brief Add term to a term set.
  408. *
  409. * If the same term is already in the set, it is not replaced, and the existing
  410. * term's handle is made available in the `existing` variable. In this case,
  411. * the caller may want to free the passed term which has not been added.
  412. *
  413. * @param[in] ts Term set to be added to.
  414. *
  415. * @param[in] term Term to be added to the list. The term set will take
  416. * ownership of the term and free it when it's freed with
  417. * #LSUP_term_set_free()—only if the return code is LSUP_OK.
  418. *
  419. * @param[out] existing If not NULL, and if the term being added is a
  420. * duplicate, this variable will be populated with the existing term handle.
  421. *
  422. * @return LSUP_OK on success; LSUP_NOACTION if the term is duplicate;
  423. * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge
  424. * of freeing the `term` handle.
  425. */
  426. LSUP_rc
  427. LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing);
  428. /** @brief Get a term from a term set.
  429. *
  430. * @param[in] ts Term set handle.
  431. *
  432. * @param[in] key Key for the queried term.
  433. *
  434. * @return The retrieved term if found, or NULL. The term must not be
  435. * modified or freed.
  436. */
  437. const LSUP_Term *
  438. LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key);
  439. /** @brief Iterate through a term set.
  440. *
  441. * @param[in] ts Term set handle.
  442. *
  443. * @param[in,out] i Iterator to be initially set to 0.
  444. *
  445. * @param[out] term Pointer to be populated with the next term on success. It
  446. * may be NULL.
  447. *
  448. * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
  449. * set has been reached.
  450. */
  451. LSUP_rc
  452. LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);
  453. /** @brief New link map.
  454. *
  455. * The initial state of the returned list is: `{t: [NULL], tl: [NULL]}`
  456. *
  457. * Terms can be added to a term list with #LSUP_term_set_add().
  458. *
  459. * @param[in] type Type of links that the link map shall contain.
  460. * @sa #LSUP_LinkType
  461. *
  462. * @return A new empty link map.
  463. */
  464. LSUP_LinkMap *
  465. LSUP_link_map_new (LSUP_LinkType type);
  466. /** @brief Free a link map.
  467. *
  468. * All arrays and term handles are recursively freed.
  469. *
  470. * @param[in] pol link map handle obtained with #LSUP_link_map_new().
  471. */
  472. void
  473. LSUP_link_map_free (LSUP_LinkMap *pol);
  474. /** @brief Return the link map type.
  475. *
  476. * @return Link type. @sa #LSUP_LinkType
  477. */
  478. LSUP_LinkType
  479. LSUP_link_map_type (const LSUP_LinkMap *map);
  480. /** @brief Add a term - term set pair to a link map.
  481. *
  482. * If there is already a term set for the given term, items from the added term
  483. * are added to the existing term set (if not duplicated). Otherwise, the term
  484. * set handle is linked to the new term.
  485. *
  486. * In any case, the caller should not directly use the term and term set after
  487. * passing them to this function.
  488. *
  489. * @param[in] cmap Link map handle obtained with #LSUP_link_map_new().
  490. *
  491. * @param[in] term Term to be associated with the given object list. The
  492. * link map structure takes ownership of the term.
  493. *
  494. * @param[in] tset Term set to be associated with the given term. The link
  495. * map structure takes ownership of the term set and the terms in it.
  496. *
  497. * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
  498. */
  499. LSUP_rc
  500. LSUP_link_map_add (
  501. LSUP_LinkMap *cmap, LSUP_Term *term, LSUP_TermSet *tset);
  502. /** @brief Create a new iterator to loop through a link map.
  503. *
  504. * @param[in] lmap Map handle to iterate.
  505. *
  506. * @param[in] ext External term to look for connections.
  507. */
  508. LSUP_LinkMapIterator *
  509. LSUP_link_map_iter_new (const LSUP_LinkMap *lmap, LSUP_Term *ext);
  510. /// Free a link map iterator.
  511. void
  512. LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);
  513. /** @brief Iterate through a link map.
  514. *
  515. * Each call to this function yields a linked term and the related term set.
  516. *
  517. * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
  518. *
  519. * @param[out] lt Linked term returned.
  520. *
  521. * @param[out] ts Term set returned.
  522. *
  523. * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
  524. * has been reached.
  525. */
  526. LSUP_rc
  527. LSUP_link_map_next (
  528. LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);
  529. /**@brief Iterate over a link map and generate triples.
  530. *
  531. * Calling this function repeatedly builds triples for all the linked terms and
  532. * term sets in the map, based on a given related term.
  533. *
  534. * @param[in] it Link map iterator handle, obtained with
  535. * #LSUP_link_map_iter_new().
  536. *
  537. * @param[in,out] spo Result triple. The triple handle must be pre-allocated
  538. * (it may be TRP_DUMMY) and calls to this function will set its members
  539. * to term handles owned by the link map. If rc != LSUP_OK, the contents are
  540. * undefined.
  541. *
  542. * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
  543. * has been reached; <0 on error.
  544. */
  545. LSUP_rc
  546. LSUP_link_map_triples (
  547. LSUP_LinkMapIterator *it, LSUP_Triple *spo);
  548. ///@} END defgroup term
  549. #endif