/** @file term.h
 *
 * RDF term and triple module: public type definitions and API.
 */
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. //#include <assert.h>
  4. #include "lsup/buffer.h"
  5. #include "lsup/namespace.h"
  6. /** @defgroup term RDF term and triple module
  7. * @ingroup public
  8. * @{
  9. */
  /** @brief Size of a UUID4 URN string: UUIDSTR_SIZE plus 10.
   *
   * The replacement list is parenthesized so that the macro expands as a
   * single operand in any expression, e.g. `2 * UUID4_URN_SIZE`.
   */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
  /*
   * Some common RDF term values. Each one is provided both as a fully
   * qualified IRI and in namespace-prefixed form (`*_NS`).
   */

  /// `rdf:type` predicate, fully qualified IRI.
  #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
  /// `rdf:type` predicate, namespace-prefixed form.
  #define LSUP_RDF_TYPE_NS "rdf:type"

  /// Default data type for untyped literals, fully qualified IRI.
  #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
  /// Default data type for untyped literals, namespace-prefixed form.
  #define DEFAULT_DTYPE_NS "xsd:string"
  17. /*
  18. * Data types.
  19. */
  /** @brief Language tag.
   *
   * Currently restricted to 7 characters; the array is 8 bytes long
   * (presumably to leave room for a NUL terminator; confirm against the
   * implementation).
   */
  typedef char LSUP_LangTag[8];
  /** @brief Term type.
   *
   * Discriminates the kind of RDF term held by an #LSUP_Term.
   */
  typedef enum {
      /// Undefined placeholder or result of an error.
      ///
      /// Invalid for most operations.
      LSUP_TERM_UNDEFINED  = 0,
      /// IRI reference.
      LSUP_TERM_IRIREF     = 1,
      /// Literal without language tag.
      LSUP_TERM_LITERAL    = 2,
      /// Language-tagged string literal.
      LSUP_TERM_LT_LITERAL = 3,
      /// Blank node.
      LSUP_TERM_BNODE      = 4,
  } LSUP_TermType;
  /** @brief Opaque IRI information.
   *
   * Only the type name is declared here; the structure layout is not part of
   * this header.
   */
  typedef struct iri_info_t LSUP_IRIInfo;
  /** @brief Opaque iterator for link maps.
   *
   * Only the type name is declared here; the structure layout is not part of
   * this header.
   */
  typedef struct link_map_iter LSUP_LinkMapIterator;
  36. /// RDF term.
  37. typedef struct term_t {
  38. char * data; ///< URI, literal value, or BNode label.
  39. union {
  40. struct term_t * datatype; ///< Data type IRI for LSUP_TERM_LITERAL.
  41. LSUP_LangTag lang; ///< Lang tag for LSUP_TERM_LT_LITERAL.
  42. LSUP_Key bnode_id; ///< BNode ID for comparison & skolemization.
  43. LSUP_IRIInfo * iri_info; ///< IRI information structure.
  44. };
  45. LSUP_TermType type; ///< Term type.
  46. } LSUP_Term;
  /** @brief Shorthand to test if a term is a literal of any kind.
   *
   * True for both #LSUP_TERM_LITERAL and #LSUP_TERM_LT_LITERAL terms.
   *
   * @param term Pointer to a term. The argument may be evaluated twice, so it
   *  should not have side effects.
   */
  #define LSUP_IS_LITERAL(term)                                                 \
      ((term)->type == LSUP_TERM_LITERAL || (term)->type == LSUP_TERM_LT_LITERAL)
  /** @brief Whether the environment is already initialized.
   *
   * The test is simply whether #LSUP_default_datatype is non-NULL.
   */
  #define LSUP_IS_INIT (LSUP_default_datatype != NULL)
  54. /** @brief RDF triple.
  55. *
  56. * This represents a complete RDF statement. Triple terms can be accessed
  57. * directly via the `s`, `p`, `o` members or sequentially via
  58. * #LSUP_triple_pos().
  59. */
  60. typedef struct triple_t {
  61. LSUP_Term *s; ///< Subject.
  62. LSUP_Term *p; ///< Predicate.
  63. LSUP_Term *o; ///< Object.
  64. } LSUP_Triple;
  /** @brief Link type.
   *
   * The two letters in parentheses name the triple positions that a link map
   * of that type relates to its linked term.
   */
  typedef enum {
      /// Inbound link (sp).
      LSUP_LINK_INBOUND  = 0,
      /// Outbound link (po).
      LSUP_LINK_OUTBOUND = 1,
      /// Edge link (so).
      LSUP_LINK_EDGE     = 2,
  } LSUP_LinkType;
  /** @brief The immediate neighborhood of terms connected to a term.
   *
   * A link map is a hash map in which each key term is related to a set of
   * one or more other terms. The hash map sits inside an opaque handle and is
   * manipulated through the `LSUP_link_map_*` functions.
   *
   * The meaning of keys and sets depends on the type of the link map:
   *
   * - `LSUP_LINK_INBOUND`: the keys represent predicates, the sets related to
   *   them are the subjects, and the term associated with the link map is the
   *   object.
   * - `LSUP_LINK_OUTBOUND`: the keys represent predicates, the related sets
   *   are the objects, and the associated term is the subject.
   * - `LSUP_LINK_EDGE`: the keys represent subjects, the related sets are the
   *   objects, and the associated term is the predicate.
   */
  typedef struct link_map LSUP_LinkMap;
  /** @brief A set of unique terms.
   *
   * Used to bulk-add terms to a link map.
   */
  typedef struct hashmap LSUP_TermSet;
  93. /*
  94. * External variables.
  95. */
  /** @brief Compiled hash of the default literal data type.
   *
   * Declared here only; the variable is defined elsewhere.
   */
  extern uint32_t LSUP_default_dtype_key;
  99. /** @brief Default literal data type URI.
  100. *
  101. * Literal terms created with undefined data type will have it set to this
  102. * URI implicitly.
  103. */
  104. extern LSUP_Term *LSUP_default_datatype;
  105. /** @brief Global term cache.
  106. *
  107. * Stores frequently used terms, e.g. data type URIs.
  108. */
  109. extern LSUP_TermSet *LSUP_term_cache;
  110. /*
  111. * API functions.
  112. */
  113. /** @brief Create a new term.
  114. *
  115. * This is a generic function; it is recommended to use specialized functions
  116. * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict
  117. * type checks for the metadata parameter.
  118. *
  119. * @param[in] type Term type. One of #LSUP_TermType.
  120. *
  121. * @param[in] data Term data: textual URI, literal value without data type
  122. * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
  123. * random identifier is generated.
  124. *
  125. * @param[in] metadata language tag (LSUP_LangTag *) for language-tagged
  126. * literals; or data type (LSUP_Term *) for other literals. It may be NULL.
  127. *
  128. * @return New term, which must be freed with #LSUP_term_free after use; or
  129. * NULL on error.
  130. */
  131. LSUP_Term *
  132. LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
  /** @brief Placeholder term to use with LSUP_term_reset.
   *
   * Every expansion is a call to #LSUP_term_new() with an undefined type and
   * no data, so each use yields a separate term handle.
   */
  #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
  136. /** @brief Create an IRI reference.
  137. *
  138. * Must be freed with #LSUP_term_free.
  139. *
  140. * @param[in] data The fully qualified URI. If NULL, a UUID4 URN is generated.
  141. *
  142. * @return same as #LSUP_term_new().
  143. */
  144. inline LSUP_Term *
  145. LSUP_iriref_new (const char *data)
  146. { return LSUP_term_new (LSUP_TERM_IRIREF, data, NULL); }
  147. /** @brief Create an IRI reference from a namespace-prefixed string.
  148. *
  149. * Must be freed with #LSUP_term_free.
  150. *
  151. * @param[in] data Namespace-prefixed URI. It MUST NOT be NULL,
  152. *
  153. * @return same as #LSUP_term_new().
  154. */
  155. inline LSUP_Term *
  156. LSUP_iriref_new_ns (const char *data)
  157. {
  158. char *fquri;
  159. RCNL (LSUP_nsmap_normalize_uri (data, &fquri));
  160. LSUP_Term *t = LSUP_term_new (LSUP_TERM_IRIREF, fquri, NULL);
  161. free (fquri);
  162. return t;
  163. }
  164. /** @brief Create a new absolute IRI from a path relative to a root IRI.
  165. *
  166. * If the provided IRI is already an absolute IRI, the result is semantically
  167. * identical to the input.
  168. *
  169. * If the relative IRI begins with a '/', the resulting IRI is relative to the
  170. * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
  171. * it is ignored.
  172. *
  173. * Otherwise, the resulting IRI is relative to the full root string.
  174. *
  175. * @param[in] root Root IRI that the new IRI should be relative to.
  176. *
  177. * @param[in] iri Term with an IRI relative to the webroot.
  178. *
  179. * @return New absolute IRI, or NULL if either term is not an IRI.
  180. */
  181. LSUP_Term *
  182. LSUP_iriref_new_abs (const LSUP_Term *root, const LSUP_Term *iri);
  183. /** @brief Create a new relative IRI from an absolute IRI and a web root IRI.
  184. *
  185. * This works with namespace-prefixed IRIs and returns a term of the same type
  186. * as the input.
  187. *
  188. * @param[in] root Root IRI that the new IRI should be relative to.
  189. *
  190. * @param[in] iri Full IRI.
  191. *
  192. * @return New IRI, or NULL if either term is not an IRI. If the input IRI is
  193. * not a path under the root IRI, the result will be identical to the input.
  194. */
  195. LSUP_Term *
  196. LSUP_iriref_new_rel (const LSUP_Term *root, const LSUP_Term *iri);
  197. /** @brief Shortcut to create a literal term.
  198. *
  199. * Must be freed with #LSUP_term_free.
  200. *
  201. * @param[in] data The literal string.
  202. *
  203. * @param[in] datatype Data type URI string. If NULL, the default data type
  204. * (xsd:string) is used. The new term takes ownership of the pointer.
  205. *
  206. * @return same as #LSUP_term_new().
  207. */
  208. inline LSUP_Term *
  209. LSUP_literal_new (const char *data, LSUP_Term *datatype)
  210. { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
  211. /** @brief Shortcut to create a language-tagged literal term.
  212. *
  213. * Must be freed with #LSUP_term_free.
  214. *
  215. * @param[in] data The literal string.
  216. *
  217. * @param[in] lang Language tag string.
  218. *
  219. * @return same as #LSUP_term_new().
  220. */
  221. inline LSUP_Term *
  222. LSUP_lt_literal_new (const char *data, char *lang)
  223. { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
  224. /** @brief Shortcut to create a blank node.
  225. *
  226. * Must be freed with #LSUP_term_free.
  227. *
  228. * @param[in] data The BNode identifier. It can be NULL, in which case, a
  229. * random identifier is minted.
  230. *
  231. * @return same as #LSUP_term_new().
  232. */
  233. inline LSUP_Term *
  234. LSUP_bnode_new (const char *data)
  235. { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
  236. /** @brief Copy a term.
  237. *
  238. * @param[in] src The term to copy.
  239. *
  240. * @return A new duplicate term handle.
  241. */
  242. LSUP_Term *
  243. LSUP_term_copy (const LSUP_Term *src);
  244. /** @brief Deserialize a buffer into a term.
  245. *
  246. * @param[in] sterm Buffer to convert into a term. It must be a valid
  247. * serialized term from store or obtained with #LSUP_term_serialize().
  248. *
  249. * @return New term handle. It must be freed with #LSUP_term_free().
  250. */
  251. LSUP_Term *
  252. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  253. /** @brief Serialize a term into a buffer.
  254. *
  255. * @param[in] term Term to convert into a buffer.
  256. *
  257. * @return New buffer handle. It must be freed with #LSUP_buffer_free().
  258. */
  259. LSUP_Buffer *
  260. LSUP_term_serialize (const LSUP_Term *term);
  261. /** @brief Hash a buffer.
  262. */
  263. LSUP_Key
  264. LSUP_term_hash (const LSUP_Term *term);
  265. /** @brief Compare two terms.
  266. *
  267. * The terms evaluate as equal if their hashes are equal—i.e. if they are
  268. * semantically equivalent.
  269. */
  270. inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  271. { return term1 == term2 || LSUP_term_hash (term1) == LSUP_term_hash (term2); }
  272. void
  273. LSUP_term_free (LSUP_Term *term);
  274. /** @brief Get the prefix portion of a IRI ref.
  275. *
  276. * @param[in] iri IRI reference handle.
  277. *
  278. * @return String containing the protocol and domain name part of the IRI. It
  279. * should be freed after use.
  280. */
  281. char *
  282. LSUP_iriref_prefix (const LSUP_Term *iri);
  283. /** @brief Get the path portion of a IRI ref.
  284. *
  285. * @param[in] iri IRI reference handle.
  286. *
  287. * @return String containing the path of the IRI relative to the web root. For
  288. * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
  289. * be freed after use.
  290. */
  291. char *
  292. LSUP_iriref_path (const LSUP_Term *iri);
  293. /** @brief Get the fragment portion of a IRI ref.
  294. *
  295. * @param[in] iri IRI reference handle.
  296. *
  297. * @return String containing the fragment part of the IRI, or NULL if the IRI
  298. * contains no fragment. It should be freed after use.
  299. */
  300. char *
  301. LSUP_iriref_frag (const LSUP_Term *iri);
  302. /*
  303. * TRIPLES
  304. */
  305. /** @brief Create a new triple from three terms.
  306. *
  307. * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
  308. * To only free the triple, use free().
  309. *
  310. * TODO Term types are not validated at the moment.
  311. *
  312. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  313. *
  314. * @param[in] p Triple predicate. It must be an IRIRef.
  315. *
  316. * @param[in] o Triple object.
  317. *
  318. */
  319. LSUP_Triple *
  320. LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  /** @brief Dummy triple with NULL slots. It is not a valid triple.
   *
   * Every expansion is a call to #LSUP_triple_new() with three NULL terms, so
   * each use yields a separate triple handle.
   */
  #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
  324. LSUP_Triple *
  325. LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
  326. LSUP_BufferTriple *
  327. LSUP_triple_serialize (const LSUP_Triple *spo);
  328. /** @brief Initialize internal term pointers in a heap-allocated triple.
  329. *
  330. * @sa #LSUP_triple_new()
  331. *
  332. * @param[in] spo Triple pointer to initialize.
  333. *
  334. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  335. *
  336. * @param[in] p Triple predicate. It must be an IRIRef.
  337. *
  338. * @param[in] o Triple object.
  339. */
  340. LSUP_rc
  341. LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  342. /** @brief Free the internal pointers of a triple.
  343. *
  344. * @param[in] spo Triple to be freed.
  345. */
  346. void
  347. LSUP_triple_done (LSUP_Triple *spo);
  348. /** @brief Free a triple and all its internal pointers.
  349. *
  350. * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
  351. * back end), use a simple free(spo) instead of this.
  352. *
  353. * @param[in] spo Triple to be freed.
  354. */
  355. void
  356. LSUP_triple_free (LSUP_Triple *spo);
  357. /** @brief Get triple by term position.
  358. *
  359. * Useful for looping over all terms.
  360. *
  361. * @param[in] trp Triple pointer.
  362. *
  363. * @param[in] n A number between 0÷2.
  364. *
  365. * @return Corresponding triple term or NULL if n is out of range.
  366. */
  367. inline LSUP_Term *
  368. LSUP_triple_pos (const LSUP_Triple *trp, const LSUP_TriplePos n)
  369. {
  370. if (n == TRP_POS_S) return trp->s;
  371. if (n == TRP_POS_P) return trp->p;
  372. if (n == TRP_POS_O) return trp->o;
  373. return NULL;
  374. }
  375. /** @brief Hash a triple.
  376. *
  377. * TODO This doesn't handle blank nodes correctly.
  378. */
  379. inline LSUP_Key
  380. LSUP_triple_hash (const LSUP_Triple *trp)
  381. {
  382. LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
  383. LSUP_Key hash = LSUP_btriple_hash (strp);
  384. LSUP_btriple_free (strp);
  385. return hash;
  386. }
  387. /** @brief Create a new term set.
  388. *
  389. * @return New empty term set.
  390. */
  391. LSUP_TermSet *
  392. LSUP_term_set_new (void);
  393. /** @brief Free a term set.
  394. *
  395. * @param[in] ts Term set handle.
  396. */
  397. void
  398. LSUP_term_set_free (LSUP_TermSet *ts);
  399. /** @brief Add term to a term set.
  400. *
  401. * If the same term is already in the set, it is not replaced, and the existing
  402. * term's handle is made available in the `existing` variable. In this case,
  403. * the caller may want to free the passed term which has not been added.
  404. *
  405. * @param[in] ts Term set to be added to.
  406. *
  407. * @param[in] term Term to be added to the list. The term set will take
  408. * ownership of the term and free it when it's freed with
  409. * #LSUP_term_set_free()—only if the return code is LSUP_OK.
  410. *
  411. * @param[out] existing If not NULL, and if the term being added is a
  412. * duplicate, this variable will be populated with the existing term handle.
  413. *
  414. * @return LSUP_OK on success; LSUP_NOACTION if the term is duplicate;
  415. * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge
  416. * of freeing the `term` handle.
  417. */
  418. LSUP_rc
  419. LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing);
  420. /** @brief Get a term from a term set.
  421. *
  422. * @param[in] ts Term set handle.
  423. *
  424. * @param[in] key Key for the queried term.
  425. *
  426. * @return The retrieved term if found, or NULL. The term must not be
  427. * modified or freed.
  428. */
  429. const LSUP_Term *
  430. LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key);
  431. /** @brief Iterate trough a term set.
  432. *
  433. * @param[in] ts Term set handle.
  434. *
  435. * @param[in,out] i Iterator to be initially set to 0.
  436. *
  437. * @param[out] term Pointer to be populated with the next term on success. It
  438. * may be NULL.
  439. *
  440. * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
  441. * set has been reached.
  442. */
  443. LSUP_rc
  444. LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);
  445. /** @brief Size of a term set.
  446. *
  447. * @param[in] ts Term set handle.
  448. *
  449. * @return Number of unique terms in the term set.
  450. */
  451. size_t
  452. LSUP_term_set_size (LSUP_TermSet *ts);
  453. /** @brief New link map.
  454. *
  455. * @param[in] linked_term Term to be linked to map. The term is copied and may
  456. * be freed after this function call.
  457. *
  458. * @param[in] type Type of links that the link map shall contain.
  459. * @sa #LSUP_LinkType
  460. *
  461. * @return a new empty link map.
  462. */
  463. LSUP_LinkMap *
  464. LSUP_link_map_new (const LSUP_Term *linked_term, LSUP_LinkType type);
  465. /** @brief Free a link map.
  466. *
  467. * All arrays and term handles are recursively freed.
  468. *
  469. * @param[in] lm link map handle obtained with #LSUP_link_map_new().
  470. */
  471. void
  472. LSUP_link_map_free (LSUP_LinkMap *lm);
  473. /** @brief Return the link map type.
  474. *
  475. * @return Link type. @sa #LSUP_LinkType
  476. */
  477. LSUP_LinkType
  478. LSUP_link_map_type (const LSUP_LinkMap *map);
  479. /** @brief Add a term - term set pair to a link map.
  480. *
  481. * If there is already a term set for the given term, items from the added term
  482. * are added to the existing term set (if not duplicated). Otherwise, the term
  483. * set handle is linked to the new term.
  484. *
  485. * In any case, the caller should not directly use the term and term set after
  486. * passing them to this function.
  487. *
  488. * @param[in] lmap Link map handle obtained with #LSUP_link_map_new().
  489. *
  490. * @param[in] term Term to be associated with the given object list. The
  491. * link map takes ownership of the term.
  492. *
  493. * @param[in] tset term set to be associated with the given term. The link
  494. * map takes ownership of the term set and the terms in it.
  495. *
  496. * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
  497. */
  498. LSUP_rc
  499. LSUP_link_map_add (
  500. LSUP_LinkMap *lmap, LSUP_Term *term, LSUP_TermSet *tset);
  501. /** @brief Create a new iterator to loop through a link map.
  502. *
  503. * @param[in] lmap Map handle to iterate.
  504. */
  505. LSUP_LinkMapIterator *
  506. LSUP_link_map_iter_new (const LSUP_LinkMap *lmap);
  507. /** @brief Iterate through a link map.
  508. *
  509. * Each call to this function yields a linked term and the related term set.
  510. *
  511. * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
  512. *
  513. * @param[out] lt Linked term returned.
  514. *
  515. * @param[out] ts Term set returned.
  516. *
  517. * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
  518. * has been reached.
  519. */
  520. LSUP_rc
  521. LSUP_link_map_next (
  522. LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);
  523. /// Free a link map iterator.
  524. void
  525. LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);
  526. /**@brief Iterate over a link map and generate triples.
  527. *
  528. * Calling this function repeatedly builds triples for all the linked terms and
  529. * term sets in the map, based on a given related term.
  530. *
  531. * @param[in] it Link map iterator handle, obtained with
  532. * #LSUP_link_map_iter_new().
  533. *
  534. * @param[in,out] spo Result triple. The triple handle must be pre-allocated
  535. * (it may be TRP_DUMMY) and calls to this function will be set its memebers
  536. * to term handles owned by the link map. If rc != LSUP_OK, the contents are
  537. * undefined.
  538. *
  539. * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
  540. * has been reached; <0 on error.
  541. */
  542. LSUP_rc
  543. LSUP_link_map_triples (LSUP_LinkMapIterator *it, LSUP_Triple *spo);
  544. ///@} END defgroup term
  545. #endif