term.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661
  1. #ifndef _LSUP_TERM_H
  2. #define _LSUP_TERM_H
  3. #include <assert.h>
  4. #include "buffer.h"
  5. #include "namespace.h"
  6. /** @defgroup term RDF term and triple module
  7. * @ingroup public
  8. * @{
  9. */
  /** @brief Buffer size for a UUID4-based URN: UUIDSTR_SIZE plus 10 bytes.
   *
   * The replacement list is parenthesized so that the macro behaves as a
   * single operand inside larger expressions (e.g. `2 * UUID4_URN_SIZE`).
   * UUIDSTR_SIZE is expected to be defined by an included header.
   */
  #define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
  11. // Some common RDF term values.
  12. #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
  13. #define LSUP_RDF_TYPE_NS "rdf:type"
  14. /// Default data type for untyped literals (fully qualified IRI).
  15. #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
  16. #define DEFAULT_DTYPE_NS "xsd:string"
  17. /*
  18. * Data types.
  19. */
  20. /// Language tag, currently restricted to 7 characters.
  21. typedef char LSUP_LangTag[8];
  /** @brief Kind of RDF term stored in an #LSUP_Term.
   *
   * Values are consecutive and start at zero, so a zero-initialized term is
   * of type #LSUP_TERM_UNDEFINED.
   */
  typedef enum {
      /** Undefined placeholder or result of an error.
       *
       * Invalid for most operations.
       */
      LSUP_TERM_UNDEFINED     = 0,
      LSUP_TERM_IRIREF        = 1,    ///< IRI reference.
      LSUP_TERM_NS_IRIREF     = 2,    ///< Namespace-prefixed IRI reference.
      LSUP_TERM_LITERAL       = 3,    ///< Literal without language tag.
      LSUP_TERM_LT_LITERAL    = 4,    ///< Language-tagged string literal.
      LSUP_TERM_BNODE         = 5,    ///< Blank node.
  } LSUP_TermType;
  34. /// Opaque IRI information.
  35. typedef struct iri_info_t LSUP_IRIInfo;
  36. /// Opaque iterator for link maps.
  37. typedef struct link_map_iter LSUP_LinkMapIterator;
  38. /// RDF term.
  39. typedef struct term_t {
  40. char * data; // URI, literal value, or BNode label.
  41. union {
  42. struct term_t * datatype; // Data type IRI for LSUP_TERM_LITERAL.
  43. LSUP_LangTag lang; // Lang tag for LSUP_TERM_LT_LITERAL.
  44. LSUP_Key bnode_id; // BNode ID for comparison & skolemization.
  45. LSUP_IRIInfo * iri_info; // IRI information structure.
  46. };
  47. LSUP_TermType type; // Term type.
  48. } LSUP_Term;
  /** @brief Shorthand to test if a term is an IRI of any kind.
   *
   * Evaluates to non-zero for #LSUP_TERM_IRIREF and #LSUP_TERM_NS_IRIREF terms.
   *
   * @note `term` may be evaluated twice: do not pass an expression with side
   *  effects.
   */
  #define LSUP_IS_IRI(term) \
      (LSUP_TERM_IRIREF == (term)->type || LSUP_TERM_NS_IRIREF == (term)->type)
  /** @brief Shorthand to test if a term is a literal of any kind.
   *
   * Evaluates to non-zero for #LSUP_TERM_LITERAL and #LSUP_TERM_LT_LITERAL
   * terms.
   *
   * @note `term` may be evaluated twice: do not pass an expression with side
   *  effects.
   */
  #define LSUP_IS_LITERAL(term) \
      (LSUP_TERM_LITERAL == (term)->type || LSUP_TERM_LT_LITERAL == (term)->type)
  /** @brief Whether the environment is already initialized.
   *
   * The test is whether the global #LSUP_default_datatype pointer is set.
   */
  #define LSUP_IS_INIT (NULL != LSUP_default_datatype)
  60. /** @brief RDF triple.
  61. *
  62. * This represents a complete RDF statement. Triple terms can be accessed
  63. * directly via the `s`, `p`, `o` members or sequentially via
  64. * #LSUP_triple_pos().
  65. */
  66. typedef struct triple_t {
  67. LSUP_Term *s; ///< Subject.
  68. LSUP_Term *p; ///< Predicate.
  69. LSUP_Term *o; ///< Object.
  70. } LSUP_Triple;
  /** @brief Link type of a link map.
   *
   * The letters in parentheses name the two triple positions stored in the
   * map.
   */
  typedef enum {
      LSUP_LINK_INBOUND   = 0,    ///< Inbound link (sp).
      LSUP_LINK_OUTBOUND  = 1,    ///< Outbound link (po).
      LSUP_LINK_EDGE      = 2,    ///< Edge link (so).
  } LSUP_LinkType;
  77. /** @brief The immediate neighborhood of terms connected to a term.
  78. *
  79. * This is a hash map whose each term is related to a set of one or more other
  80. * terms. The hash map is inside an opaque handle and is manipulated via the
  81. * `LSUP_link_map_*` functions.
  82. *
  83. * If the type of the link map is `LSUP_LINK_INBOUND`, the map keys
  84. * represent predicates and the sets related to them are the subjects, and the
  85. * term associated to the link map is the object; if
  86. * `LSUP_LINK_OUTBOUND`, the keys represent predicates, the related sets
  87. * objects, and the associated term is the subject. If `LSUP_LINK_EDGE`, the
  88. * keys represent subjects and the related sets objects, and the associated
  89. * term is the predicate.
  90. */
  91. typedef struct link_map LSUP_LinkMap;
  92. /** @brief a set of unique terms.
  93. *
  94. * This is used to bulk-add terms to a link map.
  95. */
  96. typedef struct hashmap LSUP_TermSet;
  97. /*
  98. * External variables.
  99. */
  100. /** @brief Compiled hash of default literal data type.
  101. */
  102. extern uint32_t LSUP_default_dtype_key;
  103. /** @brief Default literal data type URI.
  104. *
  105. * Literal terms created with undefined data type will have it set to this
  106. * URI implicitly.
  107. */
  108. extern LSUP_Term *LSUP_default_datatype;
  109. /** @brief Global term cache.
  110. *
  111. * Stores frequently used terms, e.g. data type URIs.
  112. */
  113. extern LSUP_TermSet *LSUP_term_cache;
  114. /*
  115. * API functions.
  116. */
  117. /** @brief Create a new term.
  118. *
  119. * This is a generic function; it is recommended to use specialized functions
  120. * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict
  121. * type checks for the metadata parameter.
  122. *
  123. * @param[in] type Term type. One of #LSUP_TermType.
  124. *
  125. * @param[in] data Term data: textual URI, literal value without data type
  126. * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
  127. * random identifier is generated.
  128. *
  129. * @param[in] metadata Namespace map (LSUP_NSMap *) for IRI refs; language tag
  130. * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
  131. * for other literals. It may be NULL.
  132. *
  133. * @return New term, which must be freed with #LSUP_term_free after use; or
  134. * NULL on error.
  135. */
  136. LSUP_Term *
  137. LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
  /** @brief Placeholder term to use with LSUP_term_reset.
   *
   * Each expansion calls #LSUP_term_new() with #LSUP_TERM_UNDEFINED and no
   * data or metadata, so every use yields a separate result that follows the
   * ownership rules of #LSUP_term_new().
   */
  #define TERM_DUMMY (LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL))
  141. /** @brief Shortcut to create an IRI reference.
  142. *
  143. * Must be freed with #LSUP_term_free.
  144. *
  145. * @param[in] data The URI string. If NULL, a UUID4-based URN is generated.
  146. * This cannot be NULL if the nsm parameter is not NULL.
  147. *
  148. * @param[in] nsm Namespace map. If not NULL, a namespace-prefixed
  149. * (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one
  150. * (#LSUP_TERM_IRIREF).
  151. *
  152. * @return same as #LSUP_term_new().
  153. */
  154. inline LSUP_Term *
  155. LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
  156. {
  157. return (
  158. nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) :
  159. LSUP_term_new (LSUP_TERM_IRIREF, data, NULL));
  160. }
  161. /** @brief Create a new absolute IRI from a path relative to a root IRI.
  162. *
  163. * The term is always of type LSUP_TERM_IRIREF (i.e. not namespace-prefixed).
  164. *
  165. * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix)
  166. * the result is semantically identical to the input.
  167. *
  168. * If the relative IRI begins with a '/', the resulting IRI is relative to the
  169. * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
  170. * it is ignored.
  171. *
  172. * Otherwise, the resulting IRI is relative to the full root string.
  173. *
  174. * @param[in] root Root IRI that the new IRI should be relative to.
  175. *
  176. * @param[in] iri Term with an IRI relative to the webroot.
  177. *
  178. * @return New absolute IRI, or NULL if either term is not an IRI.
  179. */
  180. LSUP_Term *
  181. LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri);
  182. /** @brief Create a new relative IRI from an absolute IRI and a web root IRI.
  183. *
  184. * This works with namespace-prefixed IRIs and returns a term of the same type
  185. * as the input.
  186. *
  187. * @param[in] root Root IRI that the new IRI should be relative to.
  188. *
  189. * @param[in] iri Full IRI.
  190. *
  191. * @return New IRI, or NULL if either term is not an IRI. If the input IRI is
  192. * not a path under the root IRI, the result will be identical to the input.
  193. */
  194. LSUP_Term *
  195. LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri);
  196. /** @brief Shortcut to create a literal term.
  197. *
  198. * Must be freed with #LSUP_term_free.
  199. *
  200. * @param[in] data The literal string.
  201. *
  202. * @param[in] datatype Data type URI string. If NULL, the default data type
  203. * (xsd:string) is used. The new term takes ownership of the pointer.
  204. *
  205. * @return same as #LSUP_term_new().
  206. */
  207. inline LSUP_Term *
  208. LSUP_literal_new (const char *data, LSUP_Term *datatype)
  209. { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
  210. /** @brief Shortcut to create a language-tagged literal term.
  211. *
  212. * Must be freed with #LSUP_term_free.
  213. *
  214. * @param[in] data The literal string.
  215. *
  216. * @param[in] lang Language tag string.
  217. *
  218. * @return same as #LSUP_term_new().
  219. */
  220. inline LSUP_Term *
  221. LSUP_lt_literal_new (const char *data, char *lang)
  222. { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
  223. /** @brief Shortcut to create a blank node.
  224. *
  225. * Must be freed with #LSUP_term_free.
  226. *
  227. * @param[in] data The BNode identifier.
  228. *
  229. * @return same as #LSUP_term_new().
  230. */
  231. inline LSUP_Term *
  232. LSUP_bnode_new (const char *data)
  233. { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
  234. /** @brief Copy a term.
  235. *
  236. * @param[in] src The term to copy.
  237. *
  238. * @return A new duplicate term handle.
  239. */
  240. LSUP_Term *
  241. LSUP_term_copy (const LSUP_Term *src);
  242. /** @brief Deserialize a buffer into a term.
  243. *
  244. * @param[in] sterm Buffer to convert into a term. It must be a valid
  245. * serialized term from store or obtained with #LSUP_term_serialize().
  246. *
  247. * @return New term handle. It must be freed with #LSUP_term_free().
  248. */
  249. LSUP_Term *
  250. LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
  251. /** @brief Serialize a term into a buffer.
  252. *
  253. * @param[in] term Term to convert into a buffer.
  254. *
  255. * @return New buffer handle. It must be freed with #LSUP_buffer_free().
  256. */
  257. LSUP_Buffer *
  258. LSUP_term_serialize (const LSUP_Term *term);
  259. /** @brief Hash a term.
  260. */
  261. LSUP_Key
  262. LSUP_term_hash (const LSUP_Term *term);
  263. /** @brief Compare two terms.
  264. *
  265. * The terms evaluate as equal if their hashes are equal—i.e. if they are
  266. * semantically equivalent.
  267. */
  268. inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
  269. { return term1 == term2 || LSUP_term_hash (term1) == LSUP_term_hash (term2); }
  /** @brief Free a term.
   *
   * This is the function that the term constructors in this header
   * (#LSUP_term_new(), #LSUP_iriref_new(), etc.) require for releasing their
   * results.
   *
   * @param[in] term Term handle to be freed. NOTE(review): whether NULL is
   *  accepted is decided by the implementation, which is not visible in this
   *  header — confirm before relying on it.
   */
  270. void
  271. LSUP_term_free (LSUP_Term *term);
  272. /** @brief Namespace map of an IRI ref.
  273. *
  274. * @param[in] iri IRI reference handle.
  275. *
  276. * @return A pointer to the namespace map associated with the IRI. It is
  277. * freed at program shutdown.
  278. */
  279. LSUP_NSMap *
  280. LSUP_iriref_nsm (const LSUP_Term *iri);
  281. /** @brief Get the prefix portion of an IRI ref.
  282. *
  283. * @param[in] iri IRI reference handle.
  284. *
  285. * @return String containing the protocol and domain name part of the IRI. It
  286. * should be freed after use.
  287. */
  288. char *
  289. LSUP_iriref_prefix (const LSUP_Term *iri);
  290. /** @brief Get the path portion of an IRI ref.
  291. *
  292. * @param[in] iri IRI reference handle.
  293. *
  294. * @return String containing the path of the IRI relative to the web root. For
  295. * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
  296. * be freed after use.
  297. */
  298. char *
  299. LSUP_iriref_path (const LSUP_Term *iri);
  300. /** @brief Get the fragment portion of an IRI ref.
  301. *
  302. * @param[in] iri IRI reference handle.
  303. *
  304. * @return String containing the fragment part of the IRI, or NULL if the IRI
  305. * contains no fragment. It should be freed after use.
  306. */
  307. char *
  308. LSUP_iriref_frag (const LSUP_Term *iri);
  309. /*
  310. * TRIPLES
  311. */
  312. /** @brief Create a new triple from three terms.
  313. *
  314. * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
  315. * To only free the triple, use free().
  316. *
  317. * TODO Term types are not validated at the moment.
  318. *
  319. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  320. *
  321. * @param[in] p Triple predicate. It must be an IRIRef.
  322. *
  323. * @param[in] o Triple object.
  324. *
  325. */
  326. LSUP_Triple *
  327. LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  /** @brief Dummy triple with NULL slots. It is not a valid triple.
   *
   * Each expansion calls #LSUP_triple_new() with three NULL terms, so every
   * use yields a separate triple handle.
   */
  #define TRP_DUMMY (LSUP_triple_new (NULL, NULL, NULL))
  /** @brief Create a triple from a buffer triple.
   *
   * NOTE(review): presumably the inverse of #LSUP_triple_serialize() — confirm
   * against the implementation.
   *
   * @param[in] sspo Buffer triple to convert.
   *
   * @return Triple handle. NOTE(review): ownership and error return are not
   *  visible in this header — confirm.
   */
  331. LSUP_Triple *
  332. LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
  /** @brief Serialize a triple into a buffer triple.
   *
   * @param[in] spo Triple to serialize.
   *
   * @return Buffer triple handle. #LSUP_triple_hash() in this header releases
   *  such a handle with LSUP_btriple_free() after use.
   */
  333. LSUP_BufferTriple *
  334. LSUP_triple_serialize (const LSUP_Triple *spo);
  335. /** @brief Initialize internal term pointers in a heap-allocated triple.
  336. *
  337. * @sa #LSUP_triple_new()
  338. *
  339. * @param[in] spo Triple pointer to initialize.
  340. *
  341. * @param[in] s Triple subject. It must be an IRIRef or BNode.
  342. *
  343. * @param[in] p Triple predicate. It must be an IRIRef.
  344. *
  345. * @param[in] o Triple object.
  346. */
  347. LSUP_rc
  348. LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
  349. /** @brief Free the internal pointers of a triple.
  350. *
  351. * @param[in] spo Triple to be freed.
  352. */
  353. void
  354. LSUP_triple_done (LSUP_Triple *spo);
  355. /** @brief Free a triple and all its internal pointers.
  356. *
  357. * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
  358. * back end), use a simple free(spo) instead of this.
  359. *
  360. * @param[in] spo Triple to be freed.
  361. */
  362. void
  363. LSUP_triple_free (LSUP_Triple *spo);
  364. /** @brief Get triple by term position.
  365. *
  366. * Useful for looping over all terms.
  367. *
  368. * @param[in] trp Triple pointer.
  369. *
  370. * @param[in] n A number between 0÷2.
  371. *
  372. * @return Corresponding triple term or NULL if n is out of range.
  373. */
  374. inline LSUP_Term *
  375. LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n)
  376. {
  377. if (n == TRP_POS_S) return trp->s;
  378. if (n == TRP_POS_P) return trp->p;
  379. if (n == TRP_POS_O) return trp->o;
  380. return NULL;
  381. }
  382. /** @brief Hash a triple.
  383. *
  384. * TODO This doesn't handle blank nodes correctly.
  385. */
  386. inline LSUP_Key
  387. LSUP_triple_hash (const LSUP_Triple *trp)
  388. {
  389. LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
  390. LSUP_Key hash = LSUP_btriple_hash (strp);
  391. LSUP_btriple_free (strp);
  392. return hash;
  393. }
  394. /** @brief Create a new term set.
  395. *
  396. * @return New empty term set.
  397. */
  398. LSUP_TermSet *
  399. LSUP_term_set_new (void);
  400. /** @brief Free a term set.
  401. *
  402. * @param[in] ts Term set handle.
  403. */
  404. void
  405. LSUP_term_set_free (LSUP_TermSet *ts);
  406. /** @brief Add term to a term set.
  407. *
  408. * If the same term is already in the set, it is not replaced, and the existing
  409. * term's handle is made available in the `existing` variable. In this case,
  410. * the caller may want to free the passed term which has not been added.
  411. *
  412. * @param[in] ts Term set to be added to.
  413. *
  414. * @param[in] term Term to be added to the list. The term set will take
  415. * ownership of the term and free it when it's freed with
  416. * #LSUP_term_set_free()—only if the return code is LSUP_OK.
  417. *
  418. * @param[out] existing If not NULL, and if the term being added is a
  419. * duplicate, this variable will be populated with the existing term handle.
  420. *
  421. * @return LSUP_OK on success; LSUP_NOACTION if the term is duplicate;
  422. * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge
  423. * of freeing the `term` handle.
  424. */
  425. LSUP_rc
  426. LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing);
  427. /** @brief Get a term from a term set.
  428. *
  429. * @param[in] ts Term set handle.
  430. *
  431. * @param[in] key Key for the queried term.
  432. *
  433. * @return The retrieved term if found, or NULL. The term must not be
  434. * modified or freed.
  435. */
  436. const LSUP_Term *
  437. LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key);
  438. /** @brief Iterate through a term set.
  439. *
  440. * @param[in] ts Term set handle.
  441. *
  442. * @param[in,out] i Iterator to be initially set to 0.
  443. *
  444. * @param[out] term Pointer to be populated with the next term on success. It
  445. * may be NULL.
  446. *
  447. * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
  448. * set has been reached.
  449. */
  450. LSUP_rc
  451. LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);
  452. /** @brief New link map.
  453. *
  454. * The initial state of the returned list is: `{t: [NULL], tl: [NULL]}`
  455. *
  456. * Terms can be added to a term set with #LSUP_term_set_add().
  457. *
  458. * @param[in] type Type of links that the link map shall contain.
  459. * @sa #LSUP_LinkType
  460. *
  461. * @return A new empty link map.
  462. */
  463. LSUP_LinkMap *
  464. LSUP_link_map_new (LSUP_LinkType type);
  465. /** @brief Free a link map.
  466. *
  467. * All arrays and term handles are recursively freed.
  468. *
  469. * @param[in] pol link map handle obtained with #LSUP_link_map_new().
  470. */
  471. void
  472. LSUP_link_map_free (LSUP_LinkMap *pol);
  473. /** @brief Return the link map type.
  474. *
  475. * @return Link type. @sa #LSUP_LinkType
  476. */
  477. LSUP_LinkType
  478. LSUP_link_map_type (const LSUP_LinkMap *map);
  479. /** @brief Add a term - term set pair to a link map.
  480. *
  481. * If there is already a term set for the given term, items from the added term
  482. * set are added to the existing term set (if not duplicated). Otherwise, the
  483. * term set handle is linked to the new term.
  484. *
  485. * In any case, the caller should not directly use the term and term set after
  486. * passing them to this function.
  487. *
  488. * @param[in] cmap Link map handle obtained with #LSUP_link_map_new().
  489. *
  490. * @param[in] term Term to be associated with the given object list. The
  491. * link map structure takes ownership of the term.
  492. *
  493. * @param[in] tset Term set to be associated with the given term. The link
  494. * map structure takes ownership of the term set and the terms in it.
  495. *
  496. * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
  497. */
  498. LSUP_rc
  499. LSUP_link_map_add (
  500. LSUP_LinkMap *cmap, LSUP_Term *term, LSUP_TermSet *tset);
  501. /** @brief Create a new iterator to loop through a link map.
  502. *
  503. * @param[in] lmap Map handle to iterate.
  504. *
  505. * @param[in] ext External term to look for connections.
  506. */
  507. LSUP_LinkMapIterator *
  508. LSUP_link_map_iter_new (const LSUP_LinkMap *lmap, LSUP_Term *ext);
  509. /// Free a link map iterator.
  510. void
  511. LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);
  512. /** @brief Iterate through a link map.
  513. *
  514. * Each call to this function yields a linked term and the related term set.
  515. *
  516. * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
  517. *
  518. * @param[out] lt Linked term returned.
  519. *
  520. * @param[out] ts Term set returned.
  521. *
  522. * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
  523. * has been reached.
  524. */
  525. LSUP_rc
  526. LSUP_link_map_next (
  527. LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);
  528. /**@brief Iterate over a link map and generate triples.
  529. *
  530. * Calling this function repeatedly builds triples for all the linked terms and
  531. * term sets in the map, based on a given related term.
  532. *
  533. * @param[in] it Link map iterator handle, obtained with
  534. * #LSUP_link_map_iter_new().
  535. *
  536. * @param[in,out] spo Result triple. The triple handle must be pre-allocated
  537. * (it may be TRP_DUMMY) and calls to this function will set its members
  538. * to term handles owned by the link map. If rc != LSUP_OK, the contents are
  539. * undefined.
  540. *
  541. * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
  542. * has been reached; <0 on error.
  543. */
  544. LSUP_rc
  545. LSUP_link_map_triples (
  546. LSUP_LinkMapIterator *it, LSUP_Triple *spo);
  547. ///@} END defgroup term
  548. #endif