123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655 |
- #ifndef _LSUP_TERM_H
- #define _LSUP_TERM_H
- #include <assert.h>
- #include "buffer.h"
- #include "namespace.h"
/** @brief Buffer size for a UUID4-based URN string.
 *
 * The expansion is parenthesized so that the macro behaves as a single operand
 * when used inside a larger expression (e.g. `2 * UUID4_URN_SIZE`).
 *
 * NOTE(review): the 10 extra bytes presumably make room for the `urn:uuid:`
 * prefix — confirm against the code that builds the URN.
 */
#define UUID4_URN_SIZE (UUIDSTR_SIZE + 10)
- // Some common RDF term values.
- #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
- #define LSUP_RDF_TYPE_NS "rdf:type"
- /// Default data type for untyped literals: fully qualified IRI, followed by its namespace-prefixed form.
- #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string"
- #define DEFAULT_DTYPE_NS "xsd:string"
- /*
- * Data types.
- */
- /// Language tag, currently restricted to 7 characters.
- typedef char LSUP_LangTag[8];
/** @brief Term type.
 *
 * Discriminates the kind of RDF term stored in a term structure.
 */
typedef enum {
    /** Undefined placeholder or result of an error.
     *
     * Invalid for most operations.
     */
    LSUP_TERM_UNDEFINED = 0,

    /** IRI reference. */
    LSUP_TERM_IRIREF,

    /** Namespace-prefixed IRI reference. */
    LSUP_TERM_NS_IRIREF,

    /** Literal without language tag. */
    LSUP_TERM_LITERAL,

    /** Language-tagged string literal. */
    LSUP_TERM_LT_LITERAL,

    /** Blank node. */
    LSUP_TERM_BNODE,
} LSUP_TermType;
- /** @brief IRI information.
- *
- * See regex matching group for #LSUP_URI_REGEX_STR for more information.
- */
- typedef struct iri_info_t LSUP_IRIInfo;
- typedef struct link_map_iter LSUP_LinkMapIterator;
- /// RDF term.
- typedef struct term_t {
- char * data; // URI, literal value, or BNode label.
- union {
- struct term_t * datatype; // Data type IRI for LSUP_TERM_LITERAL.
- LSUP_LangTag lang; // Lang tag for LSUP_TERM_LT_LITERAL.
- LSUP_Key bnode_id; // BNode ID for comparison & skolemization.
- LSUP_IRIInfo * iri_info; // IRI information structure.
- };
- LSUP_TermType type; // Term type.
- } LSUP_Term;
/** @brief Shorthand to test if a term is an IRI of any kind.
 *
 * Evaluates to non-zero for both plain (#LSUP_TERM_IRIREF) and
 * namespace-prefixed (#LSUP_TERM_NS_IRIREF) IRI references.
 *
 * The argument may be evaluated more than once: do not pass an expression
 * with side effects.
 *
 * @param term Pointer to the term to test.
 */
#define LSUP_IS_IRI(term)                                                   \
    (                                                                       \
        (term)->type == LSUP_TERM_IRIREF                                    \
        || (term)->type == LSUP_TERM_NS_IRIREF                              \
    )
/** @brief Shorthand to test if a term is a literal of any kind.
 *
 * Evaluates to non-zero for both literals without a language tag
 * (#LSUP_TERM_LITERAL) and language-tagged literals (#LSUP_TERM_LT_LITERAL).
 *
 * The argument may be evaluated more than once: do not pass an expression
 * with side effects.
 *
 * @param term Pointer to the term to test.
 */
#define LSUP_IS_LITERAL(term)                                               \
    (                                                                       \
        (term)->type == LSUP_TERM_LITERAL                                   \
        || (term)->type == LSUP_TERM_LT_LITERAL                             \
    )
- /** @brief Whether the environment is already initialized.
- */
- #define LSUP_IS_INIT (LSUP_default_datatype != NULL)
- /** @brief RDF triple.
- *
- * This represents a complete RDF statement. Triple terms can be accessed
- * directly via the `s`, `p`, `o` members or sequentially via
- * #LSUP_triple_pos().
- */
- typedef struct triple_t {
- LSUP_Term *s; ///< Subject.
- LSUP_Term *p; ///< Predicate.
- LSUP_Term *o; ///< Object.
- } LSUP_Triple;
/** @brief Link type.
 *
 * The two letters in parentheses are the annotations carried by each
 * enumerator.
 */
typedef enum {
    /** Inbound link (sp). */
    LSUP_LINK_INBOUND,

    /** Outbound link (po). */
    LSUP_LINK_OUTBOUND,

    /** Edge link (so). */
    LSUP_LINK_EDGE,
} LSUP_LinkType;
- /** @brief The immediate neighborhood of terms connected to a term.
- *
- * This is a hash map in which each key term is related to a set of one or more
- * other terms. The hash map is inside an opaque handle and is manipulated via
- * the `LSUP_link_map_*` functions.
- *
- * If the type of the link map is `LSUP_LINK_INBOUND`, the map keys
- * represent predicates and the sets related to them are the subjects, and the
- * term associated to the link map is the object; if
- * `LSUP_LINK_OUTBOUND`, the keys represent predicates, the related sets
- * objects, and the associated term is the subject. If `LSUP_LINK_EDGE`, the
- * keys represent subjects and the related sets objects, and the associated
- * term is the predicate.
- */
- typedef struct link_map LSUP_LinkMap;
- /** @brief a set of unique terms.
- *
- * This is used to bulk-add terms to a link map.
- */
- typedef struct hashmap LSUP_TermSet;
- /*
- * External variables.
- */
- /** @brief Compiled hash of default literal data type.
- */
- extern uint32_t LSUP_default_dtype_key;
- /** @brief Default literal data type URI.
- *
- * Literal terms created with undefined data type will have it set to this
- * URI implicitly.
- */
- extern LSUP_Term *LSUP_default_datatype;
- /** @brief Global term cache.
- *
- * Stores frequently used terms, e.g. data type URIs.
- */
- extern LSUP_TermSet *LSUP_term_cache;
- /*
- * API functions.
- */
- /** @brief Create a new term.
- *
- * This is a generic function; it is recommended to use specialized functions
- * such as #LSUP_iriref_new(), #LSUP_literal_new(), etc. as they have strict type
- * checks for the metadata parameter.
- *
- * @param[in] type Term type. One of #LSUP_TermType.
- *
- * @param[in] data Term data: textual URI, literal value without data type
- * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a
- * random identifier is generated.
- *
- * @param[in] metadata Namespace map (LSUP_NSMap *) for IRI refs; language tag
- * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *)
- * for other literals. It may be NULL.
- *
- * @return New term, which must be freed with #LSUP_term_free after use; or
- * NULL on error.
- */
- LSUP_Term *
- LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
- /** @brief Placeholder term to use with LSUP_term_reset.
- */
- #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL)
- /** @brief Shortcut to create an IRI reference.
- *
- * Must be freed with #LSUP_term_free.
- *
- * @param data[in] The URI string. If NULL, a UUID4-based URN is generated.
- * This cannot be NULL if the nsm parameter is not NULL.
- *
- * @param nsm[in] Namespace map. If not NULL, a namespace-prefixed
- * (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one
- * (#LSUP_TERM_IRIREF).
- *
- * @return same as #LSUP_term_new().
- */
- inline LSUP_Term *
- LSUP_iriref_new (const char *data, LSUP_NSMap *nsm)
- {
- return (
- nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) :
- LSUP_term_new (LSUP_TERM_IRIREF, data, NULL));
- }
- /** @brief Create a new absolute IRI from a path relative to a root IRI.
- *
- * The term is always of type LSUP_TERM_IRIREF (i.e. not namespace-prefixed).
- *
- * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix)
- * the result is semantically identical to the input.
- *
- * If the relative IRI begins with a '/', the resulting IRI is relative to the
- * web root of the root IRI. I.e. if a root IRI has a path after the webroot,
- * it is ignored.
- *
- * Otherwise, the resulting IRI is relative to the full root string.
- *
- * @param[in] root Root IRI that the new IRI should be relative to.
- *
- * @param[in] iri Term with an IRI relative to the webroot.
- *
- * @return New absolute IRI, or NULL if either term is not an IRI.
- */
- LSUP_Term *
- LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri);
- /** @brief Create a new relative IRI from an absolute IRI and a web root IRI.
- *
- * This works with namespace-prefixed IRIs and returns a term of the same type
- * as the input.
- *
- * @param[in] root Root IRI that the new IRI should be relative to.
- *
- * @param[in] iri Full IRI.
- *
- * @return New IRI, or NULL if either term is not an IRI. If the input IRI is
- * not a path under the root IRI, the result will be identical to the input.
- */
- LSUP_Term *
- LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri);
- /** @brief Shortcut to create a literal term.
- *
- * Must be freed with #LSUP_term_free.
- *
- * @param data[in] The literal string.
- *
- * @param datatype[in] Data type URI string. If NULL, the default data type
- * (xsd:string) is used. The new term takes ownership of the pointer.
- *
- * @return same as #LSUP_term_new().
- */
- inline LSUP_Term *
- LSUP_literal_new (const char *data, LSUP_Term *datatype)
- { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); }
- /** @brief Shortcut to create a language-tagged literal term.
- *
- * Must be freed with #LSUP_term_free.
- *
- * @param data[in] The literal string.
- *
- * @param lang[in] Language tag string.
- *
- * @return same as #LSUP_term_new().
- */
- inline LSUP_Term *
- LSUP_lt_literal_new (const char *data, char *lang)
- { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); }
- /** @brief Shortcut to create a blank node.
- *
- * Must be freed with #LSUP_term_free.
- *
- * @param data[in] The BNode identifier.
- *
- * @return same as #LSUP_term_new().
- */
- inline LSUP_Term *
- LSUP_bnode_new (const char *data)
- { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); }
- /** @brief Copy a term.
- *
- * @param[in] src The term to copy.
- *
- * @return A new duplicate term handle.
- */
- LSUP_Term *
- LSUP_term_copy (const LSUP_Term *src);
- /** @brief Deserialize a buffer into a term.
- *
- * @param[in] sterm Buffer to convert into a term. It must be a valid
- * serialized term from store or obtained with #LSUP_term_serialize().
- *
- * @return New term handle. It must be freed with #LSUP_term_free().
- */
- LSUP_Term *
- LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
- /** @brief Serialize a term into a buffer.
- *
- * @param[in] term Term to convert into a buffer.
- *
- * @return New buffer handle. It must be freed with #LSUP_buffer_free().
- */
- LSUP_Buffer *
- LSUP_term_serialize (const LSUP_Term *term);
- /** @brief Hash a term.
- */
- LSUP_Key
- LSUP_term_hash (const LSUP_Term *term);
- /** @brief Compare two terms.
- *
- * The terms evaluate as equal if their hashes are equal—i.e. if they are
- * semantically equivalent.
- */
- inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
- { return LSUP_term_hash (term1) == LSUP_term_hash (term2); }
- void
- LSUP_term_free (LSUP_Term *term);
- /** @brief Namespace map of a IRI ref.
- *
- * @param[in] iri IRI reference handle.
- *
- * @return A pointer to the namespace map associated with the IRI. It is
- * freed at program shutdown.
- */
- LSUP_NSMap *
- LSUP_iriref_nsm (const LSUP_Term *iri);
- /** @brief Get the prefix portion of a IRI ref.
- *
- * @param[in] iri IRI reference handle.
- *
- * @return String containing the protocol and domain name part of the IRI. It
- * should be freed after use.
- */
- char *
- LSUP_iriref_prefix (const LSUP_Term *iri);
- /** @brief Get the path portion of a IRI ref.
- *
- * @param[in] iri IRI reference handle.
- *
- * @return String containing the path of the IRI relative to the web root. For
- * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should
- * be freed after use.
- */
- char *
- LSUP_iriref_path (const LSUP_Term *iri);
- /** @brief Get the fragment portion of a IRI ref.
- *
- * @param[in] iri IRI reference handle.
- *
- * @return String containing the fragment part of the IRI, or NULL if the IRI
- * contains no fragment. It should be freed after use.
- */
- char *
- LSUP_iriref_frag (const LSUP_Term *iri);
- /*
- * TRIPLES
- */
- /** @brief Create a new triple from three terms.
- *
- * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
- * To only free the triple, use free().
- *
- * TODO Term types are not validated at the moment.
- *
- * @param[in] s Triple subject. It must be an IRIRef or BNode.
- *
- * @param[in] p Triple predicate. It must be an IRIRef.
- *
- * @param[in] o Triple object.
- *
- */
- LSUP_Triple *
- LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
- /** @brief Dummy triple with NULL slots. It is not a valid triple.
- */
- #define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
- LSUP_Triple *
- LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
- LSUP_BufferTriple *
- LSUP_triple_serialize (const LSUP_Triple *spo);
- /** @brief Initialize internal term pointers in a heap-allocated triple.
- *
- * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
- * To only free the triple, use free().
- *
- * @param spo[in] Triple pointer to initialize.
- */
- LSUP_rc
- LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
- /** @brief Free the internal pointers of a triple.
- *
- * @param spo[in] Triple to be freed.
- */
- void
- LSUP_triple_done (LSUP_Triple *spo);
- /** @brief Free a triple and all its internal pointers.
- *
- * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
- * back end), use a simple free(spo) instead of this.
- *
- * @param spo[in] Triple to be freed.
- */
- void
- LSUP_triple_free (LSUP_Triple *spo);
- /** @brief Get triple by term position.
- *
- * Useful for looping over all terms.
- *
- * @param trp[in] Triple pointer.
- *
- * @param n[in] A number between 0÷2.
- *
- * @return Corresponding triple term or NULL if n is out of range.
- */
- inline LSUP_Term *
- LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n)
- {
- if (n == TRP_POS_S) return trp->s;
- if (n == TRP_POS_P) return trp->p;
- if (n == TRP_POS_O) return trp->o;
- return NULL;
- }
- /** @brief Hash a triple.
- *
- * TODO This doesn't handle blank nodes correctly.
- */
- inline LSUP_Key
- LSUP_triple_hash (const LSUP_Triple *trp)
- {
- LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
- LSUP_Key hash = LSUP_btriple_hash (strp);
- LSUP_btriple_free (strp);
- return hash;
- }
- /** @brief Create a new term set.
- *
- * @return New empty term set.
- */
- LSUP_TermSet *
- LSUP_term_set_new (void);
- /** @brief Free a term set.
- *
- * @param[in] ts Term set handle.
- */
- void
- LSUP_term_set_free (LSUP_TermSet *ts);
- /** @brief Add term to a term set.
- *
- * If the same term is already in the set, it is not replaced, and the existing
- * term's handle is made available in the `existing` variable. In this case,
- * the caller may want to free the passed term which has not been added.
- *
- * @param[in] ts Term set to be added to.
- *
- * @param[in] term Term to be added to the set. The term set will take
- * ownership of the term and free it when it's freed with
- * #LSUP_term_set_free()—only if the return code is LSUP_OK.
- *
- * @param[out] existing If not NULL, and if the term being added is a
- * duplicate, this variable will be populated with the existing term handle.
- *
- * @return LSUP_OK on success; LSUP_NOACTION if the term is duplicate;
- * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge
- * of freeing the `term` handle.
- */
- LSUP_rc
- LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing);
- /** @brief Get a term from a term set.
- *
- * @param[in] ts Term set handle.
- *
- * @param[in] key Key for the queried term.
- *
- * @return The retrieved term if found, or NULL. The term must not be
- * modified or freed.
- */
- const LSUP_Term *
- LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key);
- /** @brief Iterate through a term set.
- *
- * @param[in] ts Term set handle.
- *
- * @param[in,out] i Iterator to be initially set to 0.
- *
- * @param[out] term Pointer to be populated with the next term on success. It
- * may be NULL.
- *
- * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
- * set has been reached.
- */
- LSUP_rc
- LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);
- /** @brief New link map.
- *
- * The initial state of the returned list is: `{t: [NULL], tl: [NULL]}`
- *
- * Terms and term sets can be added with #LSUP_link_map_add(), and terms
- * can be added to a term set with #LSUP_term_set_add().
- *
- * @param[in] type Type of links that the link map shall contain.
- * @sa #LSUP_LinkType
- *
- * @return A new empty link map.
- */
- LSUP_LinkMap *
- LSUP_link_map_new (LSUP_LinkType type);
- /** @brief Free a link map.
- *
- * All arrays and term handles are recursively freed.
- *
- * @param[in] pol link map handle obtained with #LSUP_link_map_new().
- */
- void
- LSUP_link_map_free (LSUP_LinkMap *pol);
- /** @brief Return the link map type.
- *
- * @return Link type. @sa #LSUP_LinkType
- */
- LSUP_LinkType
- LSUP_link_map_type (const LSUP_LinkMap *map);
- /** @brief Add a term - term set pair to a link map.
- *
- * If there is already a term set for the given term, items from the added term
- * set are added to the existing term set (if not duplicated). Otherwise, the
- * term set handle is linked to the new term.
- *
- * In any case, the caller should not directly use the term and term set after
- * passing them to this function.
- *
- * @param[in] cmap Link map handle obtained with #LSUP_link_map_new().
- *
- * @param[in] term Term to be associated with the given term set. The
- * link map structure takes ownership of the term.
- *
- * @param[in] tset Term set to be associated with the given term. The link
- * map structure takes ownership of the term set and the terms in it.
- *
- * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
- */
- LSUP_rc
- LSUP_link_map_add (
- LSUP_LinkMap *cmap, LSUP_Term *term, LSUP_TermSet *tset);
- /** @brief Create a new iterator to loop through a link map.
- *
- * @param[in] lmap Map handle to iterate.
- *
- * @param[in] ext External term to look for connections.
- */
- LSUP_LinkMapIterator *
- LSUP_link_map_iter_new (const LSUP_LinkMap *lmap, LSUP_Term *ext);
- /// Free a link map iterator.
- void
- LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);
- /** @brief Iterate through a link map.
- *
- * Each call to this function yields a linked term and the related term set.
- *
- * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
- *
- * @param[out] lt Linked term returned.
- *
- * @param[out] ts Term set returned.
- *
- * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
- * has been reached.
- */
- LSUP_rc
- LSUP_link_map_next (
- LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);
- /**@brief Iterate over a link map and generate triples.
- *
- * Calling this function repeatedly builds triples for all the linked terms and
- * term sets in the map, based on a given related term.
- *
- * @param[in] it Link map iterator handle, obtained with
- * #LSUP_link_map_iter_new().
- *
- * @param[in] term Term to relate to the link map.
- *
- * @param[in,out] spo Result triple. The triple handle must be pre-allocated
- * (it may be TRP_DUMMY) and calls to this function will set its members
- * to term handles owned by the link map. If rc != LSUP_OK, the contents are
- * undefined.
- *
- * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
- * has been reached; <0 on error.
- */
- LSUP_rc
- LSUP_link_map_triples (
- LSUP_LinkMapIterator *it, LSUP_Triple *spo);
- #endif
|