#ifndef _LSUP_TERM_H #define _LSUP_TERM_H #include #include "buffer.h" #include "namespace.h" #define UUID4_URN_SIZE UUIDSTR_SIZE + 10 // Some common RDF term values. #define LSUP_RDF_TYPE "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" #define LSUP_RDF_TYPE_NS "rdf:type" /// Default data type for untyped literals (prefixed IRI). #define DEFAULT_DTYPE "http://www.w3.org/2001/XMLSchema#string" #define DEFAULT_DTYPE_NS "xsd:string" /* * Data types. */ /// Language tag, currently restricted to 7 characters. typedef char LSUP_LangTag[8]; /// Term type. typedef enum { LSUP_TERM_UNDEFINED = 0,/**< * Undefined placeholder or result of an error. * Invalid for most operations. */ LSUP_TERM_IRIREF, ///< IRI reference. LSUP_TERM_NS_IRIREF, ///< Namespace-prefixed IRI reference. LSUP_TERM_LITERAL, ///< Literal without language tag. LSUP_TERM_LT_LITERAL, ///< Language-tagged string literal. LSUP_TERM_BNODE, ///< Blank node. } LSUP_TermType; /** @brief IRI information. * * See regex matching group for #LSUP_URI_REGEX_STR for more information. */ typedef struct iri_info_t LSUP_IRIInfo; typedef struct link_map_iter LSUP_LinkMapIterator; /// RDF term. typedef struct term_t { char * data; // URI, literal value, or BNode label. union { struct term_t * datatype; // Data type IRI for LSUP_TERM_LITERAL. LSUP_LangTag lang; // Lang tag for LSUP_TERM_LT_LITERAL. LSUP_Key bnode_id; // BNode ID for comparison & skolemization. LSUP_IRIInfo * iri_info; // IRI information structure. }; LSUP_TermType type; // Term type. } LSUP_Term; /** @brief Shorthand to test if a term is a IRI of any kind. */ #define LSUP_IS_IRI(term) \ ((term)->type == LSUP_TERM_IRIREF || (term)->type == LSUP_TERM_NS_IRIREF) /** @brief Shorthand to test if a term is a literal of any kind. */ #define LSUP_IS_LITERAL(term) \ ((term)->type == LSUP_TERM_LITERAL || (term)->type == LSUP_TERM_LT_LITERAL) /** @brief Whether the environment is already initialized. 
*
 * The test is simply whether #LSUP_default_datatype is non-NULL.
 */
#define LSUP_IS_INIT (LSUP_default_datatype != NULL)

/** @brief RDF triple.
 *
 * This represents a complete RDF statement. Triple terms can be accessed
 * directly via the `s`, `p`, `o` members or sequentially via
 * #LSUP_triple_pos().
 */
typedef struct triple_t {
    LSUP_Term *s;   ///< Subject.
    LSUP_Term *p;   ///< Predicate.
    LSUP_Term *o;   ///< Object.
} LSUP_Triple;

/// Link type.
typedef enum {
    LSUP_LINK_INBOUND,  ///< Inbound link (sp).
    LSUP_LINK_OUTBOUND, ///< Outbound link (po).
    LSUP_LINK_EDGE,     ///< Edge link (so).
} LSUP_LinkType;

/** @brief The immediate neighborhood of terms connected to a term.
 *
 * This is a hash map in which each key term is related to a set of one or
 * more other terms. The hash map is inside an opaque handle and is
 * manipulated via the `LSUP_link_map_*` functions.
 *
 * If the type of the link map is `LSUP_LINK_INBOUND`, the map keys represent
 * predicates, the sets related to them are the subjects, and the term
 * associated to the link map is the object; if `LSUP_LINK_OUTBOUND`, the keys
 * represent predicates, the related sets objects, and the associated term is
 * the subject. If `LSUP_LINK_EDGE`, the keys represent subjects and the
 * related sets objects, and the associated term is the predicate.
 *
 * NOTE(review): the inbound case is described as predicate keys with subject
 * sets, following the `(sp)` annotation on #LSUP_LINK_INBOUND; this text
 * previously called those sets "objects". Confirm against the
 * implementation.
 */
typedef struct link_map LSUP_LinkMap;

/** @brief A set of unique terms.
 *
 * This is used to bulk-add terms to a link map.
 */
typedef struct hashmap LSUP_TermSet;

/*
 * External variables.
 */

/** @brief Compiled hash of default literal data type. */
extern uint32_t LSUP_default_dtype_key;

/** @brief Default literal data type URI.
 *
 * Literal terms created with undefined data type will have it set to this
 * URI implicitly. #LSUP_IS_INIT tests this pointer against NULL.
 */
extern LSUP_Term *LSUP_default_datatype;

/** @brief Global term cache.
 *
 * Stores frequently used terms, e.g. data type URIs.
 */
extern LSUP_TermSet *LSUP_term_cache;

/*
 * API functions.
 */

/** @brief Create a new term.
* * This is a generic function; it is recommended to use specialized functions * such as #LSUP_term_new(), #LSUP_literal_new(), etc. as they have strict type * checks for the metadata parameter. * * @param type[in] Term type. One of #LSUP_TermType. * * @param data[in] Term data: textual URI, literal value without data type * or langtag, etc. It may be NULL for IRI refs and BNodes, in which case a * random identifier is generated. * * @param metadata[in] Namespace map (LSUP_NSMap *) for IRI refs; language tag * (LSUP_LangTag *) for language-tagged literals; or data type (LSUP_Term *) * for other literals. It may be NULL. * * @return New term, which must be freed with #LSUP_term_free after use; or * NULL on error. */ LSUP_Term * LSUP_term_new (LSUP_TermType type, const char *data, void *metadata); /** @brief Placeholder term to use with LSUP_term_reset. */ #define TERM_DUMMY LSUP_term_new (LSUP_TERM_UNDEFINED, NULL, NULL) /** @brief Shortcut to create an IRI reference. * * Must be freed with #LSUP_term_free. * * @param data[in] The URI string. If NULL, a UUID4-based URN is generated. * This cannot be NULL if the nsm parameter is not NULL. * * @param nsm[in] Namespace map. If not NULL, a namespace-prefixed * (#LSUP_TERM_NS_IRIREF) is created, otherwise a regular one * (#LSUP_TERM_IRIREF). * * @return same as #LSUP_term_new(). */ inline LSUP_Term * LSUP_iriref_new (const char *data, LSUP_NSMap *nsm) { return ( nsm ? LSUP_term_new (LSUP_TERM_NS_IRIREF, data, nsm) : LSUP_term_new (LSUP_TERM_IRIREF, data, NULL)); } /** @brief Create a new absolute IRI from a path relative to a root IRI. * * The term is always of type LSUP_TERM_IRIREF (i.e. not namespace-prefixed). * * If the provided IRI is already a fully qualified IRI (i.e. it has a prefix) * the result is semantically identical to the input. * * If the relative IRI begins with a '/', the resulting IRI is relative to the * web root of the root IRI. I.e. if a root IRI has a path after the webroot, * it is ignored. 
* * Otherwise, the resulting IRI is relative to the full root string. * * @param[in] root Root IRI that the new IRI should be relative to. * * @param[in] iri Term with an IRI relative to the webroot. * * @return New absolute IRI, or NULL if either term is not an IRI. */ LSUP_Term * LSUP_iriref_absolute (const LSUP_Term *root, const LSUP_Term *iri); /** @brief Create a new relative IRI from an absolute IRI and a web root IRI. * * This works with namespace-prefixed IRIs and returns a term of the same type * as the input. * * @param[in] root Root IRI that the new IRI should be relative to. * * @param[in] iri Full IRI. * * @return New IRI, or NULL if either term is not an IRI. If the input IRI is * not a path under the root IRI, the result will be identical to the input. */ LSUP_Term * LSUP_iriref_relative (const LSUP_Term *root, const LSUP_Term *iri); /** @brief Shortcut to create a literal term. * * Must be freed with #LSUP_term_free. * * @param data[in] The literal string. * * @param datatype[in] Data type URI string. If NULL, the default data type * (xsd:string) is used. The new term takes ownership of the pointer. * * @return same as #LSUP_term_new(). */ inline LSUP_Term * LSUP_literal_new (const char *data, LSUP_Term *datatype) { return LSUP_term_new (LSUP_TERM_LITERAL, data, datatype); } /** @brief Shortcut to create a language-tagged literal term. * * Must be freed with #LSUP_term_free. * * @param data[in] The literal string. * * @param lang[in] Language tag string. * * @return same as #LSUP_term_new(). */ inline LSUP_Term * LSUP_lt_literal_new (const char *data, char *lang) { return LSUP_term_new (LSUP_TERM_LT_LITERAL, data, lang); } /** @brief Shortcut to create a blank node. * * Must be freed with #LSUP_term_free. * * @param data[in] The BNode identifier. * * @return same as #LSUP_term_new(). */ inline LSUP_Term * LSUP_bnode_new (const char *data) { return LSUP_term_new (LSUP_TERM_BNODE, data, NULL); } /** @brief Copy a term. 
* * @param[in] src The term to copy. * * @return A new duplicate term handle. */ LSUP_Term * LSUP_term_copy (const LSUP_Term *src); /** @brief Deserialize a buffer into a term. * * @param[in] sterm Buffer to convert into a term. It must be a valid * serialized term from store or obtained with #LSUP_term_serialize(). * * @return New term handle. It must be freed with #LSUP_term_free(). */ LSUP_Term * LSUP_term_new_from_buffer (const LSUP_Buffer *sterm); /** @brief Serialize a term into a buffer. * * @param[in] sterm Term to convert into a buffer. * * @return New buffer handle. It must be freed with #LSUP_buffer_free(). */ LSUP_Buffer * LSUP_term_serialize (const LSUP_Term *term); /** @brief Hash a buffer. */ LSUP_Key LSUP_term_hash (const LSUP_Term *term); /** @brief Compare two terms. * * The terms evaluate as equal if their hashes are equal—i.e. if they are * semantically equivalent. */ inline bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2) { return LSUP_term_hash (term1) == LSUP_term_hash (term2); } void LSUP_term_free (LSUP_Term *term); /** @brief Namespace map of a IRI ref. * * @param[in] iri IRI reference handle. * * @return A pointer to the namespace map associated with the IRI. It is * freed at program shutdown. */ LSUP_NSMap * LSUP_iriref_nsm (const LSUP_Term *iri); /** @brief Get the prefix portion of a IRI ref. * * @param[in] iri IRI reference handle. * * @return String containing the protocol and domain name part of the IRI. It * should be freed after use. */ char * LSUP_iriref_prefix (const LSUP_Term *iri); /** @brief Get the path portion of a IRI ref. * * @param[in] iri IRI reference handle. * * @return String containing the path of the IRI relative to the web root. For * a URN, such as `urn:myns:myid`, it would be `myns:myid`. This string should * be freed after use. */ char * LSUP_iriref_path (const LSUP_Term *iri); /** @brief Get the fragment portion of a IRI ref. * * @param[in] iri IRI reference handle. 
*
 * @return String containing the fragment part of the IRI, or NULL if the IRI
 *  contains no fragment. It should be freed after use.
 */
char *
LSUP_iriref_frag (const LSUP_Term *iri);

/*
 * TRIPLES
 */

/** @brief Create a new triple from three terms.
 *
 * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
 * To only free the triple, use free().
 *
 * TODO Term types are not validated at the moment.
 *
 * @param[in] s Triple subject. It must be an IRIRef or BNode.
 *
 * @param[in] p Triple predicate. It must be an IRIRef.
 *
 * @param[in] o Triple object.
 *
 * @return New triple handle.
 */
LSUP_Triple *
LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);

/** @brief Dummy triple with NULL slots. It is not a valid triple.
 *
 * Every expansion is a fresh call to #LSUP_triple_new().
 */
#define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)

/** @brief Create a triple from a buffer triple.
 *
 * NOTE(review): presumably the inverse of #LSUP_triple_serialize(); ownership
 * of the result is not stated in this header. Confirm against the
 * implementation.
 *
 * @param[in] sspo Buffer triple to convert.
 *
 * @return New triple handle.
 */
LSUP_Triple *
LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);

/** @brief Serialize a triple into a buffer triple.
 *
 * @param[in] spo Triple to serialize.
 *
 * @return New buffer triple handle. #LSUP_triple_hash() in this header
 *  releases such a handle with LSUP_btriple_free().
 */
LSUP_BufferTriple *
LSUP_triple_serialize (const LSUP_Triple *spo);

/** @brief Initialize internal term pointers in a heap-allocated triple.
 *
 * Terms are NOT copied. To free them with the triple, use #LSUP_triple_free().
 * To only free the triple, use free().
 *
 * @param[in] spo Triple pointer to initialize.
 *
 * @param[in] s Triple subject.
 *
 * @param[in] p Triple predicate.
 *
 * @param[in] o Triple object.
 */
LSUP_rc
LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);

/** @brief Free the internal pointers of a triple.
 *
 * @param[in] spo Triple to be freed.
 */
void
LSUP_triple_done (LSUP_Triple *spo);

/** @brief Free a triple and all its internal pointers.
 *
 * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
 * back end), use a simple free(spo) instead of this.
 *
 * @param[in] spo Triple to be freed.
 */
void
LSUP_triple_free (LSUP_Triple *spo);

/** @brief Get triple by term position.
 *
 * Useful for looping over all terms.
 *
 * @param[in] trp Triple pointer.
 *
 * @param[in] n A number between 0÷2.
 *
 * @return Corresponding triple term or NULL if n is out of range.
*/ inline LSUP_Term * LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n) { if (n == TRP_POS_S) return trp->s; if (n == TRP_POS_P) return trp->p; if (n == TRP_POS_O) return trp->o; return NULL; } /** @brief Hash a triple. * * TODO This doesn't handle blank nodes correctly. */ inline LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp) { LSUP_BufferTriple *strp = LSUP_triple_serialize (trp); LSUP_Key hash = LSUP_btriple_hash (strp); LSUP_btriple_free (strp); return hash; } /** @brief Create a new term set. * * @return New empty term set. */ LSUP_TermSet * LSUP_term_set_new (void); /** @brief Free a term set. * * @param[in] ts Term set handle. */ void LSUP_term_set_free (LSUP_TermSet *ts); /** @brief Add term to a term set. * * If the same term is already in the set, it is not replaced, and the existing * term's handle is made available in the `existing` variable. In this case, * the caller may want to free the passed term which has not been added. * * @param[in] tl Term set to be added to. * * @param[in] term Term to be added to the list. The term set will take * ownership of the term and free it when it's freed with * #LSUP_term_set_free()—only if the return code is LSUP_OK. * * @param[out] existing If not NULL, and if the term being added is a * duplicate, this variable will be populated with the existing term handle. * * @return LSUP_OK on success; LSUP_NOACTION if the term is duplicate; * LSUP_MEM_ERR on memory error. Note: if not LSUP_OK, the caller is in charge * of freeing the `term` handle. */ LSUP_rc LSUP_term_set_add (LSUP_TermSet *ts, LSUP_Term *term, LSUP_Term **existing); /** @brief Get a term from a term set. * * @param[in] ts Term set handle. * * @param[in] key Key for the queried term. * * @return The retrieved term if found, or NULL. The term must not be * modified or freed. */ const LSUP_Term * LSUP_term_set_get (LSUP_TermSet *ts, LSUP_Key key); /** @brief Iterate trough a term set. * * @param[in] ts Term set handle. 
*
 * @param[in,out] i Iterator to be initially set to 0.
 *
 * @param[out] term Pointer to be populated with the next term on success. It
 *  may be NULL.
 *
 * @return LSUP_OK if the next term was retrieved; LSUP_END if the end of the
 *  set has been reached.
 */
LSUP_rc
LSUP_term_set_next (LSUP_TermSet *ts, size_t *i, LSUP_Term **term);

/** @brief New link map.
 *
 * The initial state of the returned map is: `{t: [NULL], tl: [NULL]}`
 * (NOTE(review): this describes an internal layout that is not visible from
 * this header; confirm it is still current).
 *
 * Terms and term sets can be added with #LSUP_link_map_add, and terms can be
 * added to a term set with #LSUP_term_set_add.
 *
 * @param[in] type Link type of the new map. One of #LSUP_LinkType.
 *
 * @return a new empty link map.
 */
LSUP_LinkMap *
LSUP_link_map_new (LSUP_LinkType type);

/** @brief Free a link map.
 *
 * All arrays and term handles are recursively freed.
 *
 * @param[in] pol link map handle obtained with #LSUP_link_map_new().
 */
void
LSUP_link_map_free (LSUP_LinkMap *pol);

/// Return the link map type.
LSUP_LinkType
LSUP_link_map_type (const LSUP_LinkMap *map);

/** @brief Add a term - term set pair to a link map.
 *
 * If there is already a term set for the given term, items from the added
 * term set are added to the existing term set (if not duplicated). Otherwise,
 * the term set handle is linked to the new term.
 *
 * In any case, the caller should not directly use the term and term set after
 * passing them to this function.
 *
 * @param[in] cmap Link map handle obtained with #LSUP_link_map_new().
 *
 * @param[in] term Term to be associated with the given term set. The link
 *  map structure takes ownership of the term.
 *
 * @param[in] tset Term set to be associated with the given term. The link
 *  map structure takes ownership of the term set and the terms in it.
 *
 * @return LSUP_OK on success; LSUP_MEM_ERR on allocation error.
 */
LSUP_rc
LSUP_link_map_add (
        LSUP_LinkMap *cmap, LSUP_Term *term, LSUP_TermSet *tset);

/** @brief Create a new iterator to loop through a link map.
 *
 * @param[in] lmap Map handle to iterate.
 *
 * @param[in] ext External term to look for connections.
*
 * @return New iterator handle. Free it with #LSUP_link_map_iter_free().
 */
LSUP_LinkMapIterator *
LSUP_link_map_iter_new (const LSUP_LinkMap *lmap, LSUP_Term *ext);

/// Free a link map iterator.
void
LSUP_link_map_iter_free (LSUP_LinkMapIterator *it);

/** @brief Iterate through a link map.
 *
 * Each call to this function yields a linked term and the related term set.
 *
 * @param[in] it Link map iterator obtained with #LSUP_link_map_iter_new().
 *
 * @param[out] lt Linked term returned.
 *
 * @param[out] ts Term set returned.
 *
 * @return LSUP_OK if a result was yielded; LSUP_END if the end of the link map
 *  has been reached.
 */
LSUP_rc
LSUP_link_map_next (
        LSUP_LinkMapIterator *it, LSUP_Term **lt, LSUP_TermSet **ts);

/** @brief Iterate over a link map and generate triples.
 *
 * Calling this function repeatedly builds triples for all the linked terms and
 * term sets in the map, based on a given related term.
 *
 * NOTE(review): this documentation used to list a `term` parameter ("Term to
 * relate to the link map"), but the signature takes only the iterator and the
 * result triple. Presumably the related term is the `ext` term passed to
 * #LSUP_link_map_iter_new(); confirm against the implementation.
 *
 * @param[in] it Link map iterator handle, obtained with
 *  #LSUP_link_map_iter_new().
 *
 * @param[in,out] spo Result triple. The triple handle must be pre-allocated
 *  (it may be TRP_DUMMY) and calls to this function will set its members
 *  to term handles owned by the link map. If rc != LSUP_OK, the contents are
 *  undefined.
 *
 * @return LSUP_OK if a new triple was yielded; LSUP_END if the end of the loop
 *  has been reached; <0 on error.
 */
LSUP_rc
LSUP_link_map_triples (
        LSUP_LinkMapIterator *it, LSUP_Triple *spo);

#endif  /* _LSUP_TERM_H */