瀏覽代碼

WIP Remove ID cache.

Stefano Cossu 3 年之前
父節點
當前提交
f70ace7b27
共有 26 個文件被更改,包括 636 次插入463 次删除
  1. 0 1
      cpython/py_triple.h
  2. 30 30
      docs/dev/deps.dot
  3. 二進制
      docs/dev/deps.pdf
  4. 28 0
      include/buffer.h
  5. 79 4
      include/core.h
  6. 5 17
      include/graph.h
  7. 14 1
      include/store_htable.h
  8. 18 10
      include/store_mdb.h
  9. 145 57
      include/term.h
  10. 0 115
      include/triple.h
  11. 1 0
      src/buffer.c
  12. 3 12
      src/codec/nt_grammar.y
  13. 3 3
      src/codec_nt.c
  14. 11 6
      src/core.c
  15. 9 10
      src/environment.c
  16. 60 49
      src/graph.c
  17. 20 1
      src/store_htable.c
  18. 38 15
      src/store_mdb.c
  19. 151 23
      src/term.c
  20. 0 88
      src/triple.c
  21. 1 1
      test/assets/triples.h
  22. 1 1
      test/test_codec_nt.c
  23. 4 4
      test/test_graph.c
  24. 1 1
      test/test_store_ht.c
  25. 5 5
      test/test_store_mdb.c
  26. 9 9
      test/test_term.c

+ 0 - 1
cpython/py_triple.h

@@ -6,7 +6,6 @@
 #include <Python.h>
 #include <structmember.h>
 
-#include "triple.h"
 #include "py_term.h"
 
 

+ 30 - 30
docs/dev/deps.dot

@@ -5,41 +5,41 @@ digraph "source tree" {
     fontsize="16";
     fontname="Helvetica";
 	clusterrank="local";
-	"codec_nt" -> "codec_base"
-	"buffer" -> "core"
-	"py_triple" -> "triple"
-	"codec_nt" -> "nt_parser"
-	"graph" -> "store_mdb"
+	"environment" -> "store_mdb"
+	"buffer" -> "xxhash"
+	"graph" -> "store_htable"
+	"profile" -> "lsup_rdf"
+	"graph" -> "environment"
 	"py_graph" -> "graph"
-	"py_graph" -> "codec_nt"
-	"triple" -> "term"
 	"py_lsup_rdf" -> "py_namespace"
-	"nt_parser" -> "graph"
-	"py_graph" -> "py_triple"
+	"namespace" -> "core"
+	"graph" -> "term"
+	"store_htable" -> "buffer"
+	"codec_nt" -> "codec_base"
+	"codec_base" -> "graph"
+	"term" -> "buffer"
 	"term" -> "uthash"
-	"lsup_rdf" -> "codec_nt"
+	"py_namespace" -> "namespace"
+	"py_graph" -> "codec_nt"
+	"namespace" -> "uthash"
+	"store_htable" -> "uthash"
+	"py_lsup_rdf" -> "py_graph"
 	"term" -> "tpl"
-	"store_htable" -> "triple"
-	"term" -> "namespace"
-	"buffer" -> "xxhash"
+	"py_term" -> "term"
+	"lsup_rdf" -> "codec_nt"
+	"nt_parser" -> "graph"
+	"core" -> "log"
+	"graph" -> "store_mdb"
+	"py_graph" -> "py_triple"
+	"buffer" -> "core"
 	"store_mdb" -> "lmdb"
-	"profile" -> "lsup_rdf"
-	"store_mdb" -> "triple"
-	"py_lsup_rdf" -> "py_graph"
+	"store_mdb" -> "buffer"
+	"term" -> "namespace"
+	"codec_nt" -> "nt_parser"
+	"nt_grammar" -> "graph"
+	"store_mdb" -> "bootstrap"
+	"py_triple" -> "py_term"
+	"store_mdb" -> "store"
 	"core" -> "lmdb"
 	"nt_parser" -> "nt_grammar"
-	"store_mdb" -> "store"
-	"environment" -> "store_mdb"
-	"graph" -> "store_htable"
-	"py_triple" -> "py_term"
-	"codec_base" -> "graph"
-	"py_term" -> "term"
-	"namespace" -> "core"
-	"graph" -> "environment"
-	"namespace" -> "uthash"
-	"py_namespace" -> "namespace"
-	"term" -> "buffer"
-	"nt_grammar" -> "graph"
-	"core" -> "log"
-	"store_htable" -> "uthash"
 }

二進制
docs/dev/deps.pdf


+ 28 - 0
include/buffer.h

@@ -44,6 +44,13 @@ typedef struct buffer_triple_t {
 } LSUP_BufferTriple;
 
 
+typedef enum {
+    TRP_POS_S = 0,
+    TRP_POS_P = 1,
+    TRP_POS_O = 2,
+} LSUP_TriplePos;
+
+
 /** Initialize or reuse a buffer handle.
  *
  * The handle must have been created with #LSUP_buffer_new*().
@@ -209,6 +216,27 @@ void
 LSUP_btriple_free_shallow (LSUP_BufferTriple *sspo);
 
 
+/** @brief Get serialized triple by term position.
+ *
+ * Useful for looping over all terms.
+ *
+ * @param btrp[in] Serialized triple pointer.
+ *
+ * @param n[in] Term position: a number from 0 to 2.
+ *
+ * @return Corresponding serialized term or NULL if n is out of range.
+ */
+inline LSUP_Buffer *
+LSUP_btriple_pos (const LSUP_BufferTriple *btrp, LSUP_TriplePos n)
+{
+    if (n == TRP_POS_S) return btrp->s;
+    if (n == TRP_POS_P) return btrp->p;
+    if (n == TRP_POS_O) return btrp->o;
+
+    return NULL;
+}
+
+
 /** @brief Hash a buffer triple.
  *
  * TODO This doesn't handle blank nodes correctly.

+ 79 - 4
include/core.h

@@ -43,6 +43,10 @@
 
 # define UUIDSTR_SIZE 37
 
+// "NULL" triple, a value that is never user-provided. Used to fill deleted
+// triples in a keyset.
+#define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
+
 
 /* * * RETURN CODES * * */
 
@@ -52,30 +56,101 @@
  */
 typedef int LSUP_rc;
 
+/** @brief Generic success return code.
+ */
 #define LSUP_OK             0
 
+/** @brief No action taken.
+ *
+ * An attempt to create or update a resource was made, but the resource already
+ * existed substantially in the same form, so no action took place. The caller
+ * is expected to find the resource as though the action actually took place.
+ * If this is returned from the iteration of multiple updates, it means that
+ * none of the iterations produced a change in state.
+ */
 #define LSUP_NOACTION       88801
+
+/** @brief No result yielded.
+ *
+ * A read operation returned no results. If an iterator is expected to be
+ * created, it may be created empty.
+ */
 #define LSUP_NORESULT       88802
+
+/** @brief Loop end.
+ *
+ * End of a loop was reached. This can be used in a while() or for()
+ * loop as a terminating condition.
+ */
 #define LSUP_END            88803
+
+/** @brief Conflict warning.
+ * An attempt to create or update a resource was made, but the resource existed
+ * with a different form or value. The caller should find the value of the
+ * existing resource to be different than the one that was attempted to store.
+ * If this is returned from the iteration of multiple updates, it means that
+ * other resources in the loop may have changed state and the operation as a
+ * whole completed successfully.
+ */
 #define LSUP_CONFLICT       88804
-// NOTE When adding new warning codes, use a value larger than the last one
-// in the list. Also change LSUP_MAX_WARNING.
+
+/*
+ * NOTE When adding new warning codes, use a value larger than the last one
+ * in the list. Also change LSUP_MAX_WARNING.
+ */
 
 #define LSUP_MIN_WARNING    LSUP_NOACTION
 #define LSUP_MAX_WARNING    LSUP_CONFLICT
 
+/** @brief Generic error return code.
+ */
 #define LSUP_ERROR          -88899
+
+/** @brief TODO
+ */
 #define LSUP_PARSE_ERR      -88898
+
+/** @brief TODO
+ */
 #define LSUP_VALUE_ERR      -88897
+
+/** @brief TODO
+ */
 #define LSUP_TXN_ERR        -88896
+
+/** @brief TODO
+ */
 #define LSUP_DB_ERR         -88895
+
+/** @brief TODO
+ */
 #define LSUP_NOT_IMPL_ERR   -88894
+
+/** @brief TODO
+ */
 #define LSUP_IO_ERR         -88893
+
+/** @brief TODO
+ */
 #define LSUP_MEM_ERR        -88892
+
+/** @brief Conflict error.
+ *
+ * A critical resource conflict happened and no resources were updated. If this
+ * is returned from the iteration of multiple updates, it means that the
+ * operation has been interrupted and any state change within the loop prior to
+ * the error has been rolled back.
+ */
 #define LSUP_CONFLICT_ERR   -88891
+
+/** @brief TODO
+ */
 #define LSUP_ENV_ERR        -88890
-// NOTE When adding new error codes, use a value larger than the last one
-// in the list. Also change LSUP_MAX_ERROR.
+
+/*
+ * NOTE When adding new error codes, use a value larger than the last one
+ * in the list. Also change LSUP_MAX_ERROR.
+ */
 
 #define LSUP_MIN_ERROR      LSUP_ERROR
 #define LSUP_MAX_ERROR      LSUP_ENV_ERR

+ 5 - 17
include/graph.h

@@ -2,6 +2,7 @@
 #define _LSUP_GRAPH_H
 
 #include "environment.h"
+#include "term.h"
 
 /*
  * Define backend types and checks.
@@ -12,6 +13,7 @@
     ENTRY(  MDB,       1)/* LMDB back end on persistent disk. */ \
     ENTRY(  MDB_TMP,   2)/* LMDB back end on RAM disk. */        \
 
+
 typedef enum LSUP_store_type {
 #define ENTRY(a, b) LSUP_STORE_##a = b,
     BACKEND_TBL
@@ -249,8 +251,7 @@ LSUP_graph_add_init (LSUP_Graph *gr);
  * @param[in] sspo Serialized triple to add.
  */
 LSUP_rc
-LSUP_graph_add_iter (
-        LSUP_GraphIterator *it, const LSUP_BufferTriple *sspo);
+LSUP_graph_add_iter (LSUP_GraphIterator *it, const LSUP_Triple *spo);
 
 
 /** @brief Finalize an add iteration loop and free the iterator.
@@ -263,10 +264,7 @@ void
 LSUP_graph_add_done (LSUP_GraphIterator *it);
 
 
-/** @brief Add triples and/or serialized triples to a graph.
- *
- * For API users it may be more convenient to use the more specialized
- * #LSUP_graph_add_trp.
+/** @brief Add triples to a graph.
  *
  * @param[in] gr Graph to add triples to.
  *
@@ -278,17 +276,7 @@ LSUP_graph_add_done (LSUP_GraphIterator *it);
  *  inserted.
  */
 LSUP_rc
-LSUP_graph_add (
-        LSUP_Graph *gr, const LSUP_Triple trp[],
-        const LSUP_BufferTriple strp[], size_t *inserted);
-
-
-/** @brief Insert RDF triples into a graph.
- *
- * This is a convenience method for external callers which most likely have
- * non-serialized triples at hand.
- */
-#define LSUP_graph_add_trp(gr, trp, ins) LSUP_graph_add (gr, trp, NULL, ins)
+LSUP_graph_add (LSUP_Graph *gr, const LSUP_Triple trp[], size_t *inserted);
 
 
 /** @brief Delete triples by a matching pattern.

+ 14 - 1
include/store_htable.h

@@ -20,7 +20,7 @@
 #ifndef _LSUP_STORE_HTABLE_H
 #define _LSUP_STORE_HTABLE_H
 
-#include "triple.h"
+#include "buffer.h"
 
 
 typedef struct ht_store_t LSUP_HTStore;
@@ -69,6 +69,19 @@ LSUP_rc
 LSUP_htstore_copy_contents (LSUP_HTStore *dest, const LSUP_HTStore *src);
 
 
+/** Add a term to the index.
+ *
+ * @param[in] store HTStore handle.
+ *
+ * @param[in] sterm Serialized term to insert.
+ *
+ * @return LSUP_OK on success; LSUP_NOACTION if the term exists already; <0
+ *  on error.
+ */
+LSUP_rc
+LSUP_htstore_add_term (LSUP_HTStore *store, const LSUP_Buffer *sterm);
+
+
 LSUP_HTIterator *
 LSUP_htstore_add_init (LSUP_HTStore *store);
 

+ 18 - 10
include/store_mdb.h

@@ -23,7 +23,9 @@
 #define _LSUP_STORE_MDB_H
 
 #include "lmdb.h"
-#include "triple.h"
+
+#include "buffer.h"
+#include "namespace.h"
 
 
 // FIXME find a better cross-platform path.
@@ -365,27 +367,33 @@ LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm);
  * @param[out] nsm Namespace map handle to store.
  *
  * @return LSUP_OK if all terms were updated; LSUP_CONFLICT if one or more
- *  namespaces or terms were not updated because they already existed.
+ *  namespaces or terms were not updated because they already existed; <0 if
+ *  an error occurred.
  */
 LSUP_rc
 LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm);
 
 
-/** @brief Populate the ID cache with data types and lang tags from store.
+/** @brief Add a single term to the store.
+ *
+ * @param[in] store MDB store handle.
  *
- * @param[in] store The store to get data from.
+ * @param[in] sterm Serialized term to store.
  */
 LSUP_rc
-LSUP_mdbstore_idcache_get (LSUP_MDBStore *store);
+LSUP_mdbstore_add_term (LSUP_MDBStore *store, const LSUP_Buffer *sterm);
 
 
-/** @brief Store an ID into a MDB store.
+/** @brief Whether a term key exists.
+ *
+ * @param[in] store MDB store to search in.
  *
- * @param[in] store MDB store.
+ * @param[in] tkey Key to look up.
  *
- * @param[in] id ID to store.
+ * @return 1 if the term exists, 0 if it does not exist; <0 on error.
  */
-LSUP_rc
-LSUP_mdbstore_idcache_store (LSUP_MDBStore *store, const char *id);
+int
+LSUP_mdbstore_tkey_exists (LSUP_MDBStore *store, LSUP_Key tkey);
+
 
 #endif

+ 145 - 57
include/term.h

@@ -9,10 +9,6 @@
 #include "buffer.h"
 #include "namespace.h"
 
-// "NULL" triple, a value that is never user-provided. Used to fill deleted
-// triples in a keyset.
-#define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
-
 #define UUID4_URN_SIZE UUIDSTR_SIZE + 10
 
 /*
@@ -31,18 +27,6 @@
 /* Blank node. */
 #define LSUP_TERM_BNODE         5
 
-/*
- * In-term identifier types.
- */
-/* Namespace prefix string. */
-#define LSUP_ID_NS              10
-/* Data type IRI. */
-#define LSUP_ID_DATATYPE        11
-/* Language tag string. */
-#define LSUP_ID_LANG            12
-/* Temporary blank node ID. TODO implement. */
-#define LSUP_ID_BNODE           13
-
 /** @brief Default data type for untyped literals (prefixed IRI).
  */
 #define DEFAULT_DTYPE           "http://www.w3.org/2001/XMLSchema#string"
@@ -59,39 +43,46 @@
 
 typedef XXH64_hash_t LSUP_Hash64;
 typedef char LSUP_TermType;
+typedef char LSUP_LangTag[8];
 
 
 typedef struct term_t {
     char *              data;       // URI, literal value, or BNode label.
     union {
-        uint32_t        datatype;   // Data type hash for LSUP_TERM_LITERAL.
-        uint32_t        lang;       // Lang tag hash for LSUP_TERM_LT_LITERAL.
-        uint32_t        bnode_id;   // Blank node ID. TODO implement.
+        LSUP_Key        datatype;   // Data type key for LSUP_TERM_LITERAL.
+        LSUP_LangTag    lang;       // Lang tag for LSUP_TERM_LT_LITERAL.
+        LSUP_Key        bnode_id;   // BNode ID for comparison & skolemization.
         LSUP_NSMap *    nsm;        // NSM handle for prefixed IRI.
     };
     LSUP_TermType      type;       // Term type.
 } LSUP_Term;
 
 
-/** @brief Hash cache for lang tags and data types.
+/** @brief Hash cache for data types.
  */
-typedef struct id_cache_t {
-    uint32_t            key;
-    char *              data;
+struct term_cache_t {
+    LSUP_Key            key;
+    LSUP_Term *         term;
     UT_hash_handle      hh;
-} IDCache;
+};
+
+
+typedef struct triple_t {
+    LSUP_Term *s;
+    LSUP_Term *p;
+    LSUP_Term *o;
+} LSUP_Triple;
 
 
 /*
  * Extern variables.
  */
 
-/** @brief Global ID cache.
+/** @brief Global term cache.
  *
- * Map of internal term identifiers, such as literal data types, language tags
- * and BNode identifiers.
+ * Stores frequently used terms, e.g. data type URIs.
  */
-extern IDCache *LSUP_id_cache;
+extern struct term_cache_t *LSUP_term_cache;
 
 /** @brief Compiled hash of default literal data type.
  */
@@ -101,6 +92,12 @@ extern uint32_t LSUP_default_dtype_key;
  */
 extern regex_t *LSUP_uri_ptn;
 
+/** @brief Default literal data type URI.
+ *
+ * Literal terms created with undefined data type will have it set to this
+ * URI implicitly.
+ */
+extern LSUP_Term *LSUP_default_datatype;
 
 
 /*
@@ -114,16 +111,14 @@ extern regex_t *LSUP_uri_ptn;
  * @param data[in] Term data: textual URI, literal value without data type
  *  or langtag, etc.
  *
- * @param metadata[in]: language tag for language-tagged literals or data type
- *  for other literals.
+ * @param metadata[in]: language tag (LSUP_LangTag) for language-tagged
+ * literals; or data type (LSUP_Term *) for other literals. It may be NULL.
  *
- * @param term[out] Pointer to a new term, which must be freed with
- *  #LSUP_term_free after use.
- *
- * @return LSUP_OK if successful, LSUP_VALUE_ERR if validation fails.
+ * @return New term, which must be freed with #LSUP_term_free after use; or
+ *  NULL on error.
  */
 LSUP_Term *
-LSUP_term_new (LSUP_TermType type, const char *data, const char *metadata);
+LSUP_term_new (LSUP_TermType type, const char *data, void *metadata);
 
 
 /** @brief Placeholder term to use with LSUP_term_reset.
@@ -170,15 +165,28 @@ LSUP_uri_new (const char *data)
 LSUP_rc
 LSUP_term_init(
         LSUP_Term *term, LSUP_TermType type,
-        const char *data, const char *metadata);
+        const char *data, void *metadata);
 
 
+/** @brief Deserialize a buffer into a term.
+ *
+ * @param[in] sterm Buffer to convert into a term. It must be a valid
+ *  serialized term from store or obtained with #LSUP_term_serialize().
+ *
+ * @return New term handle. It must be freed with #LSUP_term_free().
+ */
 LSUP_Term *
 LSUP_term_new_from_buffer (const LSUP_Buffer *sterm);
 
 
+/** @brief Serialize a term into a buffer.
+ *
+ * @param[in] term Term to convert into a buffer.
+ *
+ * @return New buffer handle. It must be freed with #LSUP_buffer_free().
+ */
 LSUP_Buffer *
-LSUP_buffer_new_from_term (const LSUP_Term *term);
+LSUP_term_serialize (const LSUP_Term *term);
 
 
 /**
@@ -190,20 +198,8 @@ LSUP_uri_init (LSUP_Term *term, const char *data);
 
 /** @brief Hash a buffer.
  */
-inline LSUP_Key
-LSUP_term_hash (const LSUP_Term *term)
-{
-    LSUP_Buffer *buf;
-
-    if (UNLIKELY (!term)) buf = BUF_DUMMY;
-    else buf = LSUP_buffer_new_from_term (term);
-
-    LSUP_Key key = LSUP_buffer_hash (buf);
-
-    LSUP_buffer_free (buf);
-
-    return key;
-}
+LSUP_Key
+LSUP_term_hash (const LSUP_Term *term);
 
 
 /**
@@ -218,15 +214,107 @@ void
 LSUP_term_free (LSUP_Term *term);
 
 
-/** @brief Add an identifier to the term cache.
+/** @brief Create a new triple from three terms.
+ *
+ * TODO Term types are not validated at the moment.
+ *
+ * @param[in] s Triple subject. It must be an IRIRef or BNode.
+ *
+ * @param[in] p Triple predicate. It must be an IRIRef.
+ *
+ * @param[in] o Triple object.
+ *
+ */
+LSUP_Triple *
+LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
+
+
+/** @brief Dummy triple with NULL slots. It is not a valid triple.
+ */
+#define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
+
+
+LSUP_Triple *
+LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
+
+
+LSUP_BufferTriple *
+LSUP_triple_serialize (const LSUP_Triple *spo);
+
+
+/** @brief Initialize internal term pointers in a heap-allocated triple.
  *
- * @param[in] key 32-bit hash of the inserted term.
+ * NOTE: the term structures are not copied. If the triple is freed with
+ * #LSUP_triple_free(), the originally provided terms are freed too.
  *
- * @param[in] data Term to insert.
+ * @param spo[in] Triple pointer to initialize.
  */
 LSUP_rc
-LSUP_tcache_add_id (const uint32_t key, const char *data);
+LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
+
+
+/** @brief Free the internal pointers of a triple.
+ *
+ * @param spo[in] Triple to be freed.
+ */
+void
+LSUP_triple_done (LSUP_Triple *spo);
+
 
+/** @brief Free a triple and all its internal pointers.
+ *
+ * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
+ * back end), use a simple free(spo) instead of this.
+ *
+ * @param spo[in] Triple to be freed.
+ */
+void
+LSUP_triple_free (LSUP_Triple *spo);
+
+
+/** @brief Get triple by term position.
+ *
+ * Useful for looping over all terms.
+ *
+ * @param trp[in] Triple pointer.
+ *
+ * @param n[in] Term position: a number from 0 to 2.
+ *
+ * @return Corresponding triple term or NULL if n is out of range.
+ */
+inline LSUP_Term *
+LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n)
+{
+    if (n == TRP_POS_S) return trp->s;
+    if (n == TRP_POS_P) return trp->p;
+    if (n == TRP_POS_O) return trp->o;
+    return NULL;
+}
+
+
+/** @brief Hash a triple.
+ *
+ * TODO This doesn't handle blank nodes correctly.
+ */
+inline LSUP_Key
+LSUP_triple_hash (const LSUP_Triple *trp)
+{
+    LSUP_BufferTriple *strp = LSUP_triple_serialize (trp);
+    LSUP_Key hash = LSUP_btriple_hash (strp);
+    LSUP_btriple_free (strp);
+
+    return hash;
+}
+
+
+/** @brief Add a term to the term cache.
+ *
+ * @param[in] key Hash of the inserted term.
+ *
+ * @param[in] term Term to insert.
+ */
+LSUP_rc
+LSUP_tcache_add (const LSUP_Key key, LSUP_Term *term);
 
 /** @brief Get an identifier from the cache.
  *
@@ -235,7 +323,7 @@ LSUP_tcache_add_id (const uint32_t key, const char *data);
  * @return The retrieved term if found, or NULL. The string must not be
  *  modified or freed.
  */
-const char *
-LSUP_tcache_get_id (const uint32_t key);
+const LSUP_Term *
+LSUP_tcache_get (const LSUP_Key key);
 
 #endif

+ 0 - 115
include/triple.h

@@ -1,115 +0,0 @@
-#ifndef _LSUP_TRIPLE_H
-#define _LSUP_TRIPLE_H
-
-#include "term.h"
-
-typedef struct LSUP_Triple {
-    LSUP_Term *s;
-    LSUP_Term *p;
-    LSUP_Term *o;
-} LSUP_Triple;
-
-typedef enum {
-    TRP_POS_S = 0,
-    TRP_POS_P = 1,
-    TRP_POS_O = 2,
-} LSUP_TriplePos;
-
-
-LSUP_Triple *
-LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
-
-#define TRP_DUMMY LSUP_triple_new (NULL, NULL, NULL)
-
-
-LSUP_Triple *
-LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo);
-
-
-LSUP_BufferTriple *
-LSUP_btriple_new_from_triple (const LSUP_Triple *spo);
-
-
-/** @brief Initialize internal term pointers in a heap-allocated triple.
- *
- * NOTE: the term structures are not copied. If the triple is freed with
- * #LSUP_triple_free(), the originally provided terms are freed too.
- *
- * @param spo[in] Triple pointer to initialize.
- */
-LSUP_rc
-LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o);
-
-
-/** @brief Free the internal pointers of a triple.
- *
- * @param spo[in] Triple to be freed.
- */
-void
-LSUP_triple_done (LSUP_Triple *spo);
-
-
-/** @brief Free a triple and all its internal pointers.
- *
- * NOTE: If the term pointers are not to be freed (e.g. they are owned by a
- * back end), use a simple free(spo) instead of this.
- *
- * @param spo[in] Triple to be freed.
- */
-void
-LSUP_triple_free (LSUP_Triple *spo);
-
-
-#define _FN_BODY \
-    if (n == TRP_POS_S) return trp->s; \
-    if (n == TRP_POS_P) return trp->p; \
-    if (n == TRP_POS_O) return trp->o; \
-    return NULL;
-
-
-/** @brief Get triple by term position.
- *
- * Useful for looping over all terms.
- *
- * @param trp[in] Triple pointer.
- *
- * @param n[in] A number between 0÷2.
- *
- * @return Corresponding triple term or NULL if n is out of range.
- */
-inline LSUP_Term *
-LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n)
-{ _FN_BODY }
-
-
-/** @brief Get serialized triple by term position.
- *
- * Useful for looping over all terms.
- *
- * @param trp[in] Serialized triple pointer.
- *
- * @param n[in] A number between 0÷2.
- *
- * @return Corresponding serialized term or NULL if n is out of range.
- */
-inline LSUP_Buffer *
-LSUP_btriple_pos (const LSUP_BufferTriple *trp, LSUP_TriplePos n)
-{ _FN_BODY }
-#undef _FN_BODY
-
-
-/** @brief Hash a triple.
- *
- * TODO This doesn't handle blank nodes correctly.
- */
-inline LSUP_Key
-LSUP_triple_hash (const LSUP_Triple *trp)
-{
-    LSUP_BufferTriple *strp = LSUP_btriple_new_from_triple (trp);
-    LSUP_Key hash = LSUP_btriple_hash (strp);
-    LSUP_btriple_free (strp);
-
-    return hash;
-}
-
-#endif

+ 1 - 0
src/buffer.c

@@ -156,3 +156,4 @@ LSUP_btriple_free_shallow (LSUP_BufferTriple *sspo)
 
 LSUP_Key LSUP_buffer_hash (const LSUP_Buffer *buf);
 LSUP_Key LSUP_btriple_hash (const LSUP_BufferTriple *strp);
+LSUP_Buffer *LSUP_btriple_pos (const LSUP_BufferTriple *trp, LSUP_TriplePos n);

+ 3 - 12
src/codec/nt_grammar.y

@@ -15,8 +15,8 @@
 %token_type { LSUP_Term * }
 %token_prefix "T_"
 
-%type triple            { LSUP_BufferTriple * }
-%destructor triple      { LSUP_btriple_free ($$); }
+%type triple            { LSUP_Triple * }
+%destructor triple      { LSUP_triple_free ($$); }
 %type subject           { LSUP_Term * }
 %destructor subject     { LSUP_term_free ($$); }
 %type predicate         { LSUP_Term * }
@@ -38,17 +38,8 @@ triples     ::= triples triple eol.
 
 triple(A)   ::= ws subject(S) ws predicate(P) ws object(O) ws DOT. {
 
-                A = LSUP_btriple_new (
-                    LSUP_buffer_new_from_term (S),
-                    LSUP_buffer_new_from_term (P),
-                    LSUP_buffer_new_from_term (O)
-                );
-
+                A = LSUP_triple_new (S, P, O);
                 LSUP_graph_add_iter (it, A);
-
-                LSUP_term_free (S);
-                LSUP_term_free (P);
-                LSUP_term_free (O);
             }
 
 subject     ::= IRIREF.

+ 3 - 3
src/codec_nt.c

@@ -52,7 +52,7 @@ term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
                 term->datatype != 0
                 && term->datatype != LSUP_default_dtype_key
             ) {
-                metadata = LSUP_tcache_get_id (term->datatype);
+                metadata = LSUP_tcache_get (term->datatype)->data;
                 buf_len += strlen (metadata) + 4; // Room for ^^<>
             }
 
@@ -78,8 +78,8 @@ term_to_nt (const LSUP_Term *term, const LSUP_NSMap *nsm, char **out_p)
             buf_len = strlen (escaped) + 3; // Room for "" and terminator
 
             if (term->lang != 0) {
-                metadata = LSUP_tcache_get_id (term->lang);
-                buf_len += strlen(metadata) + 1; // Room for @
+                metadata = term->lang;
+                buf_len += strlen (metadata) + 1; // Room for @
             }
 
             tmp = realloc (out, buf_len);

+ 11 - 6
src/core.c

@@ -4,20 +4,25 @@
 #include "lmdb.h"
 
 
-/*
+/** @brief Warning messages.
+ *
  * The message corresponding to the rc is found by
- * warning_msg[rc - LSUP_MIN_WARNING].
+ * warning_msg[rc - LSUP_MIN_WARNING]. #LSUP_strerror() facilitates this.
  */
 char *warning_msg[] = {
     "LSUP_NOACTION: No action or change of state occurred.",
     "LSUP_NORESULT: No result.",
     "LSUP_END: End of the loop reached.",
-    "LSUP_CONFLICT: A resource conflict prevented an action from completing."
+    "LSUP_CONFLICT: A conflict prevented a resource from being updated.",
 };
 
-/*
+/** @brief Error messages.
+ *
+ * Note that all error values are < 0 so it is possible to set conditions to
+ * be triggered only by error return values.
+ *
  * The message corresponding to the rc is found by
- * err_msg[rc - LSUP_MIN_ERROR].
+ * err_msg[rc - LSUP_MIN_ERROR]. #LSUP_strerror() facilitates this.
  */
 char *err_msg[] = {
     "LSUP_ERROR: Runtime error.",
@@ -28,7 +33,7 @@ char *err_msg[] = {
     "LSUP_NOT_IMPL_ERR: Feature is not implemented.",
     "LSUP_IO_ERR: Input/Output error.",
     "LSUP_MEM_ERR: Memory error.",
-    "LSUP_CONFLICT_ERR: A resource conflict resulted in an invalid state.",
+    "LSUP_CONFLICT_ERR: A resource conflict interrupted the operation.",
     "LSUP_ENV_ERR: Invalid environment. Did you call LSUP_init()?",
 };
 

+ 9 - 10
src/environment.c

@@ -1,5 +1,6 @@
 #include <unistd.h>
 
+#include "term.h"
 #include "environment.h"
 
 
@@ -24,7 +25,7 @@ LSUP_env_new (
 
     // Default store context.
     LSUP_Term *default_ctx_uri = LSUP_uri_new (default_ctx);
-    env->default_ctx = LSUP_buffer_new_from_term (default_ctx_uri);
+    env->default_ctx = LSUP_term_serialize (default_ctx_uri);
     LSUP_term_free (default_ctx_uri);
     log_info ("Set up default context.");
 
@@ -37,9 +38,6 @@ LSUP_env_new (
     // Get default namespace from store.
     RCNL (LSUP_mdbstore_nsm_get (env->mdb_store, &env->nsm));
 
-    // Load data types, lang tags from mdb into memory cache.
-    LSUP_mdbstore_idcache_get (env->mdb_store);
-
     return env;
 }
 
@@ -67,8 +65,9 @@ LSUP_init (void)
         }
 
         // Default literal datatype key.
-        LSUP_default_dtype_key  = XXH32 (
-                DEFAULT_DTYPE, strlen (DEFAULT_DTYPE) + 1, HASH_SEED);
+        LSUP_default_datatype = LSUP_uri_new (DEFAULT_DTYPE);
+        LSUP_default_dtype_key  = LSUP_term_hash (LSUP_default_datatype);
+        LSUP_tcache_add (LSUP_default_dtype_key, LSUP_default_datatype);
 
         // Default permanent store path.
         char *mdb_path = getenv ("LSUP_MDB_STORE_PATH");
@@ -117,10 +116,10 @@ LSUP_env_free (LSUP_Env *env)
     LSUP_nsmap_free (env->nsm);
 
     // Free ID cache.
-    IDCache *entry, *tmp;
-    HASH_ITER (hh, LSUP_id_cache, entry, tmp) {
-        HASH_DEL (LSUP_id_cache, entry);
-        free (entry->data);
+    struct term_cache_t *entry, *tmp;
+    HASH_ITER (hh, LSUP_term_cache, entry, tmp) {
+        HASH_DEL (LSUP_term_cache, entry);
+        LSUP_term_free (entry->term);
         free (entry);
     }
 

+ 60 - 49
src/graph.c

@@ -96,9 +96,9 @@ LSUP_graph_new_lookup_env (
         log_error ("No valid environment passed. Did you call LSUP_init()?");
         return NULL;
     }
-    LSUP_Buffer *ss = LSUP_buffer_new_from_term (s);
-    LSUP_Buffer *sp = LSUP_buffer_new_from_term (p);
-    LSUP_Buffer *so = LSUP_buffer_new_from_term (o);
+    LSUP_Buffer *ss = LSUP_term_serialize (s);
+    LSUP_Buffer *sp = LSUP_term_serialize (p);
+    LSUP_Buffer *so = LSUP_term_serialize (o);
 
     LSUP_Buffer **ctx_a = LSUP_mdbstore_lookup_contexts (
             env->mdb_store, ss, sp, so);
@@ -274,7 +274,7 @@ LSUP_graph_add_init (LSUP_Graph *gr)
     if (gr->store_type == LSUP_STORE_MEM) {
         it->ht_iter = LSUP_htstore_add_init (gr->ht_store);
     } else {
-        LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
+        LSUP_Buffer *sc = LSUP_term_serialize (gr->uri);
         it->mdb_iter = LSUP_mdbstore_add_init (gr->mdb_store, sc);
         LSUP_buffer_free (sc);
     }
@@ -286,12 +286,46 @@ LSUP_graph_add_init (LSUP_Graph *gr)
 
 
 LSUP_rc
-LSUP_graph_add_iter (LSUP_GraphIterator *it, const LSUP_BufferTriple *sspo)
+LSUP_graph_add_iter (LSUP_GraphIterator *it, const LSUP_Triple *spo)
 {
-    if (it->graph->store_type == LSUP_STORE_MEM)
-        return LSUP_htstore_add_iter (it->ht_iter, sspo);
+    LSUP_rc rc;
+
+    LSUP_BufferTriple *sspo = LSUP_triple_serialize (spo);
+    if (UNLIKELY (!sspo)) return LSUP_MEM_ERR;
+
+    if (it->graph->store_type == LSUP_STORE_MEM) {
+        rc = LSUP_htstore_add_iter (it->ht_iter, sspo);
+
+        for (int i = 0; i < 3; i++) {
+            LSUP_htstore_add_term (
+                    it->graph->ht_store, LSUP_btriple_pos (sspo, i));
+            // HT store uses term keys from tcache.
+        }
+    } else {
+        rc = LSUP_mdbstore_add_iter (it->mdb_iter, sspo);
+
+        for (int i = 0; i < 3; i++) {
+            LSUP_mdbstore_add_term (
+                    it->graph->mdb_store, LSUP_btriple_pos (sspo, i));
+
+            // Store datatype term permanently.
+            LSUP_Term *term = LSUP_triple_pos (spo, i);
+            if (
+                term->type == LSUP_TERM_LITERAL
+                && !LSUP_mdbstore_tkey_exists (
+                        it->graph->mdb_store, term->datatype)
+            ) {
+                LSUP_Buffer *ser_dtype = LSUP_term_serialize (
+                        LSUP_tcache_get (term->datatype));
+                LSUP_mdbstore_add_term (it->graph->mdb_store, ser_dtype);
+                LSUP_buffer_free (ser_dtype);
+            }
+        }
+    }
+
+    LSUP_btriple_free (sspo);
 
-    return LSUP_mdbstore_add_iter (it->mdb_iter, sspo);
+    return rc;
 }
 
 
@@ -308,44 +342,21 @@ LSUP_graph_add_done (LSUP_GraphIterator *it)
 
 
 LSUP_rc
-LSUP_graph_add (
-        Graph *gr, const LSUP_Triple trp[],
-        const LSUP_BufferTriple strp[], size_t *inserted)
+LSUP_graph_add (Graph *gr, const LSUP_Triple trp[], size_t *inserted)
 {
-    /*
-     * NOTE It is possible to pass both sets of RDF triples and buffer triples.
-     */
-
     LSUP_rc rc = LSUP_NOACTION;
 
     // Initialize iterator.
     LSUP_GraphIterator *it = LSUP_graph_add_init (gr);
 
     // Serialize and insert RDF triples.
-    if (trp) {
-        for (size_t i = 0; trp[i].s != NULL; i++) {
-            log_trace ("Inserting triple #%lu", i);
+    for (size_t i = 0; trp[i].s != NULL; i++) {
+        log_trace ("Inserting triple #%lu", i);
 
-            LSUP_BufferTriple *sspo = LSUP_btriple_new_from_triple (trp + i);
-            if (UNLIKELY (!sspo)) return LSUP_MEM_ERR;
-            LSUP_rc db_rc = LSUP_graph_add_iter (it, sspo);
+        LSUP_rc db_rc = LSUP_graph_add_iter (it, trp + i);
 
-            LSUP_btriple_free (sspo);
-
-            if (db_rc == LSUP_OK) rc = LSUP_OK;
-            if (UNLIKELY (db_rc < 0)) return db_rc;
-        }
-    }
-
-    // Insert serialized triples.
-    if (strp) {
-        for (size_t i = 0; strp[i].s != NULL; i++) {
-            log_trace ("Inserting serialized triple #%lu", i);
-            LSUP_rc db_rc = LSUP_graph_add_iter (it, strp + i);
-
-            if (db_rc == LSUP_OK) rc = LSUP_OK;
-            if (UNLIKELY (db_rc < 0)) return db_rc;
-        }
+        if (db_rc == LSUP_OK) rc = LSUP_OK;
+        if (UNLIKELY (db_rc < 0)) return db_rc;
     }
 
     if (inserted) {
@@ -369,10 +380,10 @@ LSUP_graph_remove (
 {
     LSUP_rc rc;
 
-    LSUP_Buffer *ss = LSUP_buffer_new_from_term (s);
-    LSUP_Buffer *sp = LSUP_buffer_new_from_term (p);
-    LSUP_Buffer *so = LSUP_buffer_new_from_term (o);
-    LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
+    LSUP_Buffer *ss = LSUP_term_serialize (s);
+    LSUP_Buffer *sp = LSUP_term_serialize (p);
+    LSUP_Buffer *so = LSUP_term_serialize (o);
+    LSUP_Buffer *sc = LSUP_term_serialize (gr->uri);
 
     if (gr->store_type == LSUP_STORE_MEM)
         rc = LSUP_htstore_remove (gr->ht_store, ss, sp, so, ct);
@@ -397,10 +408,10 @@ LSUP_graph_lookup (const Graph *gr, const LSUP_Term *s, const LSUP_Term *p,
 
     it->graph = gr;
 
-    LSUP_Buffer *ss = LSUP_buffer_new_from_term (s);
-    LSUP_Buffer *sp = LSUP_buffer_new_from_term (p);
-    LSUP_Buffer *so = LSUP_buffer_new_from_term (o);
-    LSUP_Buffer *sc = LSUP_buffer_new_from_term (gr->uri);
+    LSUP_Buffer *ss = LSUP_term_serialize (s);
+    LSUP_Buffer *sp = LSUP_term_serialize (p);
+    LSUP_Buffer *so = LSUP_term_serialize (o);
+    LSUP_Buffer *sc = LSUP_term_serialize (gr->uri);
 
     if (it->graph->store_type == LSUP_STORE_MEM) {
         it->ht_iter = LSUP_htstore_lookup (it->graph->ht_store, ss, sp, so);
@@ -525,15 +536,15 @@ graph_copy_contents (const LSUP_Graph *src, LSUP_Graph *dest)
 
     GraphIterator *it = LSUP_graph_lookup (src, NULL, NULL, NULL, NULL);
 
-    LSUP_BufferTriple sspo;
+    LSUP_Triple spo;
 
     LSUP_GraphIterator *add_it = LSUP_graph_add_init (dest);
-    while (graph_iter_next_buffer (it, &sspo) != LSUP_END) {
-        LSUP_rc add_rc = LSUP_graph_add_iter (add_it, &sspo);
-
+    while (LSUP_graph_iter_next (it, &spo) != LSUP_END) {
+        LSUP_rc add_rc = LSUP_graph_add_iter (add_it, &spo);
         if (LIKELY (add_rc == LSUP_OK)) rc = LSUP_OK;
         else if (add_rc < 0) return add_rc;
     }
+
     LSUP_graph_add_done (add_it);
     LSUP_graph_iter_free (it);
 

+ 20 - 1
src/store_htable.c

@@ -280,6 +280,25 @@ LSUP_htstore_size (LSUP_HTStore *ht)
 { return HASH_COUNT (ht->keys); }
 
 
+/*
+ * Add a single serialized term to the store's term index.
+ *
+ * The index key is LSUP_buffer_hash (sterm). The entry holds a buffer
+ * created with LSUP_buffer_new () from the size and address of sterm, not
+ * the caller's buffer itself.
+ *
+ * Returns LSUP_OK if an entry was added, LSUP_NOACTION if the key was
+ * already in the index, LSUP_MEM_ERR if the term buffer cannot be created.
+ */
+LSUP_rc
+LSUP_htstore_add_term (HTStore *store, const LSUP_Buffer *sterm)
+{
+    LSUP_Key tk = LSUP_buffer_hash (sterm);
+    log_trace ("Adding term: %lx", tk);
+
+    IndexEntry *ins = NULL;
+    HASH_FIND (hh, store->idx, &tk, KLEN, ins);
+    if (ins) return LSUP_NOACTION;
+
+    MALLOC_GUARD (ins, LSUP_MEM_ERR);
+    ins->key = tk;
+    ins->sterm = LSUP_buffer_new (sterm->size, sterm->addr);
+    // Never index an entry without its term.
+    // NOTE(review): assumes LSUP_buffer_new () returns NULL on failure.
+    if (!ins->sterm) {
+        free (ins);
+        return LSUP_MEM_ERR;
+    }
+    HASH_ADD (hh, store->idx, key, KLEN, ins);
+
+    return LSUP_OK;
+}
+
+
 LSUP_HTIterator *
 LSUP_htstore_add_init (HTStore *store)
 {
@@ -320,7 +339,7 @@ LSUP_htstore_add_iter (HTIterator *it, const LSUP_BufferTriple *sspo)
         return LSUP_NOACTION;
     }
 
-    // Add terms to index.
+    // Add terms to index. Terms are copied.
     for (int i = 0; i < 3; i++) {
         IndexEntry *ins = NULL;
         HASH_FIND (hh, it->store->idx, spok + i, KLEN, ins);

+ 38 - 15
src/store_mdb.c

@@ -1,5 +1,7 @@
 #include <ftw.h>
 
+#include "uthash.h"
+
 #include "store_mdb.h"
 #include "data/bootstrap.h"
 
@@ -312,15 +314,6 @@ LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx)
                 LSUP_nsmap_add (nsm, init_nsmap[i][0], init_nsmap[i][1]);
 
             LSUP_mdbstore_nsm_store (store, nsm);
-
-            // Load initial IDs.
-            for (int i = 0; init_datatypes[i] != NULL; i++) {
-                char *fq_uri;
-                LSUP_nsmap_normalize_uri (nsm, init_datatypes[i], &fq_uri);
-                db_rc = LSUP_mdbstore_idcache_store (store, fq_uri);
-                free (fq_uri);
-                if (UNLIKELY (db_rc < 0)) goto fail;
-            }
         }
     }
 
@@ -1031,8 +1024,36 @@ LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm)
 }
 
 
+/*
+ * Check whether a term key is present in the database at
+ * store->dbi[IDX_T_ST].
+ *
+ * Opens its own read-only transaction and cursor, positions the cursor on
+ * tkey with MDB_SET, then closes the cursor and aborts the transaction.
+ *
+ * Returns 1 if the key is found, 0 if it is not, LSUP_DB_ERR on any other
+ * LMDB error (including failure to open the transaction or the cursor).
+ */
+int
+LSUP_mdbstore_tkey_exists (LSUP_MDBStore *store, LSUP_Key tkey)
+{
+    int db_rc, rc;
+    MDB_val key, data;
+    MDB_txn *txn = NULL;
+    MDB_cursor *cur = NULL;
+
+    db_rc = mdb_txn_begin (store->env, NULL, MDB_RDONLY, &txn);
+
+    if (db_rc == MDB_SUCCESS)
+        db_rc = mdb_cursor_open (txn, store->dbi[IDX_T_ST], &cur);
+
+    if (db_rc == MDB_SUCCESS) {
+        // MDB_SET looks up the key handed in, so it must be filled first.
+        key.mv_data = &tkey;
+        key.mv_size = sizeof (tkey);
+
+        db_rc = mdb_cursor_get (cur, &key, &data, MDB_SET);
+    }
+
+    if (db_rc == MDB_SUCCESS) rc = 1;
+    else if (db_rc == MDB_NOTFOUND) rc = 0;
+    else {
+        log_error ("DB error: %s", LSUP_strerror (db_rc));
+        rc = LSUP_DB_ERR;
+    }
+
+    if (cur) mdb_cursor_close (cur);
+    if (txn) mdb_txn_abort (txn);
+
+    return rc;
+}
+
+
 LSUP_rc
-LSUP_mdbstore_idcache_store (LSUP_MDBStore *store, const char *id)
+LSUP_mdbstore_add_term (LSUP_MDBStore *store, const LSUP_Buffer *sterm)
 {
     int db_rc;
     MDB_val key, data;
@@ -1042,15 +1063,15 @@ LSUP_mdbstore_idcache_store (LSUP_MDBStore *store, const char *id)
     RCCK (mdb_txn_begin (store->env, store->txn, 0, &txn));
 
     MDB_cursor *cur;
-    db_rc = mdb_cursor_open (txn, store->dbi[IDX_IDK_ID], &cur);
+    db_rc = mdb_cursor_open (txn, store->dbi[IDX_T_ST], &cur);
     if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
 
-    uint32_t k = XXH32 (id, strlen (id) + 1, HASH_SEED);
+    LSUP_Key k = LSUP_buffer_hash (sterm);
     key.mv_data = &k;
     key.mv_size = sizeof (k);
 
-    data.mv_data = (void *) id;
-    data.mv_size = strlen (id) + 1;
+    data.mv_data = sterm->addr;
+    data.mv_size = sterm->size;
 
     db_rc = mdb_cursor_put (cur, &key, &data, MDB_NOOVERWRITE);
     if (db_rc != MDB_SUCCESS && db_rc != MDB_KEYEXIST) goto fail;
@@ -1070,8 +1091,9 @@ fail:
 }
 
 
+/*
 LSUP_rc
-LSUP_mdbstore_idcache_get (LSUP_MDBStore *store)
+LSUP_mdbstore_tcache_get (LSUP_MDBStore *store)
 {
     int db_rc, rc = LSUP_NOACTION;
     MDB_txn *txn = NULL;
@@ -1109,6 +1131,7 @@ finally:
 
     return rc;
 }
+*/
 
 
 /* * * Static functions. * * */

+ 151 - 23
src/term.c

@@ -7,13 +7,16 @@
  */
 #define TERM_PACK_FMT "S(sUc)"
 
+#define MAX_VALID_TERM_TYPE     LSUP_TERM_BNODE /* For type validation. */
+
 /*
  * Extern variables.
  */
 
-IDCache *LSUP_id_cache = NULL;
+struct term_cache_t *LSUP_term_cache = NULL;
 uint32_t LSUP_default_dtype_key = 0;
 regex_t *LSUP_uri_ptn;
+LSUP_Term *LSUP_default_datatype = NULL;
 
 
 /*
@@ -25,12 +28,12 @@ static const char *invalid_uri_chars = "<>\" {}|\\^`";
 
 
 /*
- * API functions.
+ * Term API.
  */
 
 LSUP_Term *
 LSUP_term_new (
-        LSUP_TermType type, const char *data, const char *metadata)
+        LSUP_TermType type, const char *data, void *metadata)
 {
     LSUP_Term *term;
     CALLOC_GUARD (term, NULL);
@@ -79,7 +82,7 @@ fail:
 
 
 LSUP_Buffer *
-LSUP_buffer_new_from_term (const LSUP_Term *term)
+LSUP_term_serialize (const LSUP_Term *term)
 {
     if (UNLIKELY (!term)) return NULL;
 
@@ -100,10 +103,18 @@ LSUP_buffer_new_from_term (const LSUP_Term *term)
 LSUP_rc
 LSUP_term_init(
         LSUP_Term *term, LSUP_TermType type,
-        const char *data, const char *metadata)
+        const char *data, void *metadata)
 {
     // This can never be LSUP_TERM_UNDEFINED.
-    if (!data) return LSUP_VALUE_ERR;
+    if (!data) {
+        log_error ("No data provided for term.");
+        return LSUP_VALUE_ERR;
+    }
+    if (type <= LSUP_TERM_UNDEFINED || type > MAX_VALID_TERM_TYPE) {
+        log_error ("%d is not a valid term type.", type);
+        return LSUP_VALUE_ERR;
+    }
+
     term->type = type;
 
     // Validate URI.
@@ -129,12 +140,28 @@ LSUP_term_init(
     strcpy (term->data, data);
 
     if (term->type == LSUP_TERM_LT_LITERAL) {
-        term->lang = XXH32 (metadata, strlen (metadata) + 1, HASH_SEED);
-        LSUP_tcache_add_id (term->lang, metadata);
+        // Lang tags longer than 7 characters will be truncated.
+        strncpy (term->lang, metadata, sizeof (term->lang) - 1);
+        term->lang[7] = '\0';
+
+    } else if (term->type == LSUP_TERM_LITERAL) {
+        log_trace ("Storing data type.");
+        if (metadata && strcmp (metadata, DEFAULT_DTYPE) != 0) {
+            LSUP_Term *dtype = LSUP_uri_new ((char *) metadata);
+            term->datatype = LSUP_term_hash (dtype);
+
+            if (LSUP_tcache_get (term->datatype) == NULL)
+                LSUP_tcache_add (term->datatype, dtype);
+
+            else LSUP_term_free (dtype);
 
-    } else if (metadata && strcmp (metadata, DEFAULT_DTYPE) != 0) {
-        term->datatype = XXH32 (metadata, strlen (metadata) + 1, HASH_SEED);
-        LSUP_tcache_add_id (term->datatype, metadata);
+        } else term->datatype = LSUP_default_dtype_key;
+
+    // Blank node.
+    } else {
+        // TODO This is not usable for global skolemization.
+        term->bnode_id = XXH64 (
+                term->data, strlen (term->data) + 1, HASH_SEED);
     }
 
     return LSUP_OK;
@@ -161,6 +188,22 @@ LSUP_uri_init (LSUP_Term *term, const char *data)
 }
 
 
+/*
+ * Hash a term by serializing it and hashing the resulting buffer.
+ *
+ * A NULL term is hashed as BUF_DUMMY instead of a serialized term.
+ * NOTE(review): the buffer is passed to LSUP_buffer_free () in both cases,
+ * BUF_DUMMY included; confirm that this is valid for BUF_DUMMY.
+ */
+LSUP_Key
+LSUP_term_hash (const LSUP_Term *term)
+{
+    LSUP_Buffer *sterm =
+        UNLIKELY (!term) ? BUF_DUMMY : LSUP_term_serialize (term);
+
+    LSUP_Key hash = LSUP_buffer_hash (sterm);
+    LSUP_buffer_free (sterm);
+
+    return hash;
+}
+
+
 bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
 {
     if (term1->type != term2->type)
@@ -173,7 +216,7 @@ bool LSUP_term_equals (const LSUP_Term *term1, const LSUP_Term *term2)
         return term1->datatype == term2->datatype;
 
     if (term1->type == LSUP_TERM_LT_LITERAL)
-        return term1->lang == term2->lang;
+        return strncmp (term1->lang, term2->lang, sizeof (term1->lang)) == 0;
 
     return true;
 }
@@ -195,34 +238,117 @@ void LSUP_term_free (LSUP_Term *term)
 }
 
 
+/*
+ * Triple API.
+ */
+
+/*
+ * Allocate a triple and initialize it with the given terms.
+ *
+ * Returns the new triple, or NULL if allocation fails or
+ * LSUP_triple_init () returns a non-zero code.
+ */
+LSUP_Triple *
+LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
+{
+    LSUP_Triple *trp = malloc (sizeof (*trp));
+
+    // On init failure, release the shell and fall through returning NULL.
+    if (trp && UNLIKELY (LSUP_triple_init (trp, s, p, o))) {
+        free (trp);
+        trp = NULL;
+    }
+
+    return trp;
+}
+
+
+/*
+ * Allocate a triple whose terms are built from a buffer triple.
+ *
+ * Each position is filled with LSUP_term_new_from_buffer () on the matching
+ * buffer. Returns NULL only if the triple itself cannot be allocated; the
+ * individual term results are not checked.
+ */
+LSUP_Triple *
+LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo)
+{
+    LSUP_Triple *trp = malloc (sizeof (*trp));
+
+    if (trp) {
+        trp->s = LSUP_term_new_from_buffer (sspo->s);
+        trp->p = LSUP_term_new_from_buffer (sspo->p);
+        trp->o = LSUP_term_new_from_buffer (sspo->o);
+    }
+
+    return trp;
+}
+
+
+/*
+ * Allocate a buffer triple holding the serialized terms of a triple.
+ *
+ * Each position is filled with LSUP_term_serialize () on the matching term.
+ * Returns NULL only if the buffer triple itself cannot be allocated; the
+ * individual serialization results are not checked.
+ */
+LSUP_BufferTriple *
+LSUP_triple_serialize (const LSUP_Triple *spo)
+{
+    LSUP_BufferTriple *btrp = malloc (sizeof (*btrp));
+
+    if (btrp) {
+        btrp->s = LSUP_term_serialize (spo->s);
+        btrp->p = LSUP_term_serialize (spo->p);
+        btrp->o = LSUP_term_serialize (spo->o);
+    }
+
+    return btrp;
+}
+
+
+/*
+ * Point an existing triple at the given subject, predicate and object.
+ *
+ * The term pointers are stored as passed in; nothing is copied. Always
+ * returns LSUP_OK.
+ */
+LSUP_rc
+LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
+{
+    // TODO validate term types.
+    spo->o = o;
+    spo->p = p;
+    spo->s = s;
+
+    return LSUP_OK;
+}
+
+
+/*
+ * Call LSUP_term_done () on the three terms of a triple.
+ *
+ * The triple structure itself is not freed. A NULL triple is a no-op.
+ */
+void
+LSUP_triple_done (LSUP_Triple *spo)
+{
+    if (LIKELY (spo)) {
+        LSUP_term_done (spo->s);
+        LSUP_term_done (spo->p);
+        LSUP_term_done (spo->o);
+    }
+}
+
+
+/*
+ * Call LSUP_term_free () on the three terms of a triple, then free the
+ * triple structure itself. A NULL triple is a no-op.
+ */
+void
+LSUP_triple_free (LSUP_Triple *spo)
+{
+    if (LIKELY (spo)) {
+        LSUP_term_free (spo->s);
+        LSUP_term_free (spo->p);
+        LSUP_term_free (spo->o);
+
+        free (spo);
+    }
+}
+
+
 LSUP_rc
-LSUP_tcache_add_id (const uint32_t key, const char *data)
+LSUP_tcache_add (const LSUP_Key key, LSUP_Term *term)
 {
-    struct id_cache_t *entry;
+    struct term_cache_t *entry;
 
-    HASH_FIND_INT (LSUP_id_cache, &key, entry);
+    HASH_FIND_INT (LSUP_term_cache, &key, entry);
     // Many calls will likely attempt inserting duplicates after the first one.
     if (LIKELY (entry)) return LSUP_NOACTION;
 
     MALLOC_GUARD (entry, LSUP_MEM_ERR);
     entry->key = key;
-    entry->data = strdup (data);
-    HASH_ADD_INT (LSUP_id_cache, key, entry);
+    entry->term = term;
+    HASH_ADD_INT (LSUP_term_cache, key, entry);
 
     return LSUP_OK;
 }
 
 
-const char *
-LSUP_tcache_get_id (const uint32_t key)
+const LSUP_Term *
+LSUP_tcache_get (const LSUP_Key key)
 {
-    struct id_cache_t *entry;
+    struct term_cache_t *entry;
 
-    HASH_FIND_INT (LSUP_id_cache, &key, entry);
-    if (entry) log_trace ("Id found for key %u: %s", key, entry->data);
+    HASH_FIND_INT (LSUP_term_cache, &key, entry);
+    if (entry) log_trace ("ID found for key %u: %s", key, entry->term->data);
     else log_trace ("No ID found for key %u.", key);
 
-    return (entry) ? entry->data : NULL;
+    return (entry) ? entry->term : NULL;
 }
 
 
@@ -231,3 +357,5 @@ LSUP_tcache_get_id (const uint32_t key)
 LSUP_Key LSUP_term_hash (const LSUP_Term *term);
 LSUP_Term *LSUP_uri_new (const char *data);
 LSUP_rc LSUP_uri_init (LSUP_Term *term, const char *data);
+LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
+LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);

+ 0 - 88
src/triple.c

@@ -1,88 +0,0 @@
-#include "triple.h"
-
-// Extern inline prototypes.
-LSUP_Term *LSUP_triple_pos (const LSUP_Triple *trp, LSUP_TriplePos n);
-LSUP_Buffer *LSUP_btriple_pos (const LSUP_BufferTriple *trp, LSUP_TriplePos n);
-
-
-LSUP_Triple *
-LSUP_triple_new(LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
-{
-    LSUP_Triple *spo = malloc (sizeof (*spo));
-    if (!spo) return NULL;
-
-    if (UNLIKELY (LSUP_triple_init (spo, s, p, o))) {
-        free (spo);
-        return NULL;
-    }
-
-    return spo;
-}
-
-
-LSUP_Triple *
-LSUP_triple_new_from_btriple (const LSUP_BufferTriple *sspo)
-{
-    LSUP_Triple *spo = malloc (sizeof (*spo));
-    if (!spo) return NULL;
-
-    spo->s = LSUP_term_new_from_buffer (sspo->s);
-    spo->p = LSUP_term_new_from_buffer (sspo->p);
-    spo->o = LSUP_term_new_from_buffer (sspo->o);
-
-    return spo;
-}
-
-
-LSUP_BufferTriple *
-LSUP_btriple_new_from_triple (const LSUP_Triple *spo)
-{
-    LSUP_BufferTriple *sspo = malloc (sizeof (*sspo));
-    if (!sspo) return NULL;
-
-    sspo->s = LSUP_buffer_new_from_term (spo->s);
-    sspo->p = LSUP_buffer_new_from_term (spo->p);
-    sspo->o = LSUP_buffer_new_from_term (spo->o);
-
-    return sspo;
-}
-
-
-LSUP_rc
-LSUP_triple_init (LSUP_Triple *spo, LSUP_Term *s, LSUP_Term *p, LSUP_Term *o)
-{
-    spo->s = s;
-    spo->p = p;
-    spo->o = o;
-
-    return LSUP_OK;
-}
-
-
-void
-LSUP_triple_done (LSUP_Triple *spo)
-{
-    if (UNLIKELY (!spo)) return;
-
-    LSUP_term_done (spo->s);
-    LSUP_term_done (spo->p);
-    LSUP_term_done (spo->o);
-}
-
-
-void
-LSUP_triple_free (LSUP_Triple *spo)
-{
-    if (UNLIKELY (!spo)) return;
-
-    LSUP_term_free (spo->s);
-    LSUP_term_free (spo->p);
-    LSUP_term_free (spo->o);
-
-    free (spo);
-}
-
-
-/* Inline extern prototypes. */
-
-LSUP_Key LSUP_triple_hash (const LSUP_Triple *trp);

+ 1 - 1
test/assets/triples.h

@@ -1,7 +1,7 @@
 #ifndef _TEST_ASSETS_H
 #define _TEST_ASSETS_H
 
-#include "triple.h"
+#include "term.h"
 
 #define NUM_TRP 10
 

+ 1 - 1
test/test_codec_nt.c

@@ -165,7 +165,7 @@ static int test_encode_nt_graph()
     if (!gr) return LSUP_MEM_ERR;
 
     size_t ins;
-    LSUP_graph_add_trp (gr, trp, &ins);
+    LSUP_graph_add (gr, trp, &ins);
 
     char *out = calloc (1, 1);
     LSUP_CodecIterator *it = nt_codec.encode_graph_init (gr);

+ 4 - 4
test/test_graph.c

@@ -34,7 +34,7 @@ _graph_add (LSUP_store_type type)
     ASSERT (gr != NULL, "Error creating graph!");
 
     size_t ct;
-    LSUP_graph_add_trp (gr, trp, &ct);
+    LSUP_graph_add (gr, trp, &ct);
 
     EXPECT_INT_EQ (ct, 8);
     EXPECT_INT_EQ (LSUP_graph_size (gr), 8);
@@ -110,7 +110,7 @@ _graph_lookup (LSUP_store_type type)
     LSUP_Graph *gr = LSUP_graph_new (type);
 
     size_t ct;
-    LSUP_graph_add_trp (gr, trp, &ct);
+    LSUP_graph_add (gr, trp, &ct);
 
     EXPECT_INT_EQ (ct, 8);
     EXPECT_INT_EQ (LSUP_graph_size (gr), 8);
@@ -153,7 +153,7 @@ _graph_remove (LSUP_store_type type)
     LSUP_Graph *gr = LSUP_graph_new (type);
 
     size_t ct;
-    LSUP_graph_add_trp (gr, trp, &ct);
+    LSUP_graph_add (gr, trp, &ct);
 
     EXPECT_INT_EQ (ct, 8);
     EXPECT_INT_EQ (LSUP_graph_size (gr), 8);
@@ -220,7 +220,7 @@ static int test_graph_copy()
     LSUP_Graph *gr1 = LSUP_graph_new (LSUP_STORE_MEM);
     ASSERT (gr1 != NULL, "Error creating graph!");
 
-    LSUP_graph_add_trp (gr1, trp, NULL);
+    LSUP_graph_add (gr1, trp, NULL);
 
     LSUP_Graph *gr2 = LSUP_graph_copy (gr1);
     EXPECT_INT_EQ (LSUP_graph_size (gr2), 8);

+ 1 - 1
test/test_store_ht.c

@@ -13,7 +13,7 @@ static int test_htstore()
     LSUP_BufferTriple *ser_trp[NUM_TRP];
 
     for (int i = 0; i < NUM_TRP; i++)
-        ser_trp[i] = LSUP_btriple_new_from_triple (trp + i);
+        ser_trp[i] = LSUP_triple_serialize (trp + i);
 
     // Test adding.
     LSUP_HTIterator *it = LSUP_htstore_add_init (store);

+ 5 - 5
test/test_store_mdb.c

@@ -19,7 +19,7 @@ static int test_triple_store()
     LSUP_BufferTriple ser_trp[NUM_TRP];
 
     for (int i = 0; i < NUM_TRP; i++) {
-        LSUP_BufferTriple *tmp = LSUP_btriple_new_from_triple (trp + i);
+        LSUP_BufferTriple *tmp = LSUP_triple_serialize (trp + i);
         ser_trp[i] = *tmp;
         free (tmp);
     }
@@ -108,7 +108,7 @@ static int test_quad_store()
     EXPECT_PASS (LSUP_mdbstore_setup (path, true));
 
     LSUP_Term *ctx1 = LSUP_uri_new ("urn:c:1");
-    LSUP_Buffer *sc1 = LSUP_buffer_new_from_term (ctx1);
+    LSUP_Buffer *sc1 = LSUP_term_serialize (ctx1);
 
     LSUP_MDBStore *store = LSUP_mdbstore_new (path, sc1); // quad store.
     ASSERT (store != NULL, "Error initializing store!");
@@ -117,7 +117,7 @@ static int test_quad_store()
     LSUP_BufferTriple ser_trp[NUM_TRP];
 
     for (int i = 0; i < NUM_TRP; i++) {
-        LSUP_BufferTriple *tmp = LSUP_btriple_new_from_triple (trp + i);
+        LSUP_BufferTriple *tmp = LSUP_triple_serialize (trp + i);
         ser_trp[i] = *tmp;
         free (tmp);
     }
@@ -128,7 +128,7 @@ static int test_quad_store()
     EXPECT_INT_EQ (ct, 6);
 
     LSUP_Term *ctx2 = LSUP_uri_new ("urn:c:2");
-    LSUP_Buffer *sc2 = LSUP_buffer_new_from_term (ctx2);
+    LSUP_Buffer *sc2 = LSUP_term_serialize (ctx2);
 
     // Only triples 4÷9 in context 2 (effectively 4 non-duplicates).
     EXPECT_PASS (LSUP_mdbstore_add (store, sc2, ser_trp + 4, 6, &ct));
@@ -139,7 +139,7 @@ static int test_quad_store()
 
     // This context has no triples.
     LSUP_Term *ctx3 = LSUP_uri_new ("urn:c:3");
-    LSUP_Buffer *sc3 = LSUP_buffer_new_from_term (ctx3);
+    LSUP_Buffer *sc3 = LSUP_term_serialize (ctx3);
 
     // Test lookups.
     LSUP_Buffer *lut[41][3] = {

+ 9 - 9
test/test_term.c

@@ -8,12 +8,12 @@ static int test_term_new()
 
     LSUP_Term *term = LSUP_term_new (LSUP_TERM_LITERAL, data, datatype);
     EXPECT_STR_EQ (term->data, data);
-    EXPECT_STR_EQ (LSUP_tcache_get_id(term->datatype), datatype);
+    EXPECT_STR_EQ (LSUP_tcache_get (term->datatype)->data, datatype);
 
     char *lang = "en-US";
     LSUP_term_init (term, LSUP_TERM_LT_LITERAL, data, lang);
     EXPECT_STR_EQ (term->data, data);
-    EXPECT_STR_EQ (LSUP_tcache_get_id(term->lang), lang);
+    EXPECT_STR_EQ (term->lang, lang);
 
     char *uri_data = "urn:id:2144564356";
     LSUP_uri_init (term, uri_data);
@@ -34,11 +34,11 @@ static int test_term_serialize_deserialize()
     LSUP_Buffer *sterm;
     LSUP_Term *dsterm;
 
-    sterm = LSUP_buffer_new_from_term (uri);
+    sterm = LSUP_term_serialize (uri);
     ASSERT (sterm != NULL, "Error serializing term!");
-    log_info ("%s", "Serialized URI: ");
-    LSUP_buffer_print (sterm);
-    log_info ("%s", "\n");
+    //log_info ("%s", "Serialized URI: ");
+    //LSUP_buffer_print (sterm);
+    //log_info ("%s", "\n");
     dsterm = LSUP_term_new_from_buffer (sterm);
     ASSERT (dsterm != NULL, "Error deserializing term!");
     ASSERT (LSUP_term_equals (dsterm, uri), "URI serialization error!");
@@ -46,7 +46,7 @@ static int test_term_serialize_deserialize()
     LSUP_buffer_free (sterm);
     LSUP_term_free (dsterm);
 
-    sterm = LSUP_buffer_new_from_term (lit);
+    sterm = LSUP_term_serialize (lit);
     ASSERT (sterm != NULL, "Error serializing term!");
     //log_info ("%s", "Serialized literal: ");
     //LSUP_buffer_print (sterm);
@@ -58,7 +58,7 @@ static int test_term_serialize_deserialize()
     LSUP_buffer_free (sterm);
     LSUP_term_free (dsterm);
 
-    sterm = LSUP_buffer_new_from_term (tlit);
+    sterm = LSUP_term_serialize (tlit);
     ASSERT (sterm != NULL, "Error serializing term!");
     //log_info ("%s", "Serialized typed literal: ");
     //LSUP_buffer_print (sterm);
@@ -70,7 +70,7 @@ static int test_term_serialize_deserialize()
     LSUP_buffer_free (sterm);
     LSUP_term_free (dsterm);
 
-    sterm = LSUP_buffer_new_from_term (tllit);
+    sterm = LSUP_term_serialize (tllit);
     ASSERT (sterm != NULL, "Error serializing term!");
     //log_info ("%s", "Serialized typed and language-tagged URI: ");
     //LSUP_buffer_print (sterm);