Browse Source

WIP Store lang tags and data types separately; bootstrap DB.

Stefano Cossu 3 năm trước cách đây
mục cha
commit
df8a1d2366

+ 6 - 6
cpython/py_namespace.h

@@ -83,13 +83,13 @@ NSMap_get (PyObject *self, PyObject *pfx_obj)
 
 
 static PyObject *
-NSMap_normalize_uri (PyObject *self, PyObject *fq_uri_obj)
+NSMap_denormalize_uri (PyObject *self, PyObject *fq_uri_obj)
 {
     if (PyUnicode_READY (fq_uri_obj) < 0) return NULL;
     const char *fq_uri = PyUnicode_AsUTF8 (fq_uri_obj);
 
     char *pfx_uri;
-    LSUP_rc rc = LSUP_nsmap_normalize_uri (
+    LSUP_rc rc = LSUP_nsmap_denormalize_uri (
             ((NSMapObject *)self)->ob_struct, fq_uri, &pfx_uri);
     if (rc < 0)  {
         PyErr_SetString (PyExc_ValueError, "Error normalizing URI.");
@@ -104,13 +104,13 @@ NSMap_normalize_uri (PyObject *self, PyObject *fq_uri_obj)
 
 
 static PyObject *
-NSMap_denormalize_uri (PyObject *self, PyObject *pfx_uri_obj)
+NSMap_normalize_uri (PyObject *self, PyObject *pfx_uri_obj)
 {
     if (PyUnicode_READY (pfx_uri_obj) < 0) return NULL;
     const char *pfx_uri = PyUnicode_AsUTF8 (pfx_uri_obj);
 
     char *fq_uri;
-    LSUP_rc rc = LSUP_nsmap_denormalize_uri (
+    LSUP_rc rc = LSUP_nsmap_normalize_uri (
             ((NSMapObject *)self)->ob_struct, pfx_uri, &fq_uri);
     if (rc < 0)  {
         PyErr_SetString (PyExc_ValueError, "Error denormalizing URI.");
@@ -158,11 +158,11 @@ static PyMethodDef NSMap_methods[] = {
     },
     {
         "normalize_uri", (PyCFunction) NSMap_normalize_uri, METH_O,
-        "Normalize a URI (i.e. convert from fully qualified to prefixed)."
+        "Normalize a URI (i.e. convert from prefixed to fully qualified)."
     },
     {
         "denormalize_uri", (PyCFunction) NSMap_denormalize_uri, METH_O,
-        "Denormalize a URI (i.e. convert from prefixed to fully qualified)."
+        "Denormalize a URI (i.e. convert from fully qualified to prefixed)."
     },
     {
         "as_dict", (PyCFunction) NSMap_as_dict, METH_NOARGS,

+ 28 - 32
docs/dev/deps.dot

@@ -5,45 +5,41 @@ digraph "source tree" {
     fontsize="16";
     fontname="Helvetica";
 	clusterrank="local";
-	"term" -> "tpl"
-	"environment" -> "store_mdb"
-	"store_htable" -> "uthash"
-	"graph" -> "environment"
-	"store_mdb" -> "lmdb"
-	"core" -> "log"
-	"buffer" -> "xxhash"
-	"nt_parser" -> "nt_grammar"
+	"codec_nt" -> "codec_base"
+	"buffer" -> "core"
+	"py_triple" -> "triple"
+	"codec_nt" -> "nt_parser"
+	"graph" -> "store_mdb"
+	"py_graph" -> "graph"
 	"py_graph" -> "codec_nt"
-	"profile" -> "graph"
-	"py_term" -> "term"
+	"triple" -> "term"
 	"py_lsup_rdf" -> "py_namespace"
-	"namespace" -> "core"
-	"term" -> "buffer"
-	"store_mdb" -> "uthash"
-	"environment" -> "uthash"
-	"codec_nt" -> "codec_base"
+	"nt_parser" -> "graph"
+	"py_graph" -> "py_triple"
+	"term" -> "uthash"
+	"lsup_rdf" -> "codec_nt"
+	"term" -> "tpl"
+	"store_htable" -> "triple"
 	"term" -> "namespace"
-	"graph" -> "store_htable"
-	"codec_base" -> "graph"
+	"buffer" -> "xxhash"
+	"store_mdb" -> "lmdb"
+	"profile" -> "lsup_rdf"
 	"store_mdb" -> "triple"
-	"py_triple" -> "py_term"
-	"buffer" -> "core"
-	"store_htable" -> "triple"
-	"py_graph" -> "graph"
-	"lsup_rdf" -> "graph"
-	"store_mdb" -> "namespace"
-	"py_triple" -> "triple"
 	"py_lsup_rdf" -> "py_graph"
-	"nt_parser" -> "graph"
+	"core" -> "lmdb"
+	"nt_parser" -> "nt_grammar"
 	"store_mdb" -> "store"
-	"triple" -> "term"
-	"graph" -> "store_mdb"
+	"environment" -> "store_mdb"
+	"graph" -> "store_htable"
+	"py_triple" -> "py_term"
+	"codec_base" -> "graph"
+	"py_term" -> "term"
+	"namespace" -> "core"
+	"graph" -> "environment"
 	"namespace" -> "uthash"
 	"py_namespace" -> "namespace"
+	"term" -> "buffer"
 	"nt_grammar" -> "graph"
-	"core" -> "lmdb"
-	"term" -> "uthash"
-	"codec_nt" -> "nt_parser"
-	"py_graph" -> "py_triple"
-	"store_htable" -> "namespace"
+	"core" -> "log"
+	"store_htable" -> "uthash"
 }

BIN
docs/dev/deps.pdf


+ 2 - 0
include/core.h

@@ -82,6 +82,8 @@ typedef int LSUP_rc;
 
 extern char *warning_msg[], *error_msg[];
 
+extern char *LSUP_root_path;
+
 
 typedef enum {
     LSUP_BOOL_UNION,

+ 74 - 0
include/data/bootstrap.h

@@ -0,0 +1,74 @@
+#ifndef LSUP_INIT_DATA_H
+#define LSUP_INIT_DATA_H
+
+
+/** @brief Initial namespace map.
+ *
+ * Each row is a {prefix, fully qualified namespace} pair. The table is
+ * terminated by a row whose first element is NULL; consumers iterate until
+ * they reach that row.
+ *
+ * NOTE(review): this is an object definition with external linkage placed in
+ * a header. Including this file from more than one translation unit would
+ * produce duplicate symbols at link time -- confirm it is included only once.
+ */
+const char *init_nsmap[][2] = {
+    {"dc",       "http://purl.org/dc/elements/1.1/"},
+    {"dcterms",  "http://purl.org/dc/terms/"},
+    {"foaf",     "http://xmlns.com/foaf/0.1/"},
+    {"rdf",      "http://www.w3.org/1999/02/22-rdf-syntax-ns#"},
+    {"rdfs",     "http://www.w3.org/2000/01/rdf-schema#"},
+    {"xsd",      "http://www.w3.org/2001/XMLSchema#"},
+    {NULL}
+};
+
+/** @brief Initial data types loaded into the environment.
+ *
+ * Entries are namespace-prefixed IRIs that use the `xsd` prefix declared in
+ * init_nsmap. The list is terminated by a NULL entry.
+ *
+ * The constructed XSD types are only compiled in when
+ * XSD_CONSTRUCTED_DATATYPES is defined.
+ *
+ * For XSD type reference see http://www.w3.org/TR/xmlschema11-2/
+ *
+ * NOTE(review): like init_nsmap, this is a definition in a header; confirm
+ * the file is included from a single translation unit only.
+ */
+const char *init_datatypes[] = {
+    /* XSD primitive types. */
+    "xsd:string",
+    "xsd:boolean",
+    "xsd:decimal",
+    "xsd:float",
+    "xsd:double",
+    "xsd:duration",
+    "xsd:dateTime",
+    "xsd:time",
+    "xsd:date",
+    "xsd:gYearMonth",
+    "xsd:gYear",
+    "xsd:gMonthDay",
+    "xsd:gDay",
+    "xsd:gMonth",
+    "xsd:hexBinary",
+    "xsd:base64Binary",
+    "xsd:anyURI",
+    "xsd:QName",
+    "xsd:NOTATION",
+
+    /* Other ordinary (constructed) data types. Not compiled by default. */
+#ifdef XSD_CONSTRUCTED_DATATYPES
+    "xsd:normalizedString",
+    "xsd:token",
+    "xsd:language",
+    "xsd:NMTOKEN",
+    "xsd:NMTOKENS",
+    "xsd:Name",
+    "xsd:NCName",
+    "xsd:ID",
+    "xsd:IDREF",
+    "xsd:IDREFS",
+    "xsd:ENTITY",
+    "xsd:ENTITIES",
+    "xsd:integer",
+    "xsd:nonPositiveInteger",
+    "xsd:negativeInteger",
+    "xsd:long",
+    "xsd:int",
+    "xsd:short",
+    "xsd:byte",
+    "xsd:nonNegativeInteger",
+    "xsd:unsignedLong",
+    "xsd:unsignedInt",
+    "xsd:unsignedShort",
+    "xsd:unsignedByte",
+    "xsd:positiveInteger",
+#endif  /* XSD_CONSTRUCTED_DATATYPES */
+    NULL
+};
+
+#endif /* LSUP_INIT_DATA_H */

+ 2 - 4
include/environment.h

@@ -6,8 +6,6 @@
 #ifndef _LSUP_ENVIRONMENT_H
 #define _LSUP_ENVIRONMENT_H
 
-#include "uthash.h"
-
 #include "store_mdb.h"
 
 
@@ -42,7 +40,7 @@ extern LSUP_Env *LSUP_default_env;
 LSUP_Env *
 LSUP_env_new (
         const char *default_ctx, const char *mdb_path,
-        const char *mdb_ramdisk_path, const LSUP_NSMap *nsmap);
+        const LSUP_NSMap *nsmap);
 
 
 /** @brief Initialize the default environment.
@@ -65,7 +63,7 @@ LSUP_init (void);
  * handle is not freed.
  */
 void
-LSUP_env_done (LSUP_Env *env);
+LSUP_env_free (LSUP_Env *env);
 
 /** @brief Close the default environment.
  *

+ 1 - 1
include/lsup_rdf.h

@@ -1,6 +1,6 @@
 #ifndef _LSUP_RDF_H
 #define _LSUP_RDF_H
 
-#include "graph.h"
+#include "codec_nt.h"
 
 #endif

+ 8 - 8
include/namespace.h

@@ -82,13 +82,13 @@ const char *
 LSUP_nsmap_get (const LSUP_NSMap *map, const ns_pfx pfx);
 
 
-/** @brief Convert a FQ URI string to a prefixed string if the prefix is found.
+/** @brief Convert a namespace-prefixed string to a FQ URI string if mapped.
  *
  * @param[in] map Namespace map to look up.
  *
- * @param[in] uri URI string to normalize.
+ * @param[in] pfx_uri Namespace-prefixed URI string to normalize.
  *
- * @param[out] String pointer to be filled with the prefixed URI. If the
+ * @param[out] fq_uri String pointer to be filled with the FQ URI. If the
  *  namespace is not in the map or an error occurred, this will be NULL.
  *  The caller is in charge of freeing the memory.
  *
@@ -97,16 +97,16 @@ LSUP_nsmap_get (const LSUP_NSMap *map, const ns_pfx pfx);
  */
 LSUP_rc
 LSUP_nsmap_normalize_uri (
-        const LSUP_NSMap *map, const char *fq_uri, char **pfx_uri);
+        const LSUP_NSMap *map, const char *pfx_uri, char **fq_uri);
 
 
-/** @brief Convert a namespace-prefixed string to a FQ URI sring if mapped.
+/** @brief Convert a FQ URI string to a prefixed string if the prefix is found.
  *
  * @param[in] map Namespace map to look up.
  *
- * @param[in] uri URI string to denormalize.
+ * @param[in] fq_uri Fully qualified URI string to denormalize.
  *
- * @param[out] String pointer to be filled with the FQ URI. If the
+ * @param[out] pfx_uri String pointer to be filled with the prefixed URI. If the
  *  namespace is not in the map or an error occurred, this will be NULL.
  *  The caller is in charge of freeing the memory.
  *
@@ -115,7 +115,7 @@ LSUP_nsmap_normalize_uri (
  */
 LSUP_rc
 LSUP_nsmap_denormalize_uri (
-        const LSUP_NSMap *map, const char *pfx_uri, char **fq_uri);
+        const LSUP_NSMap *map, const char *fq_uri, char **pfx_uri);
 
 
 /** @brief Dump all entries of a namespace map.

+ 3 - 3
include/store.h

@@ -5,9 +5,9 @@
  * Store feature flags.
  *
  * NOTE: LSUP_STORE_PERM need only be set by an implementation based on whether
- * its path is on a default temporary dir (e.g. MDB_RAMDISK_PATH). If this flag
- * is not set, it means the data will be cleared before the next execution of
- * the program. However, its being set does not guarantee the persistence of
+ * its path is on a default temporary dir (e.g. LSUP_MDB_RAMDISK_PATH). If this
+ * flag is not set, it means the data will be cleared before the next execution
+ * of the program. However, its being set does not guarantee the persistence of
  * the medium (i.e. a "permanent" store may have been created ad hoc on a
  * tempfs).
  */

+ 19 - 2
include/store_mdb.h

@@ -24,14 +24,13 @@
 
 #include "lmdb.h"
 #include "triple.h"
-#include "namespace.h"
 
 
 // FIXME find a better cross-platform path.
 #define DEFAULT_ENV_PATH "./mdb_store"
 
 // RAMdisk path for MDB volatile store.
-#define MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"
+#define LSUP_MDB_RAMDISK_PATH TMPDIR "/lsup_mem_graph"
 #include "store.h"
 
 
@@ -371,4 +370,22 @@ LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm);
 LSUP_rc
 LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm);
 
+
+/** @brief Populate the ID cache with data types and lang tags from store.
+ *
+ * @param[in] store The store to get data from.
+ */
+LSUP_rc
+LSUP_mdbstore_idcache_get (LSUP_MDBStore *store);
+
+
+/** @brief Store an ID into a MDB store.
+ *
+ * @param[in] store MDB store.
+ *
+ * @param[in] id ID to store.
+ */
+LSUP_rc
+LSUP_mdbstore_idcache_store (LSUP_MDBStore *store, const char *id);
+
 #endif

+ 17 - 12
include/term.h

@@ -19,27 +19,31 @@
  * Term types.
  */
 /* Undefined placeholder or result of an error. Invalid for most operations. */
-#define LSUP_TERM_UNDEFINED      0
+#define LSUP_TERM_UNDEFINED     0
 /* IRI reference. */
-#define LSUP_TERM_IRIREF         1
-/* Blank node. */
-#define LSUP_TERM_BNODE          2
+#define LSUP_TERM_IRIREF        1
+/* Namespace-prefixed IRI reference. */
+#define LSUP_TERM_NS_IRIREF     2
 /* Literal without language tag. */
-#define LSUP_TERM_LITERAL        3
+#define LSUP_TERM_LITERAL       3
 /* Language-tagged string literal. */
-#define LSUP_TERM_LT_LITERAL     4
+#define LSUP_TERM_LT_LITERAL    4
+/* Blank node. */
+#define LSUP_TERM_BNODE         5
 
 /*
  * In-term identifier types.
  */
+/* Namespace prefix string. */
+#define LSUP_ID_NS              10
 /* Data type IRI. */
-#define LSUP_ID_DATATYPE        10
+#define LSUP_ID_DATATYPE        11
 /* Language tag string. */
-#define LSUP_ID_LANG            11
+#define LSUP_ID_LANG            12
 /* Temporary blank node ID. TODO implement. */
-#define LSUP_ID_BNODE           12
+#define LSUP_ID_BNODE           13
 
-/** @brief Default data type for untyped literals.
+/** @brief Default data type for untyped literals (fully qualified IRI).
  */
 #define DEFAULT_DTYPE           "http://www.w3.org/2001/XMLSchema#string"
 
@@ -63,6 +67,7 @@ typedef struct term_t {
         uint32_t        datatype;   // Data type hash for LSUP_TERM_LITERAL.
         uint32_t        lang;       // Lang tag hash for LSUP_TERM_LT_LITERAL.
         uint32_t        bnode_id;   // Blank node ID. TODO implement.
+        LSUP_NSMap *    nsm;        // NSM handle for prefixed IRI.
     };
     LSUP_TermType      type;       // Term type.
 } LSUP_Term;
@@ -227,8 +232,8 @@ LSUP_tcache_add_id (const uint32_t key, const char *data);
  *
  * @param[in] key Key for the queried term.
  *
- * @return The retieved term if found, or NULL. The string must not be modified
- *  or freed.
+ * @return The retrieved term if found, or NULL. The string must not be
+ *  modified or freed.
  */
 const char *
 LSUP_tcache_get_id (const uint32_t key);

+ 1 - 1
profile.c

@@ -1,5 +1,5 @@
 #include <time.h>
-#include "graph.h"
+#include "lsup_rdf.h"
 
 #ifndef NT
 #define NT 100000

+ 0 - 1
src/codec/nt_lexer.re

@@ -1,4 +1,3 @@
-#include "graph.h"
 #include "src/codec/nt_grammar.h"
 #include "nt_parser.h"
 

+ 1 - 0
src/core.c

@@ -32,6 +32,7 @@ char *err_msg[] = {
     "LSUP_ENV_ERR: Invalid environment. Did you call LSUP_init()?",
 };
 
+char *LSUP_root_path = __FILE__; // This is trimmed to root path on init.
 
 int mkdir_p(const char *path, mode_t mode)
 {

+ 23 - 11
src/environment.c

@@ -1,3 +1,5 @@
+#include <unistd.h>
+
 #include "environment.h"
 
 
@@ -15,7 +17,7 @@ LSUP_Env *LSUP_default_env = NULL;
 LSUP_Env *
 LSUP_env_new (
         const char *default_ctx, const char *mdb_path,
-        const char *mdb_ramdisk_path, const LSUP_NSMap *nsmap)
+        const LSUP_NSMap *nsmap)
 {
     LSUP_Env *env;
     CALLOC_GUARD (env, NULL);
@@ -26,23 +28,18 @@ LSUP_env_new (
     LSUP_term_free (default_ctx_uri);
     log_info ("Set up default context.");
 
-    // Permanent store.
+    // Set up store if not existing.
     if (LSUP_mdbstore_setup (mdb_path, false) != LSUP_OK) return NULL;
     env->mdb_store = LSUP_mdbstore_new (mdb_path, env->default_ctx);
     if (UNLIKELY (!env->mdb_store)) return NULL;
     log_info ("Initialized persistent back end at %s.", mdb_path);
 
-    // RAM disk store.
-    if (LSUP_mdbstore_setup (mdb_ramdisk_path, true) != LSUP_OK)
-        return NULL;
-    env->mdb_store_ramdisk = LSUP_mdbstore_new (
-            mdb_ramdisk_path, env->default_ctx);
-    if (UNLIKELY (!env->mdb_store_ramdisk)) return NULL;
-    log_info ("Initialized RAM disk back end at %s.", mdb_ramdisk_path);
-
     // Get default namespace from store.
     RCNL (LSUP_mdbstore_nsm_get (env->mdb_store, &env->nsm));
 
+    // Load data types, lang tags from mdb into memory cache.
+    LSUP_mdbstore_idcache_get (env->mdb_store);
+
     return env;
 }
 
@@ -83,11 +80,25 @@ LSUP_init (void)
                 "store.", mdb_path
             );
         }
+
+        // Default permanent store.
         LSUP_default_env = LSUP_env_new (
-                DEFAULT_CTX_LABEL, mdb_path, MDB_RAMDISK_PATH, NULL);
+                DEFAULT_CTX_LABEL, mdb_path, NULL);
+
+        // RAM disk store.
+        if (LSUP_mdbstore_setup (LSUP_MDB_RAMDISK_PATH, true) != LSUP_OK)
+            log_error ("Error setting up RAM disk store.");
+
+        LSUP_default_env->mdb_store_ramdisk = LSUP_mdbstore_new (
+                LSUP_MDB_RAMDISK_PATH, LSUP_default_env->default_ctx);
+        if (UNLIKELY (!LSUP_default_env->mdb_store_ramdisk))
+            log_error ("Error setting up RAM disk store.");
+
+        log_info ("Initialized RAM disk back end at %s.", LSUP_MDB_RAMDISK_PATH);
 
         if (!LSUP_default_env) rc = LSUP_DB_ERR;
 
+        // Set automatic teardown TODO Is this a good idea?
         atexit (LSUP_done);
 
         rc = LSUP_OK;
@@ -105,6 +116,7 @@ LSUP_env_free (LSUP_Env *env)
     LSUP_buffer_free (env->default_ctx);
     LSUP_nsmap_free (env->nsm);
 
+    // Free ID cache.
     IDCache *entry, *tmp;
     HASH_ITER (hh, LSUP_id_cache, entry, tmp) {
         HASH_DEL (LSUP_id_cache, entry);

+ 34 - 34
src/namespace.c

@@ -141,39 +141,6 @@ LSUP_nsmap_get (const NSMap *map, const ns_pfx pfx)
 
 LSUP_rc
 LSUP_nsmap_normalize_uri (
-        const NSMap *map, const char *fq_uri, char **pfx_uri_p)
-{
-    char *pfx_uri = NULL;
-
-    NSIndex *entry;
-    for (entry = map->np; entry != NULL; entry = entry->hh.next) {
-        if (memcmp (entry->ns->ns, fq_uri, strlen (entry->ns->ns)) == 0)
-            break;
-    }
-
-    if (entry) {
-        pfx_uri = malloc (
-                strlen (entry->ns->pfx)
-                + strlen (fq_uri) - strlen (entry->ns->ns)
-                + 2); // one for terminating \x00, one for the colon.
-        if (UNLIKELY (! (pfx_uri))) return LSUP_MEM_ERR;
-
-        sprintf (
-                pfx_uri, "%s:%s",
-                entry->ns->pfx, fq_uri + strlen (entry->ns->ns));
-
-    }
-
-    else pfx_uri = strdup (fq_uri);
-
-    *pfx_uri_p = pfx_uri;
-
-    return LSUP_OK;
-}
-
-
-LSUP_rc
-LSUP_nsmap_denormalize_uri (
         const NSMap *map, const char *pfx_uri, char **fq_uri_p)
 {
     char *fq_uri = NULL;
@@ -209,6 +176,39 @@ LSUP_nsmap_denormalize_uri (
 }
 
 
+/*
+ * Convert a fully qualified URI into its namespace-prefixed form.
+ *
+ * The namespace index of `map` is scanned in order and the first namespace
+ * that is a leading substring of `fq_uri` is used. The result has the form
+ * "<prefix>:<rest of the URI>". If no namespace matches, the result is a
+ * plain copy of `fq_uri`.
+ *
+ * map:       namespace map to look up.
+ * fq_uri:    NUL-terminated fully qualified URI.
+ * pfx_uri_p: receives a newly allocated string that the caller must free.
+ *            It is set to NULL if allocation fails.
+ *
+ * Returns LSUP_OK on success, LSUP_MEM_ERR if memory could not be allocated.
+ */
+LSUP_rc
+LSUP_nsmap_denormalize_uri (
+        const NSMap *map, const char *fq_uri, char **pfx_uri_p)
+{
+    char *pfx_uri = NULL;
+    size_t ns_len = 0;
+
+    NSIndex *entry;
+    for (entry = map->np; entry != NULL; entry = entry->hh.next) {
+        ns_len = strlen (entry->ns->ns);
+        // strncmp stops at the terminating NUL of fq_uri. memcmp would read
+        // ns_len bytes regardless, i.e. past the end of a URI that is
+        // shorter than the namespace being tested.
+        if (strncmp (entry->ns->ns, fq_uri, ns_len) == 0) break;
+    }
+
+    if (entry) {
+        const char *local_name = fq_uri + ns_len;
+        // One extra byte for the colon, one for the terminating NUL.
+        size_t size = strlen (entry->ns->pfx) + strlen (local_name) + 2;
+
+        pfx_uri = malloc (size);
+        if (pfx_uri)
+            snprintf (pfx_uri, size, "%s:%s", entry->ns->pfx, local_name);
+
+    } else pfx_uri = strdup (fq_uri);
+
+    // Written only after fq_uri has been fully read, so that callers may
+    // pass the address of the input pointer as the output.
+    *pfx_uri_p = pfx_uri;
+
+    if (UNLIKELY (!pfx_uri)) return LSUP_MEM_ERR;
+
+    return LSUP_OK;
+}
+
+
 const char ***
 LSUP_nsmap_dump (const NSMap *map)
 {
@@ -217,7 +217,7 @@ LSUP_nsmap_dump (const NSMap *map)
     Namespace *cur;
     for (cur = map->pn; cur != NULL; cur = cur->hh.next) i++;
 
-    const char ***data = malloc (2 * i + 1 * sizeof (char *));
+    const char ***data = malloc (2 * (i + 1) * sizeof (char *));
     if (UNLIKELY (!data)) return NULL;
 
     for (size_t j = 0; j < i; j++) {

+ 0 - 1
src/store_htable.c

@@ -1,6 +1,5 @@
 #include "uthash.h"
 
-#include "namespace.h"
 #include "store_htable.h"
 
 

+ 173 - 64
src/store_mdb.c

@@ -1,13 +1,12 @@
 #include <ftw.h>
 
-#include "uthash.h"
-
 #include "store_mdb.h"
+#include "data/bootstrap.h"
 
 /**
  * Number of DBs defined.
  */
-#define N_DB 12
+#define N_DB 13
 
 /**
  * Memory map size.
@@ -106,12 +105,6 @@ typedef struct ctx_triple_map_t {
  * Static variables.
  */
 
-/*
- * TODO At the moment up to 64-bit key / hash values are allowed. Later on,
- * 128-bit keys should be allowed by compile options, and that will no longer
- * be compatible with integer keys and data. When 128-bit keys are supported,
- * integer keys should remain available for code compiled with 64-bit keys.
- */
 #define DUPSORT_MASK        MDB_DUPSORT
 #define DUPFIXED_MASK       MDB_DUPSORT | MDB_DUPFIXED
 
@@ -119,12 +112,15 @@ typedef struct ctx_triple_map_t {
  * Main DBs. These are the master information containers.
  *
  * Data columns are: identifier prefix, DB label, flags.
+ *
+ * The number of entries must match the N_DB constant defined above.
  */
 #define MAIN_TABLE \
     ENTRY(  T_ST,    "t:st",    0               )   /* Key to ser. term */  \
     ENTRY(  SPO_C,   "spo:c",   DUPFIXED_MASK   )   /* Triple to context */ \
     ENTRY(  C_,      "c:",      0               )   /* Track empty ctx */   \
     ENTRY(  PFX_NS,  "pfx:ns",  0               )   /* Prefix to NS */      \
+    ENTRY(  IDK_ID,  "idk:id",  0               )   /* ID key to ID */      \
 
 /**
  * Lookup DBs. These are indices and may be destroyed and rebuilt.
@@ -267,7 +263,7 @@ LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx)
     LSUP_MDBStore *store;
     MALLOC_GUARD (store, NULL);
     store->features = LSUP_STORE_CTX;
-    if (strcmp (path, MDB_RAMDISK_PATH) != 0)
+    if (strcmp (path, LSUP_MDB_RAMDISK_PATH) != 0)
             store->features |= LSUP_STORE_PERM;
 
     db_rc = mdb_env_create (&store->env);
@@ -283,8 +279,8 @@ LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx)
     if (env_mapsize == NULL) mapsize = DEFAULT_MAPSIZE;
     else sscanf (env_mapsize, "%lu", &mapsize);
     log_info (
-            "Setting environment map size at %s to %lu bytes.",
-            path, mapsize);
+            "Setting environment map size at %s to %lu Mb.",
+            path, mapsize / 1024 / 1024);
     db_rc = mdb_env_set_mapsize (store->env, mapsize);
 
     db_rc = mdb_env_set_maxdbs (store->env, N_DB);
@@ -294,22 +290,50 @@ LSUP_mdbstore_new (const char *path, const LSUP_Buffer *default_ctx)
     if (UNLIKELY (db_rc != MDB_SUCCESS)) return NULL;
 
     // Assign DB handles to store->dbi.
-    MDB_txn *txn;
-    mdb_txn_begin (store->env, NULL, 0, &txn);
+    mdb_txn_begin (store->env, NULL, 0, &store->txn);
     for (int i = 0; i < N_DB; i++) {
-        db_rc = mdb_dbi_open (txn, db_labels[i], db_flags[i], store->dbi + i);
-        if (UNLIKELY (db_rc != MDB_SUCCESS)) {
-            mdb_txn_abort (txn);
-            return NULL;
-        }
+        db_rc = mdb_dbi_open (
+                store->txn, db_labels[i], db_flags[i], store->dbi + i);
+        if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
     }
 
-    mdb_txn_commit (txn);
+    if (store->features & LSUP_STORE_PERM) {
+        // Bootstrap the permanent store with initial data.
+        MDB_stat stat;
+        mdb_stat (store->txn, store->dbi[IDX_PFX_NS], &stat);
+
+        if (stat.ms_entries == 0) {
+            log_debug ("Loading initial data into %s", path);
+
+            // Load initial NS map.
+            LSUP_NSMap *nsm = LSUP_nsmap_new();
+
+            for (int i = 0; init_nsmap[i][0] != NULL; i++)
+                LSUP_nsmap_add (nsm, init_nsmap[i][0], init_nsmap[i][1]);
+
+            LSUP_mdbstore_nsm_store (store, nsm);
+
+            // Load initial IDs.
+            for (int i = 0; init_datatypes[i] != NULL; i++) {
+                char *fq_uri;
+                LSUP_nsmap_normalize_uri (nsm, init_datatypes[i], &fq_uri);
+                db_rc = LSUP_mdbstore_idcache_store (store, fq_uri);
+                free (fq_uri);
+                if (UNLIKELY (db_rc < 0)) goto fail;
+            }
+        }
+    }
 
     store->state |= LSSTORE_OPEN;
+    mdb_txn_commit (store->txn);
     store->txn = NULL;
 
     return store;
+
+fail:
+    log_error (LSUP_strerror (db_rc));
+    if (store->txn) mdb_txn_abort (store->txn);
+    return NULL;
 }
 
 
@@ -373,6 +397,10 @@ LSUP_mdbstore_add_init (LSUP_MDBStore *store, const LSUP_Buffer *sc)
     it->i = 0;
 
     // No other write transaction may be open.
+    if (UNLIKELY (it->store->txn)) {
+        log_error ("A write transaction is already open.");
+        return NULL;
+    }
     mdb_txn_begin (store->env, NULL, 0, &it->store->txn);
 
     // Take care of context first.
@@ -786,7 +814,7 @@ LSUP_mdbstore_remove(
         LSUP_MDBStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
         const LSUP_Buffer *so, const LSUP_Buffer *sc, size_t *ct)
 {
-    LSUP_rc rc = LSUP_NOACTION;
+    LSUP_rc rc = LSUP_NOACTION, db_rc;
 
     LSUP_Key ck = NULL_KEY;
 
@@ -795,13 +823,13 @@ LSUP_mdbstore_remove(
         ck = LSUP_buffer_hash (sc);
     }
 
-    MDB_txn *txn;
-
-    mdb_txn_begin (store->env, NULL, 0, &txn);
+    // No other write transaction may be open.
+    if (UNLIKELY (store->txn)) return LSUP_TXN_ERR;
+    mdb_txn_begin (store->env, NULL, 0, &store->txn);
 
     MDB_cursor *dcur, *icur;
-    mdb_cursor_open (txn, store->dbi[IDX_SPO_C], &dcur);
-    mdb_cursor_open (txn, store->dbi[IDX_C_SPO], &icur);
+    mdb_cursor_open (store->txn, store->dbi[IDX_SPO_C], &dcur);
+    mdb_cursor_open (store->txn, store->dbi[IDX_C_SPO], &icur);
 
     MDB_val spok_v, ck_v;
 
@@ -816,56 +844,59 @@ LSUP_mdbstore_remove(
     while (mdbiter_next_key (it, NULL) == LSUP_OK) {
         spok_v.mv_data = it->spok;
 
-        rc = mdb_cursor_get (dcur, &spok_v, &ck_v, MDB_GET_BOTH);
-        if (rc == MDB_NOTFOUND) continue;
-        if (UNLIKELY (rc != MDB_SUCCESS)) goto _remove_abort;
+        db_rc = mdb_cursor_get (dcur, &spok_v, &ck_v, MDB_GET_BOTH);
+        if (db_rc == MDB_NOTFOUND) continue;
+        if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
 
         log_trace (
                 "Removing {%lx, %lx, %lx}",
                 it->spok[0], it->spok[1], it->spok[2]);
 
         // Delete spo:c entry.
-        rc = mdb_cursor_del (dcur, 0);
-        if (UNLIKELY (rc != MDB_SUCCESS)) goto _remove_abort;
+        db_rc = mdb_cursor_del (dcur, 0);
+        if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
 
         // Restore ck address after each delete.
         spok_v.mv_data = it->spok;
         ck_v.mv_data = &ck;
 
         // Delete c:spo entry.
-        rc = mdb_cursor_get (icur, &ck_v, &spok_v, MDB_GET_BOTH);
-        if (rc == MDB_NOTFOUND) continue;
-        if (UNLIKELY (rc != MDB_SUCCESS)) goto _remove_abort;
+        db_rc = mdb_cursor_get (icur, &ck_v, &spok_v, MDB_GET_BOTH);
+        if (db_rc == MDB_NOTFOUND) continue;
+        if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
 
-        rc = mdb_cursor_del (icur, 0);
-        if (UNLIKELY (rc != MDB_SUCCESS)) goto _remove_abort;
+        db_rc = mdb_cursor_del (icur, 0);
+        if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
 
         spok_v.mv_data = it->spok;
         ck_v.mv_data = &ck;
 
         // If there are no more contexts associated with this triple,
         // remove from indices.
-        rc = mdb_cursor_get (dcur, &spok_v, NULL, MDB_SET);
-        if (rc == MDB_SUCCESS) continue;
-        if (UNLIKELY (rc != MDB_NOTFOUND)) goto _remove_abort;
+        db_rc = mdb_cursor_get (dcur, &spok_v, NULL, MDB_SET);
+        if (db_rc == MDB_SUCCESS) continue;
+        if (UNLIKELY (db_rc != MDB_NOTFOUND)) goto fail;
 
-        index_triple (store, OP_REMOVE, it->spok, ck);
+        rc = index_triple (store, OP_REMOVE, it->spok, ck);
     }
 
     LSUP_mdbiter_free (it);
 
-    if (UNLIKELY (mdb_txn_commit (txn) != MDB_SUCCESS)) {
+    if (UNLIKELY (mdb_txn_commit (store->txn) != MDB_SUCCESS)) {
         rc = LSUP_TXN_ERR;
-        goto _remove_abort;
+        goto fail;
     }
+    store->txn = NULL;
 
     return rc;
 
-_remove_abort:
-    mdb_txn_abort (txn);
-    log_error ("Database error: %s", LSUP_strerror (rc));
+fail:
+    mdb_txn_abort (store->txn);
+    store->txn = NULL;
 
-    return rc;
+    log_error ("Database error: %s", LSUP_strerror (db_rc));
+
+    return rc == LSUP_TXN_ERR ? rc : LSUP_DB_ERR;
 }
 
 
@@ -917,7 +948,7 @@ LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm_p)
     }
 
     MDB_val ns_v, pfx_v;
-    if (mdb_cursor_get (cur, &ns_v, &pfx_v, MDB_FIRST) != MDB_SUCCESS)
+    if (mdb_cursor_get (cur, &pfx_v, &ns_v, MDB_FIRST) != MDB_SUCCESS)
         goto finally;
 
     do {
@@ -930,7 +961,7 @@ LSUP_mdbstore_nsm_get (LSUP_MDBStore *store, LSUP_NSMap **nsm_p)
 
         free (ns);
     } while (mdb_cursor_get (
-                cur, &ns_v, &pfx_v, MDB_NEXT_NODUP) == MDB_SUCCESS);
+                cur, &pfx_v, &ns_v, MDB_NEXT_NODUP) == MDB_SUCCESS);
 
 finally:
     mdb_cursor_close (cur);
@@ -944,10 +975,7 @@ LSUP_rc
 LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm)
 {
     MDB_txn *txn;
-    if (!store->txn) {
-        RCCK (mdb_txn_begin (store->env, NULL, 0, &txn));
-    }
-    else txn = store->txn;
+    RCCK (mdb_txn_begin (store->env, store->txn, 0, &txn));
 
     LSUP_rc rc = LSUP_NOACTION;
     int db_rc;
@@ -958,8 +986,8 @@ LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm)
         ||
         mdb_cursor_open (txn, store->dbi[IDX_NS_PFX], &icur) != MDB_SUCCESS
     ) {
-        rc = LSUP_DB_ERR;
-        goto finally;
+        mdb_txn_abort (txn);
+        return LSUP_DB_ERR;
     }
 
     MDB_val pfx_v, ns_v;
@@ -969,34 +997,115 @@ LSUP_mdbstore_nsm_store (LSUP_MDBStore *store, const LSUP_NSMap *nsm)
         // At least 1 action. If not OK, it will change during the iteration.
         if (i == 0) rc = LSUP_OK;
 
-        pfx_v.mv_data = (void *)nsm_data[i][0];
+        pfx_v.mv_data = (void *) nsm_data[i];
         pfx_v.mv_size = strlen (nsm_data[i][0]) + 1;
-        ns_v.mv_data = (void *)nsm_data[i][1];
+        ns_v.mv_data = (void *) nsm_data[i] + 1;
         ns_v.mv_size = strlen (nsm_data[i][1]) + 1;
 
-        // If either ns or pfx exist, quit.
+        // If either ns or pfx exist, skip.
         if (
             mdb_cursor_get (dcur, &pfx_v, &ns_v, MDB_SET) != MDB_NOTFOUND
             ||
             mdb_cursor_get (icur, &ns_v, &pfx_v, MDB_SET) != MDB_NOTFOUND
         ) {
             rc = LSUP_CONFLICT;
-            goto finally;
+            continue;
         }
 
         db_rc = mdb_cursor_put (dcur, &pfx_v, &ns_v, 0);
+        db_rc |= mdb_cursor_put (icur, &ns_v, &pfx_v, 0);
         if (db_rc != MDB_SUCCESS) {
             log_error ("DB error: %s", LSUP_strerror (db_rc));
-            rc = LSUP_DB_ERR;
-            goto finally;
+            free (nsm_data);
+            return LSUP_DB_ERR;
         }
     }
+    free (nsm_data);
+
+    if (UNLIKELY (mdb_txn_commit (txn) != MDB_SUCCESS)) {
+        mdb_txn_abort (txn);
+        return LSUP_TXN_ERR;
+    }
+
+    return rc;
+}
+
+
+/*
+ * Persist one identifier string in the ID database (IDX_IDK_ID) of `store`.
+ *
+ * The key is the 32-bit XXH32 hash of the string, computed over the
+ * characters and the terminating NUL; the value is the string itself,
+ * including the terminating NUL.
+ *
+ * The write runs in its own transaction. store->txn is passed as the parent
+ * of that transaction, so it is nested when the store already has a
+ * transaction open and top-level when store->txn is NULL.
+ *
+ * An already existing key is left untouched and is not reported as an error.
+ *
+ * Returns LSUP_OK if the ID was written or was already present, LSUP_DB_ERR
+ * if an LMDB call failed after the transaction was started.
+ */
+LSUP_rc
+LSUP_mdbstore_idcache_store (LSUP_MDBStore *store, const char *id)
+{
+    int db_rc;
+    MDB_val key, data;
+
+    MDB_txn *txn;
+    // If store->txn exists, open a child txn, otherwise parent should be NULL.
+    RCCK (mdb_txn_begin (store->env, store->txn, 0, &txn));
+
+    MDB_cursor *cur;
+    db_rc = mdb_cursor_open (txn, store->dbi[IDX_IDK_ID], &cur);
+    if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
+
+    // Hash and stored value both cover the terminating NUL.
+    size_t id_size = strlen (id) + 1;
+
+    uint32_t k = XXH32 (id, id_size, HASH_SEED);
+    key.mv_data = &k;
+    key.mv_size = sizeof (k);
+
+    data.mv_data = (void *) id;
+    data.mv_size = id_size;
+
+    // MDB_NOOVERWRITE: keep the existing record if the key is present.
+    db_rc = mdb_cursor_put (cur, &key, &data, MDB_NOOVERWRITE);
+    if (db_rc != MDB_SUCCESS && db_rc != MDB_KEYEXIST) goto fail;
+
+    // txn is always the handle created above (never store->txn itself), so
+    // it is always committed here. The handle is invalid after the commit
+    // call whatever the outcome; clear it so that `fail` does not abort it.
+    db_rc = mdb_txn_commit (txn);
+    txn = NULL;
+    if (UNLIKELY (db_rc != MDB_SUCCESS)) goto fail;
+
+    return LSUP_OK;
+
+fail:
+    // Never pass a message string as the format argument.
+    log_error ("Database error: %s", mdb_strerror (db_rc));
+    if (txn) mdb_txn_abort (txn);
+    return LSUP_DB_ERR;
+}
+
+
+/*
+ * Load every record of the ID database (IDX_IDK_ID) of `store` into the
+ * in-memory ID cache by calling LSUP_tcache_add_id() on each key/value pair.
+ *
+ * A read-only transaction is opened for the scan and aborted at `finally`.
+ *
+ * IDs that are already cached make LSUP_tcache_add_id() return
+ * LSUP_NOACTION; that is not a failure and the scan continues. Only a
+ * negative return code (assumed to denote an error, as in the other
+ * `rc < 0` checks in this code base) stops the scan.
+ *
+ * Returns LSUP_NOACTION if the database holds no records, LSUP_OK if records
+ * were read, LSUP_DB_ERR on an LMDB error, or the error code returned by
+ * LSUP_tcache_add_id().
+ */
+LSUP_rc
+LSUP_mdbstore_idcache_get (LSUP_MDBStore *store)
+{
+    int db_rc, rc = LSUP_NOACTION;
+    MDB_txn *txn = NULL;
+    MDB_cursor *cur = NULL;
+
+    RCCK (mdb_txn_begin (store->env, NULL, MDB_RDONLY, &txn));
+
+    db_rc = mdb_cursor_open (txn, store->dbi[IDX_IDK_ID], &cur);
+    if (UNLIKELY (db_rc != MDB_SUCCESS)) {
+        log_error ("Database error: %s", LSUP_strerror (db_rc));
+        rc = LSUP_DB_ERR;
+        goto finally;
+    }
+
+    MDB_val key, data;
+    db_rc = mdb_cursor_get (cur, &key, &data, MDB_FIRST);
+    if (db_rc != MDB_NOTFOUND) rc = LSUP_OK;
+
+    while (db_rc == MDB_SUCCESS) {
+        // Copy the key out instead of dereferencing a cast pointer, so no
+        // alignment assumption is made about the buffer returned by LMDB.
+        uint32_t id_key;
+        memcpy (&id_key, key.mv_data, sizeof (id_key));
+
+        LSUP_rc add_rc = LSUP_tcache_add_id (id_key, (char *) data.mv_data);
+        if (UNLIKELY (add_rc < 0)) {
+            rc = add_rc;
+            goto finally;
+        }
+
+        db_rc = mdb_cursor_get (cur, &key, &data, MDB_NEXT_NODUP);
+    }
+
+    if (db_rc != MDB_NOTFOUND) {
+        log_error ("Database error: %s", LSUP_strerror (db_rc));
+        rc = LSUP_DB_ERR;
+    }
 
 finally:
-    if (icur) mdb_cursor_close (icur);
-    if (dcur) mdb_cursor_close (dcur);
-    free (nsm_data);
-    if (txn != store->txn) mdb_txn_commit (txn);
+    if (cur) mdb_cursor_close (cur);
+    mdb_txn_abort (txn);
 
     return rc;
 }

+ 7 - 6
src/term.c

@@ -5,7 +5,7 @@
 /*
  * tpl packing format for the term structure.
  */
-#define TERM_PACK_FMT "S(suc)"
+#define TERM_PACK_FMT "S(sUc)"
 
 /*
  * Extern variables.
@@ -129,11 +129,11 @@ LSUP_term_init(
     strcpy (term->data, data);
 
     if (term->type == LSUP_TERM_LT_LITERAL) {
-        term->lang = XXH64 (metadata, strlen (metadata) + 1, HASH_SEED);
+        term->lang = XXH32 (metadata, strlen (metadata) + 1, HASH_SEED);
         LSUP_tcache_add_id (term->lang, metadata);
 
     } else if (metadata && strcmp (metadata, DEFAULT_DTYPE) != 0) {
-        term->datatype = XXH64 (metadata, strlen (metadata) + 1, HASH_SEED);
+        term->datatype = XXH32 (metadata, strlen (metadata) + 1, HASH_SEED);
         LSUP_tcache_add_id (term->datatype, metadata);
     }
 
@@ -201,7 +201,8 @@ LSUP_tcache_add_id (const uint32_t key, const char *data)
     struct id_cache_t *entry;
 
     HASH_FIND_INT (LSUP_id_cache, &key, entry);
-    if (entry) return LSUP_NOACTION;
+    // Many calls will likely attempt inserting duplicates after the first one.
+    if (LIKELY (entry)) return LSUP_NOACTION;
 
     MALLOC_GUARD (entry, LSUP_MEM_ERR);
     entry->key = key;
@@ -218,8 +219,8 @@ LSUP_tcache_get_id (const uint32_t key)
     struct id_cache_t *entry;
 
     HASH_FIND_INT (LSUP_id_cache, &key, entry);
-    if (entry) log_trace ("Id found for key %d: %s", key, entry->data);
-    else log_trace ("No ID found for key %d.", key);
+    if (entry) log_trace ("Id found for key %u: %s", key, entry->data);
+    else log_trace ("No ID found for key %u.", key);
 
     return (entry) ? entry->data : NULL;
 }

+ 5 - 1
test.c

@@ -5,11 +5,15 @@
 #include "test_store_mdb.c"
 #include "test_graph.c"
 
+#define TEST_STORE_PATH TMPDIR "/lsup_test_mdb"
+
 int main(int argc, char **argv) {
 
     // Set env variable to test path.
-    putenv ("LSUP_MDB_STORE_PATH=" TMPDIR "/lsup_test_mdb");
+    putenv ("LSUP_MDB_STORE_PATH=" TEST_STORE_PATH);
     // Clear out database from previous test.
+    rm_r (TEST_STORE_PATH);
+
     LSUP_init();
 
     int rc;

+ 2 - 2
test/test_namespace.c

@@ -24,11 +24,11 @@ test_namespace()
     char *fq_uri, *pfx_uri;
     fq_uri = "http://purl.org/dc/elements/1.1/title";
 
-    EXPECT_PASS (LSUP_nsmap_normalize_uri (nsm, fq_uri, &pfx_uri));
+    EXPECT_PASS (LSUP_nsmap_denormalize_uri (nsm, fq_uri, &pfx_uri));
     EXPECT_STR_EQ (pfx_uri, "dc:title");
 
     fq_uri = NULL;
-    EXPECT_PASS (LSUP_nsmap_denormalize_uri (nsm, pfx_uri, &fq_uri));
+    EXPECT_PASS (LSUP_nsmap_normalize_uri (nsm, pfx_uri, &fq_uri));
     EXPECT_STR_EQ (fq_uri, "http://purl.org/dc/elements/1.1/title");
 
     EXPECT_PASS (LSUP_nsmap_remove (nsm, "dc"));

+ 4 - 4
test/test_term.c

@@ -4,7 +4,7 @@
 static int test_term_new()
 {
     char *data = "hello";
-    char *datatype = "xsd:string";
+    char *datatype = "urn:my:datatype";
 
     LSUP_Term *term = LSUP_term_new (LSUP_TERM_LITERAL, data, datatype);
     EXPECT_STR_EQ (term->data, data);
@@ -36,9 +36,9 @@ static int test_term_serialize_deserialize()
 
     sterm = LSUP_buffer_new_from_term (uri);
     ASSERT (sterm != NULL, "Error serializing term!");
-    //log_info ("%s", "Serialized URI: ");
-    //LSUP_buffer_print (sterm);
-    //log_info ("%s", "\n");
+    log_info ("%s", "Serialized URI: ");
+    LSUP_buffer_print (sterm);
+    log_info ("%s", "\n");
     dsterm = LSUP_term_new_from_buffer (sterm);
     ASSERT (dsterm != NULL, "Error deserializing term!");
     ASSERT (LSUP_term_equals (dsterm, uri), "URI serialization error!");