Browse Source

Initial MDB integration work.

Stefano Cossu 4 years ago
parent
commit
58187aafa1
6 changed files with 332 additions and 9 deletions
  1. 10 5
      Makefile
  2. 1 0
      include/core.h
  3. 0 3
      include/htable.h
  4. 5 0
      include/store_mdb.h
  5. 2 1
      src/graph.c
  6. 314 0
      src/store_mdb.c

+ 10 - 5
Makefile

@@ -15,7 +15,9 @@ build:
 		-std=gnu99 \
 		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb \
 		-luuid \
-		ext/xxHash/xxhash.c src/*.c \
+		ext/xxHash/xxhash.c ext/openldap/libraries/liblmdb/midl.c \
+		ext/openldap/libraries/liblmdb/mdb.c \
+		src/*.c \
 		-o bin/lsup_rdf.so
 
 test:
@@ -23,15 +25,18 @@ test:
 		-std=gnu99 \
 		-DDEBUG \
 		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb -Itest \
-		-luuid \
-		ext/xxHash/xxhash.c src/*.c test.c \
+		-luuid -lpthread \
+		ext/xxHash/xxhash.c ext/openldap/libraries/liblmdb/midl.c \
+		ext/openldap/libraries/liblmdb/mdb.c \
+		src/*.c test.c \
 		-o bin/test
 
 profile:
 	gcc -g -Wall \
-<<<<<<< HEAD
 		-std=gnu99 \
 		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb \
 		-luuid \
-		ext/xxHash/xxhash.c src/*.c profile.c \
+		ext/xxHash/xxhash.c ext/openldap/libraries/liblmdb/midl.c \
+		ext/openldap/libraries/liblmdb/mdb.c \
+		src/*.c profile.c \
 		-o bin/profile

+ 1 - 0
include/core.h

@@ -2,6 +2,7 @@
 #define _LSUP_CORE_H
 
 #include <ctype.h>
+#include <inttypes.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>

+ 0 - 3
include/htable.h

@@ -23,9 +23,6 @@
 #ifndef _LSUP_HTABLE_H
 #define _LSUP_HTABLE_H
 
-#include <inttypes.h>
-#include <stdbool.h>
-
 #include "core.h"
 
 // Max number of entries in the table. With HTABLE_BIG_SIZE, it is SIZE_MAX.

+ 5 - 0
include/store_mdb.h

@@ -0,0 +1,5 @@
+#ifndef _LSUP_STORE_MDB_H
+#define _LSUP_STORE_MDB_H
+
+#include "lmdb.h"
+#include "core.h"
+
+/**
+ * Global LMDB environment handle.
+ *
+ * NOTE(review): this is a (tentative) definition placed in a header. It is
+ * harmless while a single translation unit includes this file; if more do,
+ * it should become an `extern' declaration here with exactly one definition
+ * in a .c file.
+ */
+MDB_env *LSUP_mdbenv;
+
+#endif /* _LSUP_STORE_MDB_H */
+

+ 2 - 1
src/graph.c

@@ -297,7 +297,8 @@ LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
 }
 
 
-int LSUP_graph_match_callback(
+int
+LSUP_graph_match_callback(
         LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
         keyset_match_fn_t callback_fn, bool match_cond, void *ctx)
 {

+ 314 - 0
src/store_mdb.c

@@ -0,0 +1,314 @@
+#include <endian.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "store_mdb.h"
+
+
+/*
+ * Byte-order dependent flags: on big-endian hosts no reversal is requested;
+ * on every other host keys and duplicate values are flagged for reverse
+ * comparison.
+ */
+#if __BYTE_ORDER == __BIG_ENDIAN  // FIXME not portable.
+  #define REVERSEKEY 0
+  #define REVERSEDUP 0
+#else
+  #define REVERSEKEY MDB_REVERSEKEY
+  #define REVERSEDUP MDB_REVERSEDUP
+#endif
+
+/*
+ * Flag sets passed to mdb_dbi_open(). Each expansion is parenthesized so that
+ * it can be combined with other operators at the point of use without
+ * precedence surprises.
+ */
+#define INT_KEY_MASK (MDB_INTEGERKEY | REVERSEKEY)
+#define INT_DUP_KEY_MASK (MDB_DUPSORT | MDB_DUPFIXED \
+    | MDB_INTEGERKEY | REVERSEKEY | REVERSEDUP)
+
+#define INT_DUP_MASK (MDB_DUPSORT | MDB_DUPFIXED \
+    | MDB_INTEGERDUP | REVERSEKEY | REVERSEDUP)
+
+/**
+ * Number of DBs defined.
+ */
+#define N_DB 13
+
+/*
+ * Permissions: ENV_DIR_MODE is used when creating the store directory,
+ * ENV_FILE_MODE is the mode handed to mdb_env_open().
+ */
+#define ENV_DIR_MODE 0750
+#define ENV_FILE_MODE 0640
+
+
+/* Fixed-size buffer for a DB name; the longest label in use is 6 characters. */
+typedef char DbLabel[8];
+
+
+/**
+ * Static handles.
+ */
+// Store directory: taken from LSUP_STORE_PATH or created with mkdtemp() by
+// _mdbenv_init().
+static char *env_path = NULL;
+// Set by _mdbenv_init() once the environment has been created and configured;
+// checked before the environment is used or closed.
+static bool env_init = false;
+// Tested by LSUP_store_open() to decide whether _dbi_init() has to run.
+static bool db_init = false;
+
+/**
+ * Main DB labels
+ *
+ * These strings are the database names handed to mdb_dbi_open() through the
+ * db_labels table.
+ */
+// Term key to serialized term
+static const DbLabel DB_T_ST = "t:st";
+// Joined triple keys to context key
+static const DbLabel DB_SPO_C = "spo:c";
+// This has empty values and is used to keep track of empty contexts.
+static const DbLabel DB_C_ = "c:";
+// Prefix to namespace
+static const DbLabel DB_PFX_NS = "pfx:ns";
+
+/**
+ * Indices
+ */
+// Namespace to prefix
+static const DbLabel DB_NS_PFX = "ns:pfx";
+// Term hash to term key
+static const DbLabel DB_TH_T = "th:t";
+// 1-bound lookups
+static const DbLabel DB_S_PO = "s:po";
+static const DbLabel DB_P_SO = "p:so";
+static const DbLabel DB_O_SP = "o:sp";
+// 2-bound lookups
+static const DbLabel DB_PO_S = "po:s";
+static const DbLabel DB_SO_P = "so:p";
+static const DbLabel DB_SP_O = "sp:o";
+// Context lookup
+static const DbLabel DB_C_SPO = "c:spo";
+
+/**
+ * Order in which keys are looked up if two terms are bound.
+ * The indices with the smallest average number of values per key should be
+ * looked up first.
+ *
+ * 0 = s:po
+ * 1 = p:so
+ * 2 = o:sp
+ */
+static const uint8_t lookup_rank[3] = {0, 2, 1};
+
+// One row per 1-bound index, in the order named by the trailing comments.
+// NOTE(review): the values are presumably the s/p/o positions of the terms as
+// laid out in that index -- confirm once lookup code exists. No function in
+// this file reads this table.
+static const uint8_t lookup_ordering_1bound[3][3] = {
+    {0, 1, 2}, // spo
+    {1, 0, 2}, // pso
+    {2, 0, 1}, // osp
+};
+
+// One row per 2-bound index, in the order named by the trailing comments.
+// NOTE(review): same assumption as for the 1-bound table -- confirm. No
+// function in this file reads this table.
+static const uint8_t lookup_ordering_2bound[3][3] = {
+    {1, 2, 0}, // po:s
+    {0, 2, 1}, // so:p
+    {0, 1, 2}, // sp:o
+};
+
+/**
+ * Index of each DB in the following constants.
+ *
+ * The enumerators are used as positions in the dbis, db_labels and db_flags
+ * arrays, so their order must match the order of those tables and their
+ * count (13) must stay equal to N_DB.
+ */
+typedef enum {
+    IDX_T_ST,
+    IDX_SPO_C,
+    IDX_C_,
+    IDX_PFX_NS,
+    IDX_NS_PFX,
+    IDX_TH_T,
+    IDX_S_PO,
+    IDX_P_SO,
+    IDX_O_SP,
+    IDX_PO_S,
+    IDX_SO_P,
+    IDX_SP_O,
+    IDX_C_SPO,
+} DBIdx;
+
+/**
+ * DB indices. These are populated on init.
+ *
+ * Filled by _dbi_init(), one handle per entry of db_labels.
+ */
+static MDB_dbi dbis[N_DB];
+
+/**
+ * DB labels.
+ *
+ * Listed in DBIdx order; entry i is the name opened into dbis[i].
+ */
+static const char *db_labels[N_DB] = {
+    DB_T_ST,
+    DB_SPO_C,
+    DB_C_,
+    DB_PFX_NS,
+    DB_NS_PFX,
+    DB_TH_T,
+    DB_S_PO,
+    DB_P_SO,
+    DB_O_SP,
+    DB_PO_S,
+    DB_SO_P,
+    DB_SP_O,
+    DB_C_SPO,
+};
+
+/**
+ * DB flags. These are aligned with the db_labels index.
+ *
+ * Entry i is OR-ed with the create flag and passed to mdb_dbi_open() for
+ * db_labels[i].
+ *
+ * NOTE(review): read position by position, some pairings look unintended --
+ * e.g. "pfx:ns" gets integer-key flags, "sp:o" gets no flags while "po:s"
+ * and "so:p" get duplicate-sort flags, and "c:spo" has no MDB_DUPSORT.
+ * Confirm the intended flags per DB before relying on this table.
+ */
+static const unsigned int db_flags[N_DB] = {
+    INT_KEY_MASK,       // t:st
+    INT_DUP_KEY_MASK,   // spo:c
+    0,                  // c:
+    INT_DUP_KEY_MASK,   // pfx:ns
+    0,                  // ns:pfx
+    INT_DUP_MASK,       // th:t
+    INT_DUP_KEY_MASK,   // s:po
+    INT_DUP_MASK,       // p:so
+    INT_DUP_MASK,       // o:sp
+    INT_DUP_MASK,       // po:s
+    INT_DUP_KEY_MASK,   // so:p
+    0,                  // sp:o
+    INT_KEY_MASK,       // c:spo
+};
+
+/**
+ * 1-bound and 2-bound lookup indices.
+ *
+ * The first three entries are the 1-bound indices, the last three the
+ * 2-bound ones. The table is never written to, hence const like the other
+ * lookup tables in this file.
+ */
+static const DBIdx lookup_indices[6] = {
+    IDX_S_PO,
+    IDX_P_SO,
+    IDX_O_SP,
+    IDX_PO_S,
+    IDX_SO_P,
+    IDX_SP_O,
+};
+
+
+/**
+ * Static prototypes.
+ */
+// Opens the N_DB databases and stores their handles in dbis.
+static int _dbi_init(bool create);
+// Resolves the store path and creates/configures the LMDB environment.
+static int _mdbenv_init(bool create);
+// Exit handler that closes the LMDB environment.
+static void _env_cleanup();
+
+
+/**
+ * API.
+ */
+
+/**
+ * @brief Open the store, initializing the environment and DBs on first use.
+ *
+ * The first successful call creates the LMDB environment, registers the exit
+ * handler that closes it, opens the environment and opens the database
+ * handles. Later calls find everything in place and return immediately.
+ *
+ * @param create [in]: Passed on to the initializers; if true the databases
+ *  are created when missing.
+ *
+ * @return 0 on success, otherwise the non-zero code of the step that failed.
+ */
+int
+LSUP_store_open(bool create)
+{
+    // atexit() handlers run once per registration, so register only once.
+    static bool cleanup_registered = false;
+    // mdb_env_open() must not be repeated on an environment already open.
+    static bool env_open = false;
+    int rc;
+
+    if (UNLIKELY(!env_init)) {
+        rc = _mdbenv_init(create);
+        if (rc != 0) return rc;
+        env_open = false;
+    }
+
+    if (!cleanup_registered) {
+        atexit(_env_cleanup);
+        cleanup_registered = true;
+    }
+
+    if (!env_open) {
+        rc = mdb_env_open(LSUP_mdbenv, env_path, 0, ENV_FILE_MODE);
+        if (rc != MDB_SUCCESS) {
+            // LMDB requires the handle to be discarded after a failed open.
+            // Clearing env_init keeps the exit handler from closing it again.
+            mdb_env_close(LSUP_mdbenv);
+            LSUP_mdbenv = NULL;
+            env_init = false;
+            return rc;
+        }
+        env_open = true;
+    }
+
+    if (UNLIKELY(!db_init)) {
+        rc = _dbi_init(create);
+        if (rc != 0) return rc;
+        db_init = true;
+    }
+
+    return 0;
+}
+
+
+/**
+ * @brief Report store statistics (not implemented yet).
+ *
+ * @return Always 0.
+ */
+int
+LSUP_store_stats()
+{
+    // TODO Collect statistics for the environment and for each of the N_DB
+    // databases (MDB_stat). Nothing is gathered yet, so no locals are
+    // declared: unused ones only produce warnings under -Wall.
+    return 0;
+}
+
+
+/**
+ * @brief Number of entries in the spo:c database.
+ *
+ * @param txn [in]: Open transaction used to read the statistics.
+ *
+ * @return The entry count reported by LMDB, or 0 if the environment has not
+ *  been initialized, txn is NULL, or the statistics cannot be read.
+ */
+size_t
+LSUP_store_size(MDB_txn *txn)
+{
+    if (!env_init || txn == NULL) return 0;
+
+    MDB_stat stat;
+    // On failure stat is left unset and must not be read.
+    if (mdb_stat(txn, dbis[IDX_SPO_C], &stat) != MDB_SUCCESS) return 0;
+
+    return stat.ms_entries;
+}
+
+
+/**
+ * Static functions.
+ */
+
+/**
+ * @brief Create and initialize the MDB env.
+ *
+ * This function takes care of creating the environment path if not existing,
+ * and checking that it is a directory. If the path is not specified in the
+ * LSUP_STORE_PATH environment variable, a temporary directory is created
+ * (relative to the current working directory) and used instead.
+ *
+ * The map size is read from the LSUP_MDB_MAPSIZE environment variable as a
+ * decimal number of bytes. It defaults to 1 TiB when the variable is unset
+ * or cannot be parsed; in the latter case a warning is printed.
+ *
+ * This should be only called once per program run, therefore it's best to let
+ * LSUP_store_open() call it when needed.
+ *
+ * The process is aborted if the store directory cannot be created or is not
+ * a directory.
+ *
+ * TODO create is not used; either use it or get rid of it.
+ *
+ * @param create [in]: Currently ignored.
+ *
+ * @return 0 on success, otherwise the LMDB error code of the failed call. On
+ *  failure the environment handle is released and env_init stays false.
+ */
+static int
+_mdbenv_init(bool create)
+{
+    // mkdtemp() rewrites its template in place, so it needs writable storage
+    // that outlives this call: env_path keeps pointing at it.
+    static char tmp_path[sizeof("lsup_mdb-XXXXXX")];
+    struct stat path_stat;
+    int rc;
+
+    (void) create;
+
+    // Set environment path.
+    env_path = getenv("LSUP_STORE_PATH");
+    if (env_path == NULL) {
+        printf(
+                "WARNING: `LSUP_STORE_PATH' environment variable is not set. "
+                "A temporary store path will be open and will be DESTROYED "
+                "at the closing of the program.\n");
+        // Restore the template on every call; a used one has no X's left.
+        snprintf(tmp_path, sizeof(tmp_path), "%s", "lsup_mdb-XXXXXX");
+        env_path = mkdtemp(tmp_path);
+        if (env_path == NULL) {
+            perror("mkdtemp");
+            abort();
+        }
+    } else if (stat(env_path, &path_stat) != 0) {
+        // stat() reports the reason for a failure in errno, not in its
+        // return value. Only a missing path can be fixed by creating it.
+        if (errno != ENOENT || mkdir(env_path, ENV_DIR_MODE) != 0) {
+            perror(env_path);
+            abort();
+        }
+    } else if (!S_ISDIR(path_stat.st_mode)) {
+        fprintf(stderr, "%s: not a directory\n", env_path);
+        abort();
+    }
+
+    rc = mdb_env_create(&LSUP_mdbenv);
+    if (rc != MDB_SUCCESS) return rc;
+
+    // Set map size.
+    size_t mapsize = 1024LU << 30; // 1 TiB
+    const char *env_mapsize = getenv("LSUP_MDB_MAPSIZE");
+
+    if (env_mapsize != NULL) {
+        char *end = NULL;
+        unsigned long long parsed = 0;
+        // Requiring a leading digit rejects empty strings and signs, which
+        // strtoull() would otherwise accept and wrap around.
+        bool valid = isdigit((unsigned char) env_mapsize[0]) != 0;
+
+        if (valid) {
+            errno = 0;
+            parsed = strtoull(env_mapsize, &end, 10);
+            valid = errno == 0 && *end == '\0' && parsed <= SIZE_MAX;
+        }
+
+        if (valid) {
+            mapsize = (size_t) parsed;
+        } else {
+            fprintf(
+                    stderr,
+                    "WARNING: invalid LSUP_MDB_MAPSIZE value `%s'; "
+                    "using the default map size.\n", env_mapsize);
+        }
+    }
+
+    rc = mdb_env_set_mapsize(LSUP_mdbenv, mapsize);
+    if (rc == MDB_SUCCESS) rc = mdb_env_set_maxdbs(LSUP_mdbenv, N_DB);
+    if (rc != MDB_SUCCESS) {
+        mdb_env_close(LSUP_mdbenv);
+        LSUP_mdbenv = NULL;
+        return rc;
+    }
+
+    env_init = true;
+
+    return 0;
+}
+
+
+/**
+ * @brief Open and allocate DB handles in an array.
+ *
+ * All N_DB databases are opened inside one transaction, which is read-only
+ * unless create is set. If any step fails the transaction is aborted and no
+ * handle is kept. On success db_init is set.
+ *
+ * @param bool create [in]: If true, the DBs are created. This is only needed
+ *  on bootstrap.
+ *
+ * @return 0 on success, otherwise the LMDB error code of the failed call.
+ */
+static int
+_dbi_init(bool create)
+{
+    MDB_txn *txn;
+    unsigned int txn_flags = create ? 0 : MDB_RDONLY;
+    unsigned int create_flag = create ? MDB_CREATE : 0;
+
+    int rc = mdb_txn_begin(LSUP_mdbenv, NULL, txn_flags, &txn);
+    if (rc != MDB_SUCCESS) return rc;
+
+    for (int i = 0; i < N_DB; i++) {
+        rc = mdb_dbi_open(
+                txn, db_labels[i], db_flags[i] | create_flag, dbis + i);
+        if (rc != MDB_SUCCESS) {
+            // Aborting also closes the handles opened in this transaction.
+            mdb_txn_abort(txn);
+            return rc;
+        }
+    }
+
+    rc = mdb_txn_commit(txn);
+    if (rc == MDB_SUCCESS) db_init = true;
+
+    return rc;
+}
+
+
+/**
+ * @brief Close the MDB environment; meant to run as an exit handler.
+ *
+ * Does nothing if the environment has not been initialized. After closing,
+ * the module state is reset so that a repeated invocation cannot close the
+ * same environment twice.
+ *
+ * TODO (kept from the original; presumably the removal of the temporary
+ * store directory announced by the warning in _mdbenv_init() -- confirm).
+ */
+static void
+_env_cleanup()
+{
+    if (!env_init) return;
+
+    printf("Cleaning up MDB env.\n");
+    mdb_env_close(LSUP_mdbenv);
+
+    // The handle is invalid from here on.
+    LSUP_mdbenv = NULL;
+    env_init = false;
+    db_init = false;
+}
+