Просмотр исходного кода

Replace index structure with klib macros.

Stefano Cossu 4 года назад
Родитель
Коммит
a3ffbb328b
9 изменённых файлов: 92 добавлено, 35 удалено
  1. 9 0
      .gitmodules
  2. 1 2
      Makefile
  3. 1 0
      ext/klib
  4. 1 0
      ext/libcork
  5. 1 0
      ext/qlibc
  6. 2 0
      include/buffer.h
  7. 1 1
      include/graph.h
  8. 10 0
      src/buffer.c
  9. 66 32
      src/graph.c

+ 9 - 0
.gitmodules

@@ -4,3 +4,12 @@
 [submodule "ext/uuid4"]
 	path = ext/uuid4
 	url = https://github.com/rxi/uuid4.git
+[submodule "ext/libcork"]
+	path = ext/libcork
+	url = https://github.com/dcreager/libcork.git
+[submodule "ext/klib"]
+	path = ext/klib
+	url = https://github.com/attractivechaos/klib.git
+[submodule "ext/qlibc"]
+	path = ext/qlibc
+	url = https://github.com/wolkykim/qlibc.git

+ 1 - 2
Makefile

@@ -11,7 +11,7 @@ check:
 build:
 	gcc -g -Wall \
 		-std=c99 \
-		-Iinclude -Iext/xxHash \
+		-Iinclude -Iext/xxHash -Iext/klib \
 		-luuid \
 		ext/xxHash/xxhash.c src/*.c \
 		-o bin/lsup_rdf.so
@@ -20,7 +20,6 @@ test:
 	gcc -g -Wall \
 		-std=c99 \
 		-DDEBUG \
-		-Iinclude -Iext/xxHash -Itest \
 		-luuid \
 		ext/xxHash/xxhash.c src/*.c test.c \
 		-o bin/test

+ 1 - 0
ext/klib

@@ -0,0 +1 @@
+Subproject commit 928581a78413bed4efa956731b35b18a638f20f3

+ 1 - 0
ext/libcork

@@ -0,0 +1 @@
+Subproject commit f7cf650a2f3bb1d47270978f52600bfd79d77c2e

+ 1 - 0
ext/qlibc

@@ -0,0 +1 @@
+Subproject commit 25d5f5ce44ec4c863edbeaecdcb4d3c05dcf3aa7

+ 2 - 0
include/buffer.h

@@ -15,6 +15,8 @@ int LSUP_buffer_init(LSUP_Buffer *buf, size_t size);
 
 void LSUP_buffer_print(const LSUP_Buffer *buf);
 
+int LSUP_buffer_copy(LSUP_Buffer *dest, const LSUP_Buffer *src);
+
 void LSUP_buffer_done(LSUP_Buffer *buf);
 
 

+ 1 - 1
include/graph.h

@@ -35,7 +35,7 @@ LSUP_Graph *
 LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type);
 
 int
-LSUP_graph_copy(LSUP_Graph *src, LSUP_Graph *dest);
+LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src);
 
 size_t
 LSUP_graph_capacity(LSUP_Graph *gr);

+ 10 - 0
src/buffer.c

@@ -46,6 +46,16 @@ void LSUP_buffer_print(const LSUP_Buffer *buf) {
 }
 
 
+/**
+ * Copy the contents of one buffer into another.
+ *
+ * dest is initialized with LSUP_buffer_init() to src->size bytes, then the
+ * bytes at src->addr are copied into dest->addr.
+ *
+ * @param dest Buffer to initialize and fill. NOTE(review): it is
+ *  (re)initialized here, so it presumably must not already own memory —
+ *  confirm against LSUP_buffer_init().
+ * @param src Buffer to copy from. Not modified.
+ *
+ * @return LSUP_OK on success; otherwise the code returned by
+ *  LSUP_buffer_init(). NOTE(review): assumes LSUP_buffer_init() reports
+ *  success as LSUP_OK — confirm.
+ */
+int LSUP_buffer_copy(LSUP_Buffer *dest, const LSUP_Buffer *src)
+{
+    // Do not write into dest if its storage could not be set up.
+    int rc = LSUP_buffer_init(dest, src->size);
+    if (rc != LSUP_OK) return rc;
+
+    // Skip the copy for empty buffers: memcpy() must not be handed a null
+    // pointer, even when the length is zero.
+    if (src->size > 0) memcpy(dest->addr, src->addr, src->size);
+
+    return LSUP_OK;
+}
+
+
 void LSUP_buffer_done(LSUP_Buffer *buf){
     if (buf->addr != NULL) {
         TRACE(STR, "Freeing buffer.");

+ 66 - 32
src/graph.c

@@ -1,3 +1,5 @@
+#include "khash.h"
+
 #include "graph.h"
 
 // Initial size of lookup graph. It will double each time capacity is reached.
@@ -6,9 +8,16 @@
 typedef enum KSetFlag {
     LSUP_KS_NONE        = 0,
     LSUP_KS_CHECK_CAP   = 1 << 0,
-    LSUP_KS_CHECK_DUP   = 1 << 1
+    LSUP_KS_CHECK_DUP   = 1 << 1,
 } KSetFlag;
 
+enum {  // Status codes reported by kh_put_Index() via its status argument (values per klib khash kh_put docs).
+    HS_ERROR        = -1,   // The put operation failed.
+    HS_PRESENT      = 0,    // Key was already present in the table.
+    HS_EMPTY        = 1,    // Key was new; it went into a never-used bucket.
+    HS_TOMBSTONE    = 2,    // Key was new; it went into a previously deleted bucket.
+};
+
 typedef struct Keyset {
     LSUP_TripleKey  *data;
     size_t          capacity;
@@ -16,16 +25,22 @@ typedef struct Keyset {
     size_t          free_i;
 } Keyset;
 
-typedef struct IndexEntry {
-    LSUP_Key key;
-    LSUP_SerTerm *val;
-} IndexEntry;
+/**
+ * Hash function for the term index: hands the key back unchanged.
+ *
+ * Keys are already strong hashes, so they are used directly for bucket
+ * allocation instead of being hashed a second time.
+ */
+static inline LSUP_Key
+idx_key_hash_fn(LSUP_Key key)
+{
+    return key;
+}
+
+KHASH_INIT(  // Instantiate the khash table type "Index" and its kh_*_Index functions.
+        Index, LSUP_Key, LSUP_Buffer, 1, idx_key_hash_fn, kh_int_hash_equal)  // LSUP_Key -> LSUP_Buffer; 4th argument 1 selects a map (values stored) rather than a set.
 
 typedef struct Graph {
-    LSUP_store_type store_type;
-    Keyset *keys;
-    LSUP_Term *uri;
-    struct Index *idx;
+    LSUP_store_type store_type;     // In-memory or MDB-backed
+    Keyset *keys;                   // Key arrangements in triples
+    LSUP_Term *uri;                 // Graph "name" (URI)
+    khash_t(Index) *idx;            // Dictionary of keys to serialized terms
 } Graph;
 
 
@@ -265,7 +280,8 @@ LSUP_graph_init(
 
     switch (store_type ) {
         case LSUP_STORE_MEM:
-            gr->idx = idx_new(gr->keys->capacity, 0.8, &idx_fkey, &idx_fval);
+
+            gr->idx = kh_init_Index();
             break;
 
         case LSUP_STORE_MDB:
@@ -309,7 +325,7 @@ static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest)
 
 
 int
-LSUP_graph_copy(LSUP_Graph *src, LSUP_Graph *dest)
+LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src)
 {
     LSUP_graph_init(dest, src->keys->capacity, NULL, src->store_type);
 
@@ -338,23 +354,25 @@ LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo)
     LSUP_term_serialize(spo->p, sspo + 1);
     LSUP_term_serialize(spo->o, sspo + 2);
 
-    LSUP_Key sk = LSUP_sterm_to_key(sspo);
-    LSUP_Key pk = LSUP_sterm_to_key(sspo + 1);
-    LSUP_Key ok = LSUP_sterm_to_key(sspo + 2);
+    LSUP_TripleKey spok = {0, 0, 0};
 
-    // Add terms to index.
-    // TODO Optimize. This copies sterm data twice: once to serialize,
-    // once to add to index.
-    idx_insert(gr->idx, sk, sspo);
-    idx_insert(gr->idx, pk, sspo + 1);
-    idx_insert(gr->idx, ok, sspo + 2);
+    // Add term to index.
+    int status;
+    khiter_t cur;
+    for (int i = 0; i < 3; i++) {
+        spok[i] = LSUP_sterm_to_key(sspo + i);
 
-    LSUP_buffer_done(sspo);
-    LSUP_buffer_done(sspo + 1);
-    LSUP_buffer_done(sspo + 2);
+        cur = kh_put_Index(gr->idx, spok[i], &status);
+        if (status == HS_EMPTY || status == HS_TOMBSTONE) {
+            // If term is not indexed, store the struct in the index.
+            kh_value(gr->idx, cur) = sspo[i];
+        } else {
+            // If term is already in the index, discard and free it.
+            LSUP_buffer_done(sspo + i);
+        }
+    }
 
     // Add triple.
-    LSUP_TripleKey spok = {sk, pk, ok};
     keyset_add(gr->keys, &spok, LSUP_KS_CHECK_DUP);
 
     return LSUP_OK;
@@ -469,7 +487,7 @@ int LSUP_graph_match_callback(
 
     } else {
         printf("WARNING: no bound terms, making a compact copy.\n");
-        return LSUP_graph_copy(gr, res);
+        return LSUP_graph_copy(res, gr);
     }
 
     do {
@@ -494,7 +512,7 @@ int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
 
 int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
-    LSUP_graph_copy(gr1, res);
+    LSUP_graph_copy(res, gr1);
 
     return graph_copy_contents(gr2, res);
 }
@@ -539,10 +557,10 @@ int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
     if (!keyset_seek(gr1->keys, 0))
-        return LSUP_graph_copy(gr2, res);
+        return LSUP_graph_copy(res, gr2);
 
     if (!keyset_seek(gr2->keys, 0))
-        return LSUP_graph_copy(gr1, res);
+        return LSUP_graph_copy(res, gr1);
 
     LSUP_graph_init(res, gr1->keys->capacity, NULL, LSUP_STORE_MEM);
 
@@ -570,7 +588,14 @@ LSUP_graph_free(LSUP_Graph *gr)
     if (LIKELY(gr != NULL)) {
         LSUP_term_free(gr->uri);
         keyset_free(gr->keys);
-        idx_free(gr->idx);
+
+        // Free up index entries and index.
+        for(khiter_t i = kh_begin(gr->idx); i != kh_end(gr->idx); ++i) {
+            if(kh_exist(gr->idx, i))
+                LSUP_buffer_done(&kh_value(gr->idx, i));
+        }
+        kh_destroy_Index(gr->idx);
+
         free(gr);
     }
 }
@@ -594,9 +619,18 @@ int match_add_fn(LSUP_Graph *src, LSUP_Graph *dest, void *ctx)
 
     memcpy(dest->keys->data + dest->keys->free_i, spok, TRP_KLEN);
 
-    idx_insert(dest->idx, *spok[0], idx_get(src->idx, *spok[0]));
-    idx_insert(dest->idx, *spok[1], idx_get(src->idx, *spok[1]));
-    idx_insert(dest->idx, *spok[2], idx_get(src->idx, *spok[2]));
+    // Add term to index.
+    int status;
+    for (int i = 0; i < 3; i++) {
+        khiter_t cur = kh_put_Index(dest->idx, *spok[i], &status);
+        if (
+                kh_exist(src->idx, *spok[i] &&
+                (status == HS_EMPTY || status == HS_TOMBSTONE))) {
+            // If term is not indexed, store the struct in the index.
+            LSUP_buffer_copy(
+                    &kh_value(dest->idx, cur), &kh_value(src->idx, cur));
+        }
+    }
 
     dest->keys->cur = dest->keys->free_i;
     dest->keys->free_i ++;