Browse Source

Merge branch 'master' into lmdb

Stefano Cossu 4 years ago
parent
commit
0911407e74
10 changed files with 1001 additions and 218 deletions
  1. 6 5
      Makefile
  2. 8 0
      include/core.h
  3. 2 3
      include/graph.h
  4. 165 0
      include/htable.h
  5. 51 0
      profile.c
  6. 156 175
      src/graph.c
  7. 446 0
      src/htable.c
  8. 2 1
      test.c
  9. 7 34
      test/test_graph.c
  10. 158 0
      test/test_htable.c

+ 6 - 5
Makefile

@@ -12,7 +12,7 @@ check:
 
 build:
 	gcc -g -Wall \
-		-std=c99 \
+		-std=gnu99 \
 		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb \
 		-luuid \
 		ext/xxHash/xxhash.c src/*.c \
@@ -20,7 +20,7 @@ build:
 
 test:
 	gcc -g -Wall \
-		-std=c99 \
+		-std=gnu99 \
 		-DDEBUG \
 		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb -Itest \
 		-luuid \
@@ -29,8 +29,9 @@ test:
 
 profile:
 	gcc -g -Wall \
-		-std=c99 \
-		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb -Itest \
+		-std=gnu99 \
+		-Iinclude -Iext/xxHash -Iext/openldap/libraries/liblmdb \
 		-luuid \
-		ext/xxHash/xxhash.c src/*.c test.c \
+		ext/xxHash/xxhash.c src/*.c profile.c \
 		-o bin/profile

+ 8 - 0
include/core.h

@@ -34,6 +34,13 @@
 
 # define UUIDSTR_SIZE 37
 
+// Handy flags operations.
+#define SET_FLAG(n, f) ((n) |= (f))
+#define CLR_FLAG(n, f) ((n) &= ~(f))
+#define TGL_FLAG(n, f) ((n) ^= (f))
+#define CHK_FLAG(n, f) ((n) & (f))
+
+
 
 /* * * RETURN CODES * * */
 
@@ -49,6 +56,7 @@
 
 #define LSUP_ERROR          (-88801)
 #define LSUP_PARSE_ERR      (-88802)
+#define LSUP_VALUE_ERR      (-88803)
 
 
 typedef size_t LSUP_Key;

+ 2 - 3
include/graph.h

@@ -1,8 +1,6 @@
 #ifndef _LSUP_GRAPH_H
 #define _LSUP_GRAPH_H
 
-#include "khash.h"
-
 #include "triple.h"
 
 
@@ -28,7 +26,8 @@ typedef struct Graph LSUP_Graph;
  *  by the callback.
 */
 typedef int (*keyset_match_fn_t)(
-        LSUP_Graph *src, LSUP_Graph *dest, khiter_t cur, void *ctx);
+        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
+        void *ctx);
 
 
 int

+ 165 - 0
include/htable.h

@@ -0,0 +1,165 @@
+/**
+ * Hash table implementation.
+ *
+ * This code is hack...ahem, built upon Klib:
+ * https://github.com/attractivechaos/klib/blob/master/khash.h
+ *
+ * After trying several hash map implementations, none met all the requirements
+ * (small, single-file; accept arbitrarily-sized elements; not an unsightly
+ * macro mess; reasonably fast), so I decided to expand a KLib macro and adapt
+ * it to a data type agnostic model.
+ *
+ * This table handles keys and, optionally, values as untyped (void *) data of
+ * arbitrary, but fixed, size. For small keys / values of unusual size,
+ * this is convenient because it avoids creating (and having to manage) a
+ * pointer for each key and value. Data are all stored inline. The data types
+ * are set by casting on retrieval.
+ *
+ * For larger or variably-sized keys or values, or ones that are not convenient
+ * to copy into the table, pointers can obviously be used by specifying ptr_t
+ * key and/or value size.
+ */
+
+#ifndef _LSUP_HTABLE_H
+#define _LSUP_HTABLE_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "core.h"
+
+// Max number of entries in the table. With HTABLE_BIG_SIZE, it is SIZE_MAX.
+// Otherwise, UINT32_MAX (4,294,967,295).
+#ifdef HTABLE_BIG_SIZE
+typedef size_t htsize_t;
+#define HTSIZE_MAX SIZE_MAX
+#else
+typedef uint32_t htsize_t;
+#define HTSIZE_MAX UINT32_MAX
+#endif
+
+// Size of key entries. With HTABLE_BIG_KEY it is up to 65535 bytes (64 KiB
+// minus one). Otherwise, it is up to 255 bytes.
+#ifdef HTABLE_BIG_KEY
+typedef uint16_t ksize_t;
+#else
+typedef uint8_t ksize_t;
+#endif
+
+// Size of value entries. With HTABLE_BIG_VAL it is up to 65535 bytes (64 KiB
+// minus one). Otherwise, it is up to 255 bytes. For larger values, use
+// pointers.
+#ifdef HTABLE_BIG_VAL
+typedef uint16_t vsize_t;
+#else
+typedef uint8_t vsize_t;
+#endif
+
+
+typedef enum {
+    HTABLE_NOCOPY           = 1 << 0,
+    HTABLE_IS_SET           = 1 << 1,
+} LSUP_HTFlag;
+
+
+/**
+ * Key hashing function.
+ *
+ * Takes a void pointer, a key length and a seed.
+ */
+typedef uint64_t (*key_hash_fn_t)(
+        const void *key, ksize_t size, uint64_t seed);
+
+/**
+ * Key equality function (true: keys are equal).
+ *
+ * Takes two void pointers and a key length (which is constant within the
+ * hash table).
+ */
+typedef bool (*key_eq_fn_t)(const void *a, const void *b, ksize_t size);
+
+/**
+ * Hash table type.
+ *
+ * Supports up to UINT32_MAX entries (~4 billion).
+ *
+ * If compiled with -DHTABLE_BIG_SIZE it supports up to size_t entries
+ * for extremely large in-memory graphs.
+ */
+typedef struct htable_t LSUP_HTable;
+
+extern LSUP_HTable *LSUP_htable_new(
+        htsize_t size, ksize_t ksize, vsize_t vsize,
+        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags);
+
+extern int LSUP_htable_resize(LSUP_HTable *ht, htsize_t newsize);
+
+extern htsize_t LSUP_htable_capacity(LSUP_HTable *ht);
+
+extern htsize_t LSUP_htable_size(LSUP_HTable *ht);
+
+extern int LSUP_htable_insert(LSUP_HTable *ht, const void *key, void *val);
+
+extern int LSUP_htable_put(LSUP_HTable *ht, const void *key, void *val);
+
+/**
+ * @brief Test the existence of a given key and find its value.
+ *
+ * @param LSUP_HTable ht[in]: Hash table or set.
+ *
+ * @param const void *key[in]: Key to look up.
+ *
+ * @param void **valp[out]: Pointer to be set to the address of the value
+ * found at the key address, if any. If NULL is passed, or if the hash table
+ * is a set, the value is never populated.
+ *
+ * @return int: LSUP_OK if the key is found; LSUP_NORESULT if the key is not
+ *  found; a negative value on error.
+ */
+extern int LSUP_htable_get(
+        const LSUP_HTable *ht, const void *key, void **valp);
+
+/*
+ * Remove the given key.
+ *
+ * @param LSUP_HTable ht[in]: Hash table or set.
+ *
+ * @param const void *key[in]: Key to remove.
+ *
+ * @return int: LSUP_OK if the key was removed; LSUP_NOACTION if it was not
+ *  found.
+ *
+ */
+extern int LSUP_htable_del(LSUP_HTable *ht, const void *key);
+
+/**
+ * Iterate over a hashmap or set.
+ *
+ * @param LSUP_HTable ht[in]: Hash table or set.
+ *
+ * @param htsize_t *cur[in,out]: an integer used as a cursor. Each successful
+ *  iteration of the function increases this value by 1. So the correct use
+ *  for this is to initialize a htsize_t variable to zero and pass its
+ *  pointer in a loop until LSUP_END is returned.
+ *
+ * @param void **keyp[out]: Pointer to be populated with the next key found.
+ *
+ * @param void **valp[out]: Pointer to the found value address. This can be
+ *  used as a normal lvalue. It may be NULL for sets or if the value is not
+ *  needed.
+ *
+ * @return int: LSUP_OK if the key is found; LSUP_END if the end of the data
+ *  is reached.
+ */
+extern int LSUP_htable_iter(
+        LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp);
+
+/*
+ * Free the memory used by the hash table.
+ *
+ * => It is the responsibility of the caller to remove elements if needed.
+ */
+extern void LSUP_htable_done(LSUP_HTable *ht);
+
+extern void LSUP_htable_free(LSUP_HTable *ht);
+
+#endif

+ 51 - 0
profile.c

@@ -0,0 +1,51 @@
+#include <time.h>
+#include "graph.h"
+
+#ifndef NT
+#define NT 1000000
+#endif
+
+/**
+ * Profiling workload: generate NT triples of random URI terms, add them in
+ * one batch to an in-memory graph, then release the graph and the inputs.
+ *
+ * @return int: always 0.
+ */
+static int test_graph_add_batch(void)
+{
+    size_t nt = NT;
+
+    LSUP_Triple *trp;
+    CRITICAL(trp = malloc(nt * sizeof *trp));
+    for (size_t i = 0; i < nt; i++) {
+        trp[i].s = LSUP_term_new(
+                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
+        trp[i].p = LSUP_term_new(
+                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
+        trp[i].o = LSUP_term_new(
+                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
+    }
+    TRACE(STR, "Triples generated.");
+
+    LSUP_Graph *gr = LSUP_graph_new(nt, NULL, LSUP_STORE_MEM);
+
+    LSUP_graph_add(gr, trp, nt);
+    TRACE(STR, "Graph populated.");
+
+    LSUP_graph_free(gr);
+
+    // Release the input terms and the triple array, which were previously
+    // leaked.
+    // NOTE(review): assumes LSUP_term_free() is the counterpart of
+    // LSUP_term_new() and that the graph keeps no reference to these terms;
+    // ownership of the string returned by LSUP_term_gen_random_str() is not
+    // visible here — confirm.
+    for (size_t i = 0; i < nt; i++) {
+        LSUP_term_free(trp[i].s);
+        LSUP_term_free(trp[i].p);
+        LSUP_term_free(trp[i].o);
+    }
+    free(trp);
+
+    return 0;
+}
+
+
+/**
+ * Run the batch-insert workload and print the processor time it consumed.
+ *
+ * Note that clock() reports CPU time used by the process, not wall-clock
+ * time.
+ *
+ * @return int: the return code of the workload.
+ */
+int main(void)
+{
+    clock_t start = clock();
+    int rc = test_graph_add_batch();
+    clock_t end = clock();
+
+    // Convert to double before dividing: where clock_t is an integer type,
+    // the plain quotient is truncated to whole seconds.
+    double elapsed = (double)(end - start) / CLOCKS_PER_SEC;
+
+    printf("Time elapsed: %lf s\n", elapsed);
+
+    return rc;
+}

+ 156 - 175
src/graph.c

@@ -1,3 +1,4 @@
+#include "htable.h"
 #include "graph.h"
 
 // Initial size of lookup graph. It will double each time capacity is reached.
@@ -15,57 +16,32 @@ typedef enum KSetFlag {
     LSUP_KS_CHECK_DUP   = 1 << 1,
 } KSetFlag;
 
-enum {
-    HS_ERROR        = -1,
-    HS_PRESENT      = 0,
-    HS_EMPTY        = 1,
-    HS_TOMBSTONE    = 2,
-};
-
 /**
- * Index (identity) hashing function.
+ * Identity hashing function.
  *
  * Since the key is already a strong hash, reuse it for bucket allocation.
  */
-static inline khint64_t idx_key_hash_fn(LSUP_Key key)
-{ return key; }
+static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
+{ return *(uint64_t*)key; }
+
 
 /**
- * Triple Key hash.
- *
- * Since triple keys are already hashes, interlace the first bytes of each
- * key element to preserve the individual identities.
+ * General XX64 hash. Strong (non-crypto) and extremely fast.
  */
-static inline khint64_t tkey_hash_fn(LSUP_TripleKey spok)
-{
-    khint64_t rc = (khint64_t)XXH64(spok, sizeof(LSUP_TripleKey), SEED);
-    TRACE("Key hash: 0x%lx\n", rc);
-    return rc;
-}
+static inline uint64_t xx64_hash_fn(
+        const void *key, ksize_t size, uint64_t seed)
+{ return XXH64(key, size, seed); }
 
-static inline int tkey_eq_fn(LSUP_Key *a, LSUP_Key *b)
-{
-    TRACE("a addr: %p", a);
-    TRACE("b addr: %p", b);
-    TRACE("Comparing a: {%lx, %lx, %lx}", a[0], a[1], a[2]);
-    TRACE("Wtih      b: {%lx, %lx, %lx}", b[0], b[1], b[2]);
-    // Evaluate from the least to the most probable to match.
-    return (
-            a[2] == b[2] &&
-            a[0] == b[0] &&
-            a[1] == b[1]);
-}
 
-KHASH_INIT(Keys, LSUP_Key *, char, 0, tkey_hash_fn, tkey_eq_fn)
+static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
+{ return memcmp(a, b, size) == 0; }
 
-KHASH_INIT(
-        Index, LSUP_Key, LSUP_Buffer, 1, idx_key_hash_fn, kh_int_hash_equal)
 
 typedef struct Graph {
     LSUP_store_type store_type;     // In-memory or MDB-backed
     LSUP_Term *uri;                 // Graph "name" (URI)
-    khash_t(Keys) *keys;
-    khash_t(Index) *idx;            // Dictionary of keys to serialized terms
+    LSUP_HTable *keys;
+    LSUP_HTable *idx;            // Dictionary of keys to serialized terms
 } Graph;
 
 /**
@@ -83,47 +59,67 @@ typedef bool (*LSUP_key_cmp_fn_t)(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
 
 
-// Dummy callback for queries with all parameters unbound. Returns true.
+/**
+ * Dummy callback for queries with all parameters unbound. Returns true.
 static bool lookup_none_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return true; }
+*/
 
-//  Keyset lookup for S key.
+/**
+ * Keyset lookup for S key.
+ */
 static bool lookup_sk_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return spok[0][0] == k1; }
 
-//  Keyset lookup for P key.
+/**
+ * Keyset lookup for P key.
+ */
 static bool lookup_pk_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return spok[0][1] == k1; }
 
-//  Keyset lookup for O key.
+/**
+ * Keyset lookup for O key.
+ */
 static bool lookup_ok_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return spok[0][2] == k1; }
 
-//  Keyset lookup for S and P keys.
+/**
+ * Keyset lookup for S and P keys.
+ */
 static bool lookup_skpk_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return spok[0][0] == k1 && spok[0][1] == k2; }
 
-//  Keyset lookup for S and O keys.
+/**
+ * Keyset lookup for S and O keys.
+ */
 static bool lookup_skok_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return spok[0][0] == k1 && spok[0][2] == k2; }
 
-//  Keyset lookup for P and O keys.
+/**
+ * Keyset lookup for P and O keys.
+ */
 static bool lookup_pkok_cmp_fn(
         const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
 { return spok[0][1] == k1 && spok[0][2] == k2; }
 
 
-/* * * Post-lookup callbacks * * */
+/* * * Post-lookup callback prototypes * * */
 
-int match_add_fn(LSUP_Graph *src, LSUP_Graph *dest, khiter_t cur, void *ctx);
+int match_add_fn(
+        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
+        void *ctx);
+
+
+int match_rm_fn(
+        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
+        void *ctx);
 
-int match_rm_fn(LSUP_Graph *src, LSUP_Graph *dest, khiter_t cur, void *ctx);
 
 
 /* * * KEYSETS * * */
@@ -151,13 +147,14 @@ LSUP_graph_init(
         gr->uri = LSUP_term_new(LSUP_TERM_URI, uri_str, NULL, NULL);
     }
 
-    gr->keys = kh_init_Keys();
-    kh_resize_Keys(gr->keys, capacity);
+    gr->keys = LSUP_htable_new(
+            capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, 0);
 
     switch (store_type ) {
         case LSUP_STORE_MEM:
-            gr->idx = kh_init_Index();
-            kh_resize_Index(gr->idx, capacity * IDX_SIZE_RATIO);
+            gr->idx = LSUP_htable_new(
+                capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
+                xx64_hash_fn, buffer_eq_fn, 0);
             break;
 
         case LSUP_STORE_MDB:
@@ -196,7 +193,7 @@ static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest)
     trp.o = NULL;
 
     return LSUP_graph_match_callback(
-            src, dest, &trp, match_add_fn, true, NULL);
+            src, dest, &trp, &match_add_fn, true, NULL);
 }
 
 
@@ -212,15 +209,15 @@ LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src)
 int
 LSUP_graph_resize(LSUP_Graph *gr, size_t size)
 {
-    kh_resize_Keys(gr->keys, size);
-    kh_resize_Index(gr->idx, size * IDX_SIZE_RATIO);
+    LSUP_htable_resize(gr->keys, size);
+    LSUP_htable_resize(gr->idx, size * IDX_SIZE_RATIO);
 
     return LSUP_OK;
 }
 
 
 size_t
-LSUP_graph_capacity(LSUP_Graph *gr) { return kh_end(gr->keys); }
+LSUP_graph_capacity(LSUP_Graph *gr) { return LSUP_htable_capacity(gr->keys); }
 
 
 char *
@@ -228,13 +225,14 @@ LSUP_graph_uri(LSUP_Graph *gr) { return gr->uri->data; }
 
 
 size_t
-LSUP_graph_size(LSUP_Graph *gr) { return kh_size(gr->keys); }
+LSUP_graph_size(LSUP_Graph *gr) { return LSUP_htable_size(gr->keys); }
 
 
 int
 LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo)
 {
     LSUP_SerTerm sspo[3];
+    LSUP_SerTerm *sterm;
 
     LSUP_term_serialize(spo->s, sspo);
     LSUP_term_serialize(spo->p, sspo + 1);
@@ -243,31 +241,26 @@ LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo)
     LSUP_TripleKey spok = NULL_TRP;
 
     // Add term to index.
-    int status;
-    khiter_t cur;
     for (int i = 0; i < 3; i++) {
         spok[i] = LSUP_sterm_to_key(sspo + i);
-
-        cur = kh_put_Index(gr->idx, spok[i], &status);
-        if (status == HS_EMPTY || status == HS_TOMBSTONE) {
-            // If term is not indexed, store the struct in the index.
-            kh_value(gr->idx, cur) = sspo[i];
+        TRACE("Indexing term key %lu\n", spok[i]);
+
+        // If term is already in the index, discard and free it.
+        if (LSUP_htable_get(gr->idx, spok + i, NULL) == LSUP_OK) {
+            CRITICAL(sterm = malloc(sizeof(LSUP_Buffer)));
+            //*sterm = sspo[i];
+            sterm = sspo + i;
+            LSUP_htable_put(gr->idx, spok + i, sterm);
         } else {
-            // If term is already in the index, discard and free it.
+            TRACE("%s", "Term is already indexed.");
             LSUP_buffer_done(sspo + i);
         }
     }
 
     // Add triple.
     TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
-    cur = kh_put_Keys(gr->keys, spok, &status);
-    //if (status == HS_EMPTY || status == HS_TOMBSTONE)
-    //    kh_key(gr->keys, cur) = spok;
-    TRACE("cur: %d\n", cur);
-    TRACE("Keyset size: %d\n", kh_size(gr->keys));
-    TRACE("Insert status: %d\n", status);
-
-    return status == HS_PRESENT ? LSUP_NOACTION : LSUP_OK;
+
+    return LSUP_htable_put(gr->keys, spok, NULL);
 }
 
 
@@ -294,13 +287,13 @@ LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size)
 bool
 LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
 {
-    LSUP_Key sk = LSUP_term_to_key(spo->s);
-    LSUP_Key pk = LSUP_term_to_key(spo->p);
-    LSUP_Key ok = LSUP_term_to_key(spo->o);
-
-    LSUP_TripleKey spok = {sk, pk, ok};
+    LSUP_TripleKey spok = {
+        LSUP_term_to_key(spo->s),
+        LSUP_term_to_key(spo->p),
+        LSUP_term_to_key(spo->o),
+    };
 
-    return kh_get_Keys(gr->keys, spok) != kh_end(gr->keys);
+    return LSUP_htable_get(gr->keys, spok, NULL) == LSUP_OK;
 }
 
 
@@ -308,25 +301,27 @@ int LSUP_graph_match_callback(
         LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
         keyset_match_fn_t callback_fn, bool match_cond, void *ctx)
 {
-    if (kh_size(gr->keys) == 0)
+    if (LSUP_htable_size(gr->keys) == 0)
         return LSUP_NOACTION;
 
+    htsize_t cur = 0;
     LSUP_Key k1, k2;
     LSUP_key_cmp_fn_t cmp_fn;
+    LSUP_TripleKey i_spok;
 
-    LSUP_Key sk = LSUP_term_to_key(spo->s);
-    LSUP_Key pk = LSUP_term_to_key(spo->p);
-    LSUP_Key ok = LSUP_term_to_key(spo->o);
-
-    if (sk != NULL_KEY && pk != NULL_KEY && ok != NULL_KEY) {
-        LSUP_TripleKey spok = {sk, pk, ok};
+    LSUP_TripleKey spok = {
+        LSUP_term_to_key(spo->s),
+        LSUP_term_to_key(spo->p),
+        LSUP_term_to_key(spo->o),
+    };
 
+    if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
         if (match_cond == true) {
             // Shortcut for 3-term match—only if match_cond is true.
             LSUP_graph_init(res, 1, NULL, LSUP_STORE_MEM);
-            khint_t cur = kh_get_Keys(gr->keys, spok);
-            if(cur != kh_end(gr->keys)) {
-                callback_fn(gr, res, cur, ctx);
+            int rc = LSUP_htable_get(gr->keys, spok, NULL);
+            if(rc == LSUP_OK) {
+                callback_fn(gr, res, &spok, ctx);
                 return LSUP_OK;
             } else {
                 return LSUP_NOACTION;
@@ -335,30 +330,29 @@ int LSUP_graph_match_callback(
             // For negative condition (i.e. "apply this function to all triples
             // except the matching one")
             int rc = LSUP_NOACTION;
-            for (khiter_t i = kh_begin(gr->keys); i != kh_end(gr->keys); i++) {
-                LSUP_Key *cur = kh_key(gr->keys, i);
-                if (
-                    cur[0] != spok[0] ||
-                    cur[1] != spok[1] ||
-                    cur[2] != spok[2]
-                ) {
-                    callback_fn(gr, res, i, ctx);
-                    rc = LSUP_OK;
+            while (LSUP_htable_iter(
+                        gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
+                if (LIKELY(
+                    i_spok[2] != spok[2] ||
+                    i_spok[0] != spok[0] ||
+                    i_spok[1] != spok[1]
+                )) {
+                    rc = callback_fn(gr, res, &i_spok, ctx);
                 }
             }
 
             return rc;
         }
 
-    } else if (sk != NULL_KEY) {
-        k1 = sk;
+    } else if (spok[0] != NULL_KEY) {
+        k1 = spok[0];
 
-        if (pk != NULL_KEY) { // s p ?
-            k2 = pk;
+        if (spok[1] != NULL_KEY) { // s p ?
+            k2 = spok[1];
             cmp_fn = lookup_skpk_cmp_fn;
 
-        } else if (ok != NULL_KEY) { // s ? o
-            k2 = ok;
+        } else if (spok[2] != NULL_KEY) { // s ? o
+            k2 = spok[2];
             cmp_fn = lookup_skok_cmp_fn;
 
         } else { // s ? ?
@@ -366,19 +360,19 @@ int LSUP_graph_match_callback(
 
         }
 
-    } else if (pk != NULL_KEY) {
-        k1 = pk;
+    } else if (spok[1] != NULL_KEY) {
+        k1 = spok[1];
 
-        if (ok != NULL_KEY) { // ? p o
-            k2 = ok;
+        if (spok[2] != NULL_KEY) { // ? p o
+            k2 = spok[2];
             cmp_fn = lookup_pkok_cmp_fn;
 
         } else { // ? p ?
             cmp_fn = lookup_pk_cmp_fn;
         }
 
-    } else if (ok != NULL_KEY) { // ? ? o
-        k1 = ok;
+    } else if (spok[2] != NULL_KEY) { // ? ? o
+        k1 = spok[2];
         cmp_fn = lookup_ok_cmp_fn;
 
     } else {
@@ -386,10 +380,9 @@ int LSUP_graph_match_callback(
         return LSUP_graph_copy(res, gr);
     }
 
-    for(khiter_t i = kh_begin(gr->keys); i != kh_end(gr->keys); i++) {
-        LSUP_TripleKey *spok = (LSUP_TripleKey*)&kh_key(gr->keys, i);
-        if (cmp_fn(spok, k1, k2) == match_cond)
-            callback_fn(gr, res, i, ctx);
+    while (LSUP_htable_iter(gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
+        if (cmp_fn(&i_spok, k1, k2) == match_cond)
+            callback_fn(gr, res, &i_spok, ctx);
     }
 
     return LSUP_OK;
@@ -400,9 +393,7 @@ int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
 {
     LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM);
 
-    LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
-
-    return LSUP_OK;
+    return LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
 }
 
 
@@ -416,19 +407,18 @@ int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 
 int LSUP_graph_subtract(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
-    if (kh_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
+    if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
 
     LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
 
-    if (kh_size(gr1->keys) == 0) return LSUP_OK;
+    if (LSUP_htable_size(gr1->keys) == 0) return LSUP_OK;
 
-    for(khiter_t i = kh_begin(gr1->keys); i != kh_end(gr1->keys); i++) {
-        LSUP_TripleKey *spok = (LSUP_TripleKey*)&kh_key(gr1->keys, i);
+    htsize_t cur = 0;
+    LSUP_TripleKey spok;
 
-        khiter_t cur = kh_get_Keys(gr2->keys, *spok);
-        if (cur == kh_end(gr2->keys)) {
-            match_add_fn(res, gr1, cur, NULL);
-        }
+    while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
+        if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
+            match_add_fn(res, gr1, &spok, NULL);
     }
 
     return LSUP_OK;
@@ -439,16 +429,15 @@ int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
     LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
 
-    if (kh_size(gr1->keys) == 0 || kh_size(gr2->keys) == 0)
+    if (LSUP_htable_size(gr1->keys) == 0 || LSUP_htable_size(gr2->keys) == 0)
         return LSUP_OK;
 
-    for(khiter_t i = kh_begin(gr1->keys); i != kh_end(gr1->keys); i++) {
-        LSUP_TripleKey *spok = (LSUP_TripleKey*)&kh_key(gr1->keys, i);
+    htsize_t cur = 0;
+    LSUP_TripleKey spok;
 
-        khiter_t cur = kh_get_Keys(gr2->keys, *spok);
-        if (cur != kh_end(gr2->keys)) {
-            match_add_fn(res, gr1, cur, NULL);
-        }
+    while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
+        if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_OK)
+            match_add_fn(res, gr1, &spok, NULL);
     }
 
     return LSUP_OK;
@@ -457,29 +446,26 @@ int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 
 int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
-    if (kh_size(gr1->keys) == 0) return LSUP_graph_copy(gr2, res);
-    if (kh_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
+    if (LSUP_htable_size(gr1->keys) == 0) return LSUP_graph_copy(gr2, res);
+    if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
 
     LSUP_graph_init(
             res, min(LSUP_graph_capacity(gr1), LSUP_graph_capacity(gr2)),
             NULL, LSUP_STORE_MEM);
 
-    for(khiter_t i = kh_begin(gr1->keys); i != kh_end(gr1->keys); i++) {
-        LSUP_TripleKey *spok = (LSUP_TripleKey*)&kh_key(gr1->keys, i);
+    htsize_t cur = 0;
+    LSUP_TripleKey spok;
 
-        khiter_t cur = kh_get_Keys(gr2->keys, *spok);
-        if (cur == kh_end(gr2->keys)) {
-            match_add_fn(res, gr1, cur, NULL);
-        }
+    while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
+        if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
+            match_add_fn(res, gr1, &spok, NULL);
     }
 
-    for(khiter_t i = kh_begin(gr2->keys); i != kh_end(gr2->keys); i++) {
-        LSUP_TripleKey *spok = (LSUP_TripleKey*)&kh_key(gr2->keys, i);
+    cur = 0;
 
-        khiter_t cur = kh_get_Keys(gr1->keys, *spok);
-        if (cur == kh_end(gr1->keys)) {
-            match_add_fn(res, gr2, cur, NULL);
-        }
+    while(LSUP_htable_iter(gr2->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
+        if (LSUP_htable_get(gr1->keys, (void**)&spok, NULL) == LSUP_NORESULT)
+            match_add_fn(res, gr2, &spok, NULL);
     }
 
     return LSUP_OK;
@@ -493,20 +479,19 @@ LSUP_graph_free(LSUP_Graph *gr)
         LSUP_term_free(gr->uri);
 
         // Free up triples.
-        /*
-        for(khiter_t i = kh_begin(gr->keys); i != kh_end(gr->keys); ++i) {
-            if(kh_exist(gr->keys, i))
-                free(&kh_value(gr->keys, i));
-        }
-        */
-        kh_destroy_Keys(gr->keys);
+        LSUP_htable_free(gr->keys);
 
         // Free up index entries and index.
-        for(khiter_t i = kh_begin(gr->idx); i != kh_end(gr->idx); ++i) {
-            if(kh_exist(gr->idx, i))
-                LSUP_buffer_done(&kh_value(gr->idx, i));
+        htsize_t cur = 0;
+        LSUP_TripleKey spok;
+        LSUP_Buffer *sterm;
+        while(LSUP_htable_iter(
+                    gr->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
+            TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
+            LSUP_buffer_done(sterm);
         }
-        kh_destroy_Index(gr->idx);
+
+        LSUP_htable_free(gr->idx);
 
         free(gr);
     }
@@ -522,35 +507,31 @@ LSUP_graph_free(LSUP_Graph *gr)
  *
  * The source graph cursor must be set to the triple to be copied.
  */
-int match_add_fn(LSUP_Graph *src, LSUP_Graph *dest, khiter_t cur, void *ctx)
+int match_add_fn(
+        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
+        void *ctx)
 {
-    LSUP_TripleKey *spok = (LSUP_TripleKey*)&kh_key(src->keys, cur);
     // Add term to index.
-    int status;
     for (int i = 0; i < 3; i++) {
-        khiter_t dest_cur = kh_put_Index(dest->idx, *spok[i], &status);
-        if (
-                kh_exist(src->idx, *spok[i] &&
-                (status == HS_EMPTY || status == HS_TOMBSTONE))) {
-            // If term is not indexed, store the struct in the index.
-            LSUP_buffer_copy(
-                    &kh_value(dest->idx, dest_cur), &kh_value(src->idx, cur));
+        // Copy each term found in the source index into the destination.
+        void *src_val, *dest_val;
+
+        if(LSUP_htable_get(src->idx, *spok + i, &src_val) == LSUP_OK) {
+            CRITICAL(dest_val = malloc(sizeof(LSUP_Buffer)));
+            LSUP_buffer_copy(dest_val, src_val);
+            LSUP_htable_put(dest->idx, *spok + i, dest_val);
         }
     }
 
     // Add triple.
-    kh_put_Keys(src->keys, *spok, &status);
-
-    return LSUP_OK;
+    return LSUP_htable_put(dest->keys, spok, NULL);
 }
 
 
 /**
  * Callback for removing a matched triple.
  */
-int match_rm_fn(LSUP_Graph *src, LSUP_Graph *dest, khint_t cur, void *ctx)
-{
-    kh_del_Keys(dest->keys, cur);
-
-    return LSUP_OK;
-}
+int match_rm_fn(
+        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
+        void *ctx)
+{ return LSUP_htable_del(dest->keys, spok); }

+ 446 - 0
src/htable.c

@@ -0,0 +1,446 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "htable.h"
+
+
+// Bucket state flags. Parenthesized so that they expand safely inside larger
+// expressions (e.g. next to `&`, `==` or arithmetic operators).
+#define BUCKET_EMPTY        (1 << 0)
+#define BUCKET_DELETED      (1 << 1)
+
+// Assertions are only compiled in debug builds.
+#if defined(DEBUG)
+#define    ASSERT        assert
+#else
+#define    ASSERT(x)
+#endif
+
+// Upper bound on how many buckets a single growth step may add.
+#define    MAX_GROWTH_STEP          (1024U * 1024)
+
+// Integer approximations of 85% and 40% of x (870/1024 and 409/1024).
+// The product is computed in 64 bits so that it cannot wrap around when x is
+// a large value of a narrower unsigned type.
+#define    APPROX_85_PERCENT(x)     (((uint64_t)(x) * 870) >> 10)
+#define    APPROX_40_PERCENT(x)     (((uint64_t)(x) * 409) >> 10)
+
+// Smallest number of buckets a table is ever allocated with.
+#define MIN_HT_SIZE         (1 << 3)
+
+
+typedef struct {                        // One slot of the bucket array.
+    LSUP_TripleKey  key;                // TODO Make configurable but
+                                        // statically allocated via macros
+    void *          val;                // Caller's pointer, stored as-is.
+    uint64_t        hash;               // key_hash_fn() result for key.
+    uint16_t        psl;                // Probe sequence length.
+} bucket_t;
+
+typedef struct htable_t {
+    htsize_t        size;               // Number of allocated buckets.
+    htsize_t        nitems;             // Number of stored entries.
+    unsigned        flags;              // Copied from LSUP_htable_new().
+    uint64_t        divinfo;            // fast_div32_init(size) result.
+    bucket_t *      buckets;            // Bucket array, `size` long.
+    uint64_t        seed;               // Hash seed; changed on resize.
+
+    key_hash_fn_t   key_hash_fn;        // Hashes a key with the seed.
+    key_eq_fn_t     key_eq_fn;          // Compares two keys.
+
+    ksize_t         ksize;              // Key size in bytes.
+    vsize_t         vsize;              // If 0, iter does not set *valp.
+
+    void *          del_marker;         // Zeroed key; marks empty buckets.
+} HTable;
+
+
+
+/* * * GENERIC UTILITIES * * */
+
+/*
+ * Tell whether a bucket is unoccupied, i.e. whether the first ksize bytes of
+ * its key are identical to the table's deletion marker.
+ */
+static inline bool is_empty_bucket(const HTable *ht, const bucket_t *bucket)
+{
+    const int cmp = memcmp(bucket->key, ht->del_marker, ht->ksize);
+
+    return cmp == 0;
+}
+
+/*
+ * Find last set bit: 1-based position of the most significant set bit in x,
+ * or 0 if no bit is set.
+ */
+static inline int fls(int x)
+{
+    if (!x) return 0;
+
+    return (sizeof(int) * CHAR_BIT) - __builtin_clz(x);
+}
+
+
+/*
+ * Fast 32bit division and remainder.
+ *
+ * Reference:
+ *
+ *    Torbjörn Granlund and Peter L. Montgomery, "Division by Invariant
+ *    Integers Using Multiplication", ACM SIGPLAN Notices, Issue 6, Vol 29,
+ *    http://gmplib.org/~tege/divcnst-pldi94.pdf, 61-72, June 1994.
+ *
+ * The following example computes q = a / b and r = a % b:
+ *
+ *    uint64_t divinfo = fast_div32_init(b);
+ *    q = fast_div32(a, b, divinfo);
+ *    r = fast_rem32(a, b, divinfo);
+ */
+
+static inline uint64_t
+fast_div32_init(uint32_t div)   // Precompute divinfo; div must not be 0.
+{   // Result layout: multiplier in bits 32-63, s1 in bits 8-15, s2 in bits 0-7.
+    uint64_t mt;
+    uint8_t s1, s2;
+    int l;
+
+    l = fls(div - 1);           // Bit length of (div - 1).
+    mt = (uint64_t)(0x100000000ULL * ((1ULL << l) - div));
+    s1 = (l > 1) ? 1U : (uint8_t)l;             // First shift: 0 or 1.
+    s2 = (l == 0) ? 0 : (uint8_t)(l - 1);       // Second shift: l - 1, or 0.
+    return (uint64_t)(mt / div + 1) << 32 | (uint32_t)s1 << 8 | s2;
+}
+
+/*
+ * Compute v / div using the constants packed by fast_div32_init().
+ *
+ * The div argument is accepted but not used; divinfo carries everything
+ * that is needed.
+ */
+static inline uint32_t
+fast_div32(uint32_t v, uint32_t div, uint64_t divinfo)
+{
+    (void)div; // unused
+
+    // Unpack the multiplier and the two shift counts.
+    const uint32_t mult = divinfo >> 32;
+    const unsigned shift_a = (divinfo & 0x0000ff00) >> 8;
+    const unsigned shift_b = (divinfo & 0x000000ff);
+
+    // Upper 32 bits of the 64-bit product v * mult.
+    const uint32_t hi = (uint32_t)(((uint64_t)v * mult) >> 32);
+
+    return (hi + ((v - hi) >> shift_a)) >> shift_b;
+}
+
+
+/*
+ * Compute v % div: subtract the divisor times the fast quotient from v.
+ */
+static inline uint32_t
+fast_rem32(uint32_t v, uint32_t div, uint64_t divinfo)
+{
+    const uint32_t quot = fast_div32(v, div, divinfo);
+
+    return v - div * quot;
+}
+
+
+/*
+ * Consistency check used in assertions: a bucket at index i is valid if it is
+ * empty, or if its recorded PSL equals the (wrapping) distance between i and
+ * the base index derived from its hash.
+ */
+static int __attribute__((__unused__))
+validate_psl_p(const HTable *ht, const bucket_t *bucket, unsigned i)
+{
+    const unsigned home = fast_rem32(bucket->hash, ht->size, ht->divinfo);
+    unsigned dist;
+
+    if (home > i) dist = ht->size - home + i;   // Probe wrapped around.
+    else dist = i - home;
+
+    return is_empty_bucket(ht, bucket) || dist == bucket->psl;
+}
+
+
+/* * * PUBLIC API * * */
+
+/*
+ * Allocate and set up a hash table.
+ *
+ * size is the requested number of buckets (clipped by LSUP_htable_resize()),
+ * ksize the key size in bytes, vsize the value size. key_hash_fn and
+ * key_eq_fn are the hashing and key comparison callbacks; flags is stored
+ * in the table.
+ *
+ * Returns the new table, or NULL if ksize is 0 or does not fit the key field
+ * of a bucket. The caller releases the table with LSUP_htable_free().
+ */
+HTable *LSUP_htable_new(
+        htsize_t size, ksize_t ksize, vsize_t vsize,
+        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags)
+{
+    // Keys are copied into a fixed-size field of each bucket: a larger key
+    // would overflow it, and a zero-length one would make every bucket
+    // compare equal to the empty marker.
+    if (ksize == 0 || ksize > sizeof(LSUP_TripleKey)) return NULL;
+
+    HTable *ht;
+    CRITICAL(ht = calloc(1, sizeof(HTable)));
+
+    // calloc() already left size, nitems, seed and buckets zeroed.
+    ht->ksize = ksize;
+    ht->vsize = vsize;
+    ht->key_hash_fn = key_hash_fn;
+    ht->key_eq_fn = key_eq_fn;
+    ht->flags = flags;
+
+    // All-zero key used to recognize and to fill empty buckets.
+    CRITICAL(ht->del_marker = calloc(1, ksize));
+
+    // Allocates the initial bucket array.
+    LSUP_htable_resize(ht, size);
+
+    return ht;
+}
+
+
+/**
+ * Resize a table.
+ *
+ * A new bucket array of newsize buckets (clipped to the MIN_HT_SIZE and
+ * HTSIZE_MAX limits) is allocated, the hash seed is changed and every stored
+ * entry is re-inserted. The old bucket array is released.
+ *
+ * Returns LSUP_OK on success, or LSUP_VALUE_ERR, leaving the table untouched,
+ * if the clipped size cannot hold the entries currently stored.
+ */
+int LSUP_htable_resize(HTable *ht, htsize_t newsize)
+{
+    TRACE("Resizing htable to %lu.", (size_t)newsize);
+
+    bucket_t *oldbuckets = ht->buckets;
+    const htsize_t oldsize = ht->size;
+
+    // Clip size to min & max limits.
+    if (newsize < MIN_HT_SIZE) newsize = MIN_HT_SIZE;
+    if (newsize > HTSIZE_MAX) newsize = HTSIZE_MAX;
+
+    // Re-inserting more entries than there are buckets would never find a
+    // free slot and spin forever.
+    if (newsize < ht->nitems) return LSUP_VALUE_ERR;
+
+    CRITICAL(ht->buckets = calloc(newsize, sizeof(bucket_t)));
+
+    ht->size = newsize;
+    ht->nitems = 0;     // Re-counted by the insertions below.
+
+    ht->divinfo = fast_div32_init(newsize);
+
+    // Widen before shifting: shifting a long by 32 is undefined where long
+    // is 32 bits wide.
+    const uint64_t seed_lo = (uint64_t)random();
+    const uint64_t seed_hi = (uint64_t)random();
+    ht->seed ^= seed_lo | (seed_hi << 32);
+
+    for (htsize_t i = 0; i < oldsize; i++) {
+        const bucket_t *bucket = &oldbuckets[i];
+
+        /* Skip the empty buckets. */
+        if (!is_empty_bucket(ht, bucket))
+            LSUP_htable_insert(ht, bucket->key, bucket->val);
+    }
+
+    free(oldbuckets);   // No-op on the first call, when there is no array yet.
+
+    return LSUP_OK;
+}
+
+
+/*
+ * Number of buckets currently allocated in the table.
+ */
+htsize_t LSUP_htable_capacity(LSUP_HTable *ht)
+{
+    const htsize_t nbuckets = ht->size;
+
+    return nbuckets;
+}
+
+
+/*
+ * Number of entries currently stored in the table.
+ */
+htsize_t LSUP_htable_size(LSUP_HTable *ht)
+{
+    const htsize_t count = ht->nitems;
+
+    return count;
+}
+
+
+/*
+ * Insert without resizing (assuming resizing is already done).
+ *
+ * The first ksize bytes at key are copied into the table; val is stored as a
+ * plain pointer and stays owned by the caller.
+ *
+ * Returns LSUP_OK if the entry was stored, LSUP_NOACTION if the key was
+ * already present (the stored value is left as it was), or LSUP_VALUE_ERR if
+ * the key is all zeroes, which is the value reserved for empty buckets.
+ */
+int LSUP_htable_insert(HTable *ht, const void *key, void *val)
+{
+    bucket_t *bucket, entry;
+
+    ASSERT(key != NULL);
+
+    // A key equal to the deletion marker would be stored in a bucket that
+    // still reads as empty, while nitems would be incremented anyway.
+    if (memcmp(key, ht->del_marker, ht->ksize) == 0) return LSUP_VALUE_ERR;
+
+    /*
+     * Setup the bucket entry. It is zeroed first because only ksize bytes
+     * of the key field are filled below, while the whole structure is
+     * copied into the table and printed by the trace statements.
+     */
+    memset(&entry, 0, sizeof(entry));
+    memcpy(entry.key, key, ht->ksize);
+    entry.val = val;
+    entry.hash = ht->key_hash_fn(entry.key, ht->ksize, ht->seed);
+    entry.psl = 0;
+
+    /*
+     * From the paper: "when inserting, if a record probes a location
+     * that is already occupied, the record that has traveled longer
+     * in its probe sequence keeps the location, and the other one
+     * continues on its probe sequence" (page 12).
+     *
+     * Basically: if the probe sequence length (PSL) of the element
+     * being inserted is greater than PSL of the element in the bucket,
+     * then swap them and continue.
+     */
+    htsize_t i = fast_rem32(entry.hash, ht->size, ht->divinfo);
+
+    for(;;) {
+        bucket = ht->buckets + i;
+
+        if(is_empty_bucket(ht, bucket)) break;
+
+        ASSERT(validate_psl_p(ht, bucket, i));
+
+        // There is a key in the bucket.
+        TRACE("Entry key: {%lu, %lu, %lu}; bucket key: {%lu, %lu, %lu}", entry.key[0], entry.key[1], entry.key[2], bucket->key[0], bucket->key[1], bucket->key[2]);
+        if (ht->key_eq_fn(bucket->key, entry.key, ht->ksize)) {
+            // Duplicate key: do nothing.
+            TRACE(STR, "Duplicate key.");
+            return LSUP_NOACTION;
+        }
+
+        /*
+         * We found a "rich" bucket.  Capture its location.
+         */
+        if (entry.psl > bucket->psl) {
+            bucket_t tmp;
+
+            TRACE(STR, "SWAP");
+            /*
+             * Place our key-value pair by swapping the "rich"
+             * bucket with our entry.  Copy the structures.
+             */
+            tmp = entry;
+            entry = *bucket;
+            *bucket = tmp;
+        }
+
+        entry.psl++;
+
+        /* Continue to the next bucket. */
+        ASSERT(validate_psl_p(ht, bucket, i));
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
+    }
+
+    /*
+     * Found a free bucket: insert the entry.
+     */
+    TRACE("Inserting {%lu, %lu, %lu} in bucket #%d", entry.key[0], entry.key[1], entry.key[2], i);
+    memcpy(bucket, &entry, sizeof(bucket_t)); // copy
+    ht->nitems++;
+
+    ASSERT(validate_psl_p(ht, bucket, i));
+
+    return LSUP_OK;
+}
+
+
+/*
+ * Insert a key/value pair, growing the table first if it is getting full.
+ *
+ * The return value is the one of LSUP_htable_insert(): LSUP_OK when the
+ * entry was stored, LSUP_NOACTION when the key was already present.
+ */
+int LSUP_htable_put(HTable *ht, const void *key, void *val)
+{
+    const size_t max_load = APPROX_85_PERCENT(ht->size);
+
+    // Past roughly 85% load: double the capacity, but never add more than
+    // MAX_GROWTH_STEP buckets in one step.
+    if (UNLIKELY(ht->nitems > max_load)) {
+        const size_t capped = ht->size + MAX_GROWTH_STEP;
+        const size_t target = min(ht->size << 1, capped);
+        LSUP_htable_resize(ht, target);
+    }
+
+    return LSUP_htable_insert(ht, key, val);
+}
+
+
+/*
+ * Look up a key.
+ *
+ * On a match, the stored value pointer is written to *valp (when valp is not
+ * NULL) and LSUP_OK is returned. If the key is not found, LSUP_NORESULT is
+ * returned and *valp is left untouched. A NULL key yields LSUP_VALUE_ERR.
+ */
+int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
+{
+    // Reject a NULL key before it is handed to the hash callback.
+    if (key == NULL) return LSUP_VALUE_ERR;
+
+    const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
+    htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
+
+    /*
+     * Lookup is a linear probe.
+     */
+    for(;;) {
+        bucket_t *bucket = ht->buckets + i;
+        ASSERT(validate_psl_p(ht, bucket, i));
+
+        /*
+         * Stop probing if we hit an empty bucket; also, if we hit a
+         * bucket with PSL lower than the distance from the base location,
+         * then it means that we found the "rich" bucket which should
+         * have been captured, if the key was inserted -- see the central
+         * point of the algorithm in the insertion function.
+         *
+         * This is tested before comparing keys, as in the deletion
+         * function, so that an empty bucket can never be reported as a
+         * match for a key that happens to equal the empty marker.
+         */
+        if (is_empty_bucket(ht, bucket) || n > bucket->psl)
+            return LSUP_NORESULT;
+
+        if (ht->key_eq_fn(bucket->key, key, ht->ksize)) {
+            // Key found within max probe length.
+            if (valp != NULL)
+                *valp = bucket->val;
+
+            return LSUP_OK;
+        }
+
+        n++;
+
+        /* Continue to the next bucket. */
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
+    }
+}
+
+
+/*
+ * Remove a key from the table.
+ *
+ * Returns LSUP_OK if the key was found and removed, LSUP_NOACTION if it was
+ * not in the table. The value pointer stored with the key is not released
+ * here. The table may be shrunk as a result of the removal.
+ */
+int LSUP_htable_del(HTable *ht, const void *key)
+{
+    ASSERT(key != NULL);
+
+    const size_t threshold = APPROX_40_PERCENT(ht->size);
+    const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
+    unsigned n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
+    bucket_t *bucket;
+
+    for(;;) {
+        /*
+         * The same probing logic as in the lookup function.
+         *
+         * The bucket pointer is the one declared outside the loop: it
+         * must still point at the matching bucket once the loop is left.
+         */
+        bucket = ht->buckets + i;
+        if (is_empty_bucket(ht, bucket) || n > bucket->psl)
+            return LSUP_NOACTION;
+
+        ASSERT(validate_psl_p(ht, bucket, i));
+
+        // Found the entry to remove: stop probing.
+        if (ht->key_eq_fn(bucket->key, key, ht->ksize)) break;
+
+        /* Continue to the next bucket. */
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
+        n++;
+    }
+
+    ht->nitems--;
+
+    /*
+     * The probe sequence must be preserved in the deletion case.
+     * Use the backwards-shifting method to maintain low variance.
+     */
+
+    while(1) {
+        bucket_t *nbucket;
+
+        // Mark the current bucket as empty.
+        memcpy(bucket->key, ht->del_marker, ht->ksize);
+
+        i = fast_rem32(i + 1, ht->size, ht->divinfo);
+        nbucket = ht->buckets + i;
+        ASSERT(validate_psl_p(ht, nbucket, i));
+
+        /*
+         * Stop if we reach an empty bucket or hit a key which
+         * is in its base (original) location.
+         */
+        if (is_empty_bucket(ht, nbucket) || nbucket->psl == 0)
+            break;
+
+        // Move the next entry one step back, towards its base location.
+        nbucket->psl--;
+        *bucket = *nbucket;
+        bucket = nbucket;
+    }
+
+    /*
+     * If the load factor is less than threshold, then shrink by
+     * halving the size, but not less than 1.
+     */
+    if (ht->nitems < threshold) {
+        size_t newsize = max(ht->size >> 1, 1);
+        (void)LSUP_htable_resize(ht, newsize);
+    }
+
+    return LSUP_OK;
+}
+
+
+/*
+ * Step through the occupied buckets of a table.
+ *
+ * *cur is the position to resume from (0 to start over) and is advanced past
+ * every bucket visited. For the next occupied bucket, *keyp is pointed at the
+ * key stored in the table and, if valp is not NULL and the table has a
+ * non-zero value size, *valp receives the stored value pointer; LSUP_OK is
+ * returned. LSUP_END is returned once the end of the table is reached.
+ */
+extern int LSUP_htable_iter(
+        LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp)
+{
+    for (;;) {
+        if (*cur >= ht->size) return LSUP_END;
+
+        bucket_t *slot = ht->buckets + *cur;
+
+        (*cur)++;
+
+        if (is_empty_bucket(ht, slot)) {
+            TRACE("Empty bucket: %d. Skipping.", (*cur) - 1);
+            continue;
+        }
+
+        // Hand out pointers into the table; nothing is copied.
+        *keyp = slot->key;
+        if (valp != NULL && ht->vsize > 0) *valp = slot->val;
+
+        return LSUP_OK;
+    }
+}
+
+
+/*
+ * Release the memory owned by a table, but not the table structure itself.
+ *
+ * The value pointers stored in the table are not released. The freed
+ * pointers are reset, so that calling this function again is harmless.
+ */
+void LSUP_htable_done(HTable *ht)
+{
+    // free() accepts NULL, so no guard is needed.
+    free(ht->buckets);
+    ht->buckets = NULL;
+
+    free(ht->del_marker);
+    ht->del_marker = NULL;
+}
+
+
+/*
+ * Release a table and the memory it owns. A NULL table is ignored.
+ */
+void LSUP_htable_free(HTable *ht)
+{
+    if(UNLIKELY(ht == NULL)) return;
+
+    LSUP_htable_done(ht);
+    free(ht);
+}

+ 2 - 1
test.c

@@ -1,9 +1,10 @@
 #include "test_term.c"
+#include "test_htable.c"
 #include "test_graph.c"
 
 int main(int argc, char **argv) {
 
-    int result = (term_tests() | graph_tests());
+    int result = (term_tests() | htable_tests() | graph_tests());
 
     printf("Test result: %lu\n", (size_t)result);
 

+ 7 - 34
test/test_graph.c

@@ -76,7 +76,6 @@ static int test_graph_heap()
     LSUP_Graph *gr = LSUP_graph_new(10, "urn:gr:1", LSUP_STORE_MEM);
 
     ASSERT(strcmp(LSUP_graph_uri(gr), "urn:gr:1") == 0, "Graph URI mismatch!");
-    EXPECT_INT_EQ(LSUP_graph_capacity(gr), 16);
     EXPECT_INT_EQ(LSUP_graph_size(gr), 0);
 
     LSUP_graph_free(gr);
@@ -94,40 +93,15 @@ static int test_graph_add()
 
     LSUP_graph_add(gr, trp, NUM_TRP);
 
-    _free_triples(trp); // gr takes ownership of data.
-
-    EXPECT_INT_EQ(LSUP_graph_capacity(gr), 16);
-    EXPECT_INT_EQ(LSUP_graph_size(gr), 8);
-
-    LSUP_graph_free(gr);
-
-    return 0;
-}
-
-
-static int test_graph_add_100k()
-{
-    size_t nt = 10000;
-
-    LSUP_Triple *trp;
-    CRITICAL(trp = malloc(nt * sizeof(LSUP_Triple)));
-    for (size_t i = 0; i < nt; i++) {
-        //printf("i: %lu\n", i);
-        trp[i].s = LSUP_term_new(
-                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
-        trp[i].p = LSUP_term_new(
-                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
-        trp[i].o = LSUP_term_new(
-                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
+    for (int i = 0; i < sizeof(trp); i++) {
+        printf("checking triple #%d... ", i);
+        ASSERT(LSUP_graph_contains(gr, trp + i), "Triple not in graph!");
+        printf("OK.\n");
     }
-    TRACE(STR, "Triples generated.");
-
-    LSUP_Graph *gr = LSUP_graph_new(nt, NULL, LSUP_STORE_MEM);
 
-    LSUP_graph_add(gr, trp, nt);
-    TRACE(STR, "Graph populated.");
+    _free_triples(trp); // gr copied data.
 
-    _free_triples(trp); // gr takes ownership of data.
+    EXPECT_INT_EQ(LSUP_graph_size(gr), 8);
 
     LSUP_graph_free(gr);
 
@@ -138,8 +112,7 @@ static int test_graph_add_100k()
 int graph_tests()  // Entry point of the graph test suite; called from test.c.
 {
     RUN(test_graph_heap);
-    //RUN(test_graph_add);
-    RUN(test_graph_add_100k);
+    RUN(test_graph_add);
     return 0;  // NOTE(review): failure handling is presumably inside RUN -- confirm.
 }
 

+ 158 - 0
test/test_htable.c

@@ -0,0 +1,158 @@
+#include "test.h"
+#include "htable.h"
+
+#define _CT 8
+
+/*
+ * Identity "hash" for the tests: the first 8 bytes of the key, read as an
+ * integer. The size and seed arguments are ignored.
+ */
+static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
+{
+    const uint64_t *word = (const uint64_t *)key;
+
+    return *word;
+}
+
+/*
+ * Key comparison for the tests: two keys are equal if their first size bytes
+ * are identical.
+ */
+static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
+{
+    const int cmp = memcmp(a, b, size);
+
+    return cmp == 0;
+}
+
+
+static int htable_idx()     // Table keyed by single LSUP_Key values.
+{
+    LSUP_Key keys[_CT] = {5, 8, 13, 21, 34, 55, 89, 5};     // Last repeats first.
+
+    LSUP_HTable *ht = LSUP_htable_new(
+            _CT, sizeof(LSUP_Key), sizeof(LSUP_Buffer),
+            id_hash_fn, buffer_eq_fn, 0);
+
+    LSUP_Buffer values[_CT];
+
+    for (int i = 0; i < _CT; i++) {     // Store "<key>" as the value of each key.
+        char tmp[64];
+        sprintf(tmp, "<%lu>", keys[i]);
+        LSUP_buffer_init(values + i, strlen(tmp) + 1);
+        memcpy((values + i)->addr, tmp, strlen(tmp) + 1);
+        printf("Buffer to insert: ");
+        LSUP_buffer_print(values + i);
+
+        if (LSUP_htable_put(ht, keys + i, values + i) != LSUP_OK)
+            LSUP_buffer_done(values + i);       // Not stored: release it here.
+    }
+
+    EXPECT_INT_EQ(LSUP_htable_size(ht), 7);     // 8 puts, one duplicate key.
+
+    for (int i = 0; i < _CT; i++) {     // Every key must map to its own text.
+        LSUP_Buffer* vtmp;
+        char ptmp[64];
+        LSUP_htable_get(ht, keys + i, (void**)&vtmp);
+
+        printf("Key in get: <%lu>: ", keys[i]);
+        LSUP_buffer_print(vtmp);
+
+        sprintf(ptmp, "<%lu>", keys[i]);
+
+        EXPECT_INT_EQ(memcmp(ptmp, vtmp->addr, vtmp->size), 0);
+    }
+
+    LSUP_Key *ktmp;
+    LSUP_Buffer *vtmp;
+    htsize_t cur = 0;
+
+    while(LSUP_htable_iter(ht, &cur, (void**)&ktmp, (void**)&vtmp) == LSUP_OK) {
+        printf("Key in iter: <%lu>: ", *ktmp);
+        LSUP_buffer_print(vtmp);
+
+        char ptmp[64];
+        sprintf(ptmp, "<%lu>", *ktmp);
+
+        EXPECT_INT_EQ(memcmp(ptmp, vtmp->addr, vtmp->size), 0);
+    }
+
+    cur = 0;    // Rewind, then release every buffer still held by the table.
+    while(LSUP_htable_iter(ht, &cur, (void**)&ktmp, (void**)&vtmp) == LSUP_OK) {
+        LSUP_buffer_done(vtmp);
+    }
+
+    printf("Freeing hash table.\n");
+    LSUP_htable_free(ht);
+
+    return 0;
+}
+
+
+static int htable_keys()    // Table keyed by whole LSUP_TripleKey values.
+{
+    LSUP_TripleKey keys[_CT] = {
+        {1, 1, 1},
+        {2, 1, 1},
+        {1, 2, 3},
+        {1, 9, 9},
+        {5, 6, 7},
+        {7, 6, 5},
+        {1, 1, 1}, // Duplicate.
+        {2, 1, 1}, // Duplicate.
+    };
+
+    LSUP_HTable *ht = LSUP_htable_new(
+            _CT, sizeof(LSUP_TripleKey), sizeof(LSUP_Buffer),
+            id_hash_fn, buffer_eq_fn, 0);   // Hash reads only the first 8 key bytes.
+
+    LSUP_Buffer values[_CT];
+
+    for (int i = 0; i < _CT; i++) {     // Store "<s : p : o>" as each value.
+        char tmp[64];
+        sprintf(tmp, "<%lu : %lu : %lu>", keys[i][0], keys[i][1], keys[i][2]);
+        LSUP_buffer_init(values + i, strlen(tmp) + 1);
+        memcpy((values + i)->addr, tmp, strlen(tmp) + 1);
+        TRACE(STR, "Buffer to insert: ");
+        LSUP_buffer_print(values + i);
+
+        if (LSUP_htable_put(ht, keys + i, values + i) != LSUP_OK)
+            LSUP_buffer_done(values + i);       // Not stored: release it here.
+    }
+
+    EXPECT_INT_EQ(LSUP_htable_size(ht), 6);     // 8 puts, two duplicate keys.
+
+    for (int i = 0; i < _CT; i++) {     // Every key must map to its own text.
+        LSUP_Buffer* vtmp;
+        char ptmp[64];
+        LSUP_htable_get(ht, keys[i], (void**)&vtmp);
+
+        printf(
+                "Key in get: <%lu : %lu : %lu>: ",
+                keys[i][0], keys[i][1], keys[i][2]);
+        LSUP_buffer_print(vtmp);
+
+        sprintf(ptmp, "<%lu : %lu : %lu>", keys[i][0], keys[i][1], keys[i][2]);
+
+        EXPECT_INT_EQ(memcmp(ptmp, vtmp->addr, vtmp->size), 0);
+    }
+
+    LSUP_TripleKey *ktmp;
+    LSUP_Buffer *vtmp;
+    htsize_t cur = 0;
+
+    while(LSUP_htable_iter(ht, &cur, (void**)&ktmp, (void**)&vtmp) == LSUP_OK) {
+        printf(
+                "Key in iter: <%lu : %lu : %lu>: ",
+                (*ktmp)[0], (*ktmp)[1], (*ktmp)[2]);
+        LSUP_buffer_print(vtmp);
+
+        char ptmp[64];
+        sprintf(ptmp, "<%lu : %lu : %lu>", (*ktmp)[0], (*ktmp)[1], (*ktmp)[2]);
+
+        EXPECT_INT_EQ(memcmp(ptmp, vtmp->addr, vtmp->size), 0);
+    }
+
+    cur = 0;    // Rewind, then release every buffer still held by the table.
+    while(LSUP_htable_iter(ht, &cur, (void**)&ktmp, (void**)&vtmp) == LSUP_OK) {
+        LSUP_buffer_done(vtmp);
+    }
+
+    printf("Freeing hash table.\n");
+    LSUP_htable_free(ht);
+
+    return 0;
+}
+
+
+int htable_tests()  // Entry point of the hash table test suite; called from test.c.
+{
+    RUN(htable_idx);    // Single-key table.
+    RUN(htable_keys);   // Triple-key table.
+
+    return 0;   // NOTE(review): failure handling is presumably inside RUN -- confirm.
+}