Browse Source

WIP hashmap overhaul.

Stefano Cossu 2 years ago
parent
commit
8bfba20c5f
6 changed files with 202 additions and 125 deletions
  1. 3 0
      .gitmodules
  2. 1 1
      TODO.md
  3. 1 1
      ext/log
  4. 87 17
      include/store_htable.h
  5. 76 85
      src/store_htable.c
  6. 34 21
      src/store_mdb.c

+ 3 - 0
.gitmodules

@@ -28,3 +28,6 @@
 	path = ext/re2c
 	url = https://github.com/skvadrik/re2c.git
     shallow = true
+[submodule "ext/hashmap"]
+	path = ext/hashmap
+	url = git@github.com:scossu/hashmap.c.git

+ 1 - 1
TODO.md

@@ -22,7 +22,7 @@
     - *D* Subclass term types
 - *D* Namespaced IRIs
 - *D* Relative IRIs
-- *P* Atomic multi-graph updates
+- *P* Transaction control
 - *P* Turtle serialization / deserialization
 - *P* Full UTF-8 support
 - *P* Extended tests

+ 1 - 1
ext/log

@@ -1 +1 @@
-Subproject commit 9b7a338c3cee9a68f118c22c9e322b809fda198d
+Subproject commit f9ea34994bd58ed342d2245cd4110bb5c6790153

+ 87 - 17
include/store_htable.h

@@ -79,13 +79,22 @@ LSUP_htstore_copy (const LSUP_HTStore *src);
 LSUP_rc
 LSUP_htstore_copy_contents (LSUP_HTStore *dest, const LSUP_HTStore *src);
 
+/** @brief Count triples in a store.
+ *
+ * @param[in] ht HTStore handle.
+ *
+ * @return Number of triples in the store.
+ */
+size_t
+LSUP_htstore_size (const LSUP_HTStore *ht);
+
 
 /** @brief Add a term to the store.
  *
  * @param[in] store HTStore handle.
  *
- * @param[in] sterm Serialized term to insert. The term is copied and will be
- *  freed together with the store.
+ * @param[in] sterm Serialized term to insert. The term is copied and may be
+ *  safely freed after the operation.
  *
  * @return LSUP_OK on success; LSUP_NOACTION if the term exists already; <0
  *  on error.
@@ -94,44 +103,105 @@ LSUP_rc
 LSUP_htstore_add_term (LSUP_HTStore *store, const LSUP_Buffer *sterm);
 
 
+/** @brief Initialize a loop to add triples to a store.
+ *
+ * @param[in] store Store handle.
+ *
+ * @return Iterator to be used with #LSUP_htstore_add_iter(). It must be freed
+ *  with #LSUP_htstore_add_done().
+ */
 LSUP_HTIterator *
 LSUP_htstore_add_init (LSUP_HTStore *store);
 
 
 /** @brief Add triples to the store.
  *
- * @param[in] store Store handle.
+ * @param[in] it Iterator handle created with #LSUP_htstore_add_init().
  *
- * @param[in] sspo Triples to add, serialized into buffer triples.
+ * @param[in] sspo Serialized buffer triple to add.
  */
 LSUP_rc
 LSUP_htstore_add_iter (LSUP_HTIterator *it, const LSUP_BufferTriple *sspo);
 
 
+/** @brief Free resources related to an add loop.
+ *
+ * @param[in] it Iterator to free.
+ */
 void
 LSUP_htstore_add_done (LSUP_HTIterator *it);
 
-
-LSUP_rc
-LSUP_htstore_remove(
-        LSUP_HTStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
-        const LSUP_Buffer *so, size_t *ct);
-
+/** @brief Find triples by pattern matching and return an iterator.
+ *
+ * The iterator may yield results by using #LSUP_htiter_next() and must be
+ * freed with #LSUP_htiter_free().
+ *
+ * @param[in] store Store to search in.
+ *
+ * @param[in] ss Serialized subject term. If NULL, the term is unbound.
+ *
+ * @param[in] sp Serialized predicate term. If NULL, the term is unbound.
+ *
+ * @param[in] so Serialized object term. If NULL, the term is unbound.
+ *
+ * @return Iterator for lookup results.
+ */
 LSUP_HTIterator *
-LSUP_htstore_lookup(
+LSUP_htstore_lookup (
         LSUP_HTStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
         const LSUP_Buffer *so);
 
-size_t
-LSUP_htstore_size (const LSUP_HTStore *ht);
 
-void
-LSUP_htiter_free (LSUP_HTIterator *it);
+/** @brief Find the next triple in a lookup and return the result.
+ *
+ * @param[in] it Iterator obtained from #LSUP_htstore_lookup().
+ *
+ * @param[out] sspo Serialized triple pointer to be populated with the result,
+ *  if found.
+ *
+ * @return LSUP_OK if a result was found; LSUP_END if the end of the iterator
+ *  is reached.
+ */
+LSUP_rc
+LSUP_htiter_next (LSUP_HTIterator *it, LSUP_BufferTriple *sspo);
 
+
+/** @brief Count of lookup results or triples added in an iteration.
+ *
+ * @param[in] it Iterator handle.
+ *
+ * @return Number of results yielded, or triples added, at a certain point of
+ *  an iteration.
+ */
 size_t
-LSUP_htiter_cur (const LSUP_HTIterator *it);
+LSUP_htiter_count (const LSUP_HTIterator *it);
+
 
+/** @brief Remove triples by pattern matching.
+ *
+ * The search criteria are the same used for #LSUP_htstore_lookup().
+ *
+ * @param[in] store Store to remove triples from.
+ *
+ * @param[in] ss Serialized subject term. If NULL, the term is unbound.
+ *
+ * @param[in] sp Serialized predicate term. If NULL, the term is unbound.
+ *
+ * @param[in] so Serialized object term. If NULL, the term is unbound.
+ *
+ * @param[out] ct Optional pointer to a counter. If not NULL, it is populated
+ *      with the number of triples removed. It is undefined if LSUP_DB_ERR is
+ *      returned.
+ *
+ * @return LSUP_OK if any triples were deleted; LSUP_NOACTION if no triples
+ *      were found for deletion; <0 on error.
+ */
 LSUP_rc
-LSUP_htiter_next (LSUP_HTIterator *it, LSUP_BufferTriple *sspo);
+LSUP_htstore_remove (
+        LSUP_HTStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
+        const LSUP_Buffer *so, size_t *ct);
+
+void
+LSUP_htiter_free (LSUP_HTIterator *it);
 
 #endif  // _LSUP_STORE_HTABLE_H

+ 76 - 85
src/store_htable.c

@@ -22,12 +22,15 @@ typedef struct ht_store_t {
 
 typedef struct ht_iterator_t {
     HTStore *           store;      // Store being iterated.
-    size_t              i;          // Number of records found at any point of
+    size_t              cur;        // Internal hash table cursor.
+    size_t              ct;         // Number of records found at any point of
                                     // a lookup iteration, or number of records
                                     // added at any point of an add loop.
     LSUP_Key            luk[3];     // 0÷3 lookup keys.
     LSUP_key_eq_fn_t    eq_fn;      // Equality function to test triples.
     int                 rc;         // Return code for *next* result.
+                                    // When the end of results is reached, this
+                                    // is set to LSUP_END.
     LSUP_TripleKey *    entry;      // Retrieved SPO key.
 } HTIterator;
 
@@ -149,12 +152,16 @@ static void htstore_idx_free_fn (void *item)
 
 /* * * Other prototypes. * * */
 
-inline static LSUP_rc tkey_to_strp (
+inline static LSUP_rc
+tkey_to_strp (
         const HTStore *store, const LSUP_Key *spok, LSUP_BufferTriple *sspo);
 
 static LSUP_rc
 htstore_add_key_iter (HTIterator *it, const LSUP_TripleKey spok);
 
+static LSUP_rc
+htiter_next_key (HTIterator *it, LSUP_TripleKey *spok);
+
 
 /* * * API * * */
 
@@ -287,6 +294,7 @@ LSUP_htstore_add_term (HTStore *store, const LSUP_Buffer *sterm)
     hashmap_set (
             store->idx, &(IndexEntry){
                 .key = tk,
+                // This shall be freed with the index hashmap.
                 .sterm = LSUP_buffer_new (sterm->size, sterm->addr)
             });
 
@@ -301,7 +309,7 @@ LSUP_htstore_add_init (HTStore *store)
     MALLOC_GUARD (it, NULL);
 
     it->store = store;
-    it->i = 0;
+    it->ct = 0;
 
     return it;
 }
@@ -316,7 +324,17 @@ LSUP_htstore_add_iter (HTIterator *it, const LSUP_BufferTriple *sspo)
         LSUP_buffer_hash (sspo->o),
     };
 
-    return htstore_add_key_iter (it, spok);
+    LSUP_rc rc = htstore_add_key_iter (it, spok);
+
+    if (rc != LSUP_OK) return rc;
+
+    for (int i = 0; i < 3; i++) {
+        rc = LSUP_htstore_add_term (it->store, LSUP_btriple_pos (sspo, i));
+        if (rc != LSUP_OK) return rc;
+    }
+
+    return rc;
+}
 
 
 void
@@ -329,33 +347,22 @@ LSUP_htstore_remove(
         LSUP_HTStore *store, const LSUP_Buffer *ss, const LSUP_Buffer *sp,
         const LSUP_Buffer *so,  size_t *ct)
 {
+    LSUP_rc rc = LSUP_NOACTION;
+
     LSUP_HTIterator *it = LSUP_htstore_lookup (store, ss, sp, so);
     if (UNLIKELY (!it)) return LSUP_DB_ERR;
 
-    if (ct) *ct = 0;
-
-    /*
-    void **entry;
-    size_t i = 0;
-    while (hashmap_iter (store->keys, &i, entry)) {
-        hashmap_delete (store->keys, *entry);
-    }
-    if (ct) *ct = i;
-    */
     LSUP_TripleKey *tmp;
-    HASH_ITER (hh, store->keys, it->entry, tmp) {
-        if (it->eq_fn (it->entry->key, it->luk)) {
-            HASH_DEL (store->keys, it->entry);
-            free (it->entry);
-
-            if (ct) (*ct)++;
+    while (htiter_next_key (it, tmp)) {
+        if (it->rc == LSUP_OK) {
+            tmp = hashmap_delete (store->keys, tmp);
+            free (tmp);
         }
     }
+    if (ct) *ct = it->ct;
 
     LSUP_htiter_free (it);
 
-    // TODO clean up orphan indices in separate (async, scheduled) function.
-
     return LSUP_OK;
 }
 
@@ -368,10 +375,11 @@ LSUP_htstore_lookup (HTStore *store, const LSUP_Buffer *ss,
     MALLOC_GUARD (it, NULL);
 
     it->store = store;
-    //it->cur = 0;
+    it->cur = 0;
+    it->ct = 0;
     it->rc = LSUP_END;
 
-    if (HASH_COUNT (store->keys) == 0) return it;
+    if (hashmap_count (store->keys) == 0) return it;
 
     LSUP_TripleKey spok = {
         LSUP_buffer_hash (ss),
@@ -401,7 +409,6 @@ LSUP_htstore_lookup (HTStore *store, const LSUP_Buffer *ss,
         } else {
             it->eq_fn = lookup_sk_eq_fn;
         }
-    //it->cur = 0;
 
     } else if (spok[1] != NULL_KEY) {
         it->luk[0] = spok[1];
@@ -422,9 +429,9 @@ LSUP_htstore_lookup (HTStore *store, const LSUP_Buffer *ss,
     // ? ? ?
     } else it->eq_fn = lookup_none_eq_fn;
 
-    it->entry = it->store->keys; // First record in hash table.
-    it->rc = it->entry == NULL ? LSUP_END : LSUP_OK;
-    it->i = 0;
+    // Position cursor at first record in hash table.
+    if (hashmap_iter (it->store->keys, &it->cur, (void **) it->entry))
+        it->rc = LSUP_OK;  // Else it's LSUP_END already.
 
     return it;
 }
@@ -436,8 +443,8 @@ LSUP_htiter_free (LSUP_HTIterator *it)
 
 
 size_t
-LSUP_htiter_cur (const LSUP_HTIterator *it)
-{ return it->i; }
+LSUP_htiter_count (const LSUP_HTIterator *it)
+{ return it->cur; }
 
 
 LSUP_rc
@@ -448,6 +455,7 @@ htiter_next_key (HTIterator *it, LSUP_TripleKey *spok)
     // If the previous iteration hit the end, return.
     if (it->rc != LSUP_OK) return it->rc;
 
+    // This value is for internal looping only. It shall never be returned.
     it->rc = LSUP_NORESULT;
     /*
 #ifdef DEBUG
@@ -462,33 +470,34 @@ htiter_next_key (HTIterator *it, LSUP_TripleKey *spok)
 #endif
     */
 
+    // Iteration resumes from lookup where first hashmap_iter was called.
     do {
-        if (!it->entry) it->rc = LSUP_END;
-
-        else {
-            if (it->eq_fn (*it->entry, it->luk)) {
-                log_trace (
-                    "Found spok: {%lx, %lx, %lx}",
-                    it->entry[0], it->entry[1], it->entry[2]
-                );
-                /*
+        if (!it->entry) return LSUP_END;
+
+        // Loop through all triples until a match is found, or end is reached.
+        if (it->eq_fn (*it->entry, it->luk)) {
+            log_trace (
+                "Found spok: {%lx, %lx, %lx}",
+                it->entry[0], it->entry[1], it->entry[2]
+            );
+            /*
 #ifdef DEBUG
-                IndexEntry *tmp = NULL;
-                HASH_FIND (hh, it->store->idx, it->entry->key + 0, KLEN, tmp);
-                LSUP_buffer_print(tmp->sterm);
-                HASH_FIND (hh, it->store->idx, it->entry->key + 1, KLEN, tmp);
-                LSUP_buffer_print(tmp->sterm);
-                HASH_FIND (hh, it->store->idx, it->entry->key + 2, KLEN, tmp);
-                LSUP_buffer_print(tmp->sterm);
+            IndexEntry *tmp = NULL;
+            HASH_FIND (hh, it->store->idx, it->entry->key + 0, KLEN, tmp);
+            LSUP_buffer_print(tmp->sterm);
+            HASH_FIND (hh, it->store->idx, it->entry->key + 1, KLEN, tmp);
+            LSUP_buffer_print(tmp->sterm);
+            HASH_FIND (hh, it->store->idx, it->entry->key + 2, KLEN, tmp);
+            LSUP_buffer_print(tmp->sterm);
 #endif
-                */
-
-                it->rc = LSUP_OK;
-                it->i++;
-            }
+            */
 
-            it->entry = it->entry->hh.next;
+            it->rc = LSUP_OK;
+            it->ct++;
         }
+        if (hashmap_iter (it->store->keys, &it->cur, (void **) it->entry))
+            it->rc = LSUP_OK;
+
     } while (it->rc == LSUP_NORESULT);
 
     return it->rc;
@@ -499,17 +508,16 @@ LSUP_rc
 LSUP_htiter_next (HTIterator *it, LSUP_BufferTriple *sspo)
 {
     LSUP_TripleKey *cur;
-    while (htiter_next_key (it, cur) {
-            tkey_to_strp (it->store, it->entry->key, sspo);
-            if (!sspo->s || !sspo->p || !sspo->o) return LSUP_DB_ERR;
-    }
+    LSUP_rc rc = htiter_next_key (it, cur);
+    if (rc != LSUP_OK) return rc;
 
+    return tkey_to_strp (it->store, *it->entry, sspo);
 }
 
 
 /* * * Statics * * */
 
-static inline LSUP_rc
+inline static LSUP_rc
 tkey_to_strp (
         const HTStore *store, const LSUP_Key *spok, LSUP_BufferTriple *sspo)
 {
@@ -517,15 +525,15 @@ tkey_to_strp (
     IndexEntry *tmp;
 
     tmp = hashmap_get (store->idx, spok + 0);
-    if (UNLIKELY (!tmp)) return LSUP_ERROR;
+    if (UNLIKELY (!tmp)) return LSUP_DB_ERR;
     sspo->s = tmp->sterm;
 
     tmp = hashmap_get (store->idx, spok + 1);
-    if (UNLIKELY (!tmp)) return LSUP_ERROR;
+    if (UNLIKELY (!tmp)) return LSUP_DB_ERR;
     sspo->p = tmp->sterm;
 
     tmp = hashmap_get (store->idx, spok + 2);
-    if (UNLIKELY (!tmp)) return LSUP_ERROR;
+    if (UNLIKELY (!tmp)) return LSUP_DB_ERR;
     sspo->o = tmp->sterm;
 
     return LSUP_OK;
@@ -538,34 +546,17 @@ htstore_add_key_iter (HTIterator *it, const LSUP_TripleKey spok)
     // Add triple.
     log_trace ("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
 
-    TripleEntry *k_ins = NULL;
-    HASH_FIND (hh, it->store->keys, spok, TRP_KLEN, k_ins);
-    if (k_ins == NULL) {
-        log_trace ("Triple not found, inserting.");
-        MALLOC_GUARD (k_ins, LSUP_MEM_ERR);
-
-        memcpy (k_ins->key, spok, TRP_KLEN);
-        HASH_ADD (hh, it->store->keys, key, TRP_KLEN, k_ins);
-
-        it->i++;
-    } else {
-        log_trace ("Triple found. Skipping.");
+    if (hashmap_get (it->store->keys, &spok)) {
+        log_trace ("Triple found. Not adding.");
         return LSUP_NOACTION;
     }
 
-    // Add terms to index. Terms are copied.
-    for (int i = 0; i < 3; i++) {
-        IndexEntry *ins = NULL;
-        HASH_FIND (hh, it->store->idx, spok + i, KLEN, ins);
-        if (ins == NULL) {
-            MALLOC_GUARD (ins, LSUP_MEM_ERR);
-            ins->key = spok[i];
-            ins->sterm = LSUP_buffer_new (
-                    (LSUP_btriple_pos (sspo, i))->size,
-                    (LSUP_btriple_pos (sspo, i))->addr);
-            HASH_ADD (hh, it->store->idx, key, KLEN, ins);
-        }
-    }
+    log_trace ("Triple not found, inserting.");
+
+    hashmap_set (it->store->keys, &spok);
+    if (hashmap_oom(it->store->keys)) return LSUP_MEM_ERR;
+
+    it->ct++;
 
     return LSUP_OK;
 }

+ 34 - 21
src/store_mdb.c

@@ -6,7 +6,7 @@
 #include "data/bootstrap.h"
 
 /**
- * Number of DBs defined.
+ * Number of DBs defined. See MAIN_TABLE and LOOKUP_TABLE defines below.
  */
 #define N_DB 13
 
@@ -32,7 +32,6 @@
 
 typedef char DbLabel[8];
 
-// TODO Most of these are no longer used. Clean up.
 typedef enum {
     LSSTORE_INIT         = 1, // Is the store environment set up on disk?
     LSSTORE_OPEN         = 3, // Is the environment open? Assumes init is set.
@@ -84,6 +83,7 @@ typedef struct mdbstore_iter_t {
 } MDBIterator;
 
 
+/*
 // Set of single keys.
 typedef struct key_set_t {
     LSUP_Key            key;
@@ -96,11 +96,13 @@ typedef struct triple_set_t {
     UT_hash_handle      hh;
 } TripleSet;
 
-// Map of context to triple set.
 typedef struct ctx_triple_map_t {
     LSUP_Key            ck;
-    TripleSet *         spok;
+    struct hashmap *    spok;
 } CtxTripleMap;
+*/
+// Set of single keys.
+typedef struct hashmap KeySet;
 
 /*
  * Static variables.
@@ -209,7 +211,8 @@ static int index_triple(
         LSUP_MDBStore *store, StoreOp op, LSUP_TripleKey spok, LSUP_Key ck);
 
 inline static LSUP_rc lookup_0bound (MDBIterator *it, size_t *ct);
-inline static LSUP_rc lookup_1bound (uint8_t idx0, MDBIterator *it, size_t *ct);
+inline static LSUP_rc lookup_1bound (
+        uint8_t idx0, MDBIterator *it, size_t *ct);
 inline static LSUP_rc lookup_2bound (
         uint8_t idx0, uint8_t idx1, MDBIterator *it, size_t *ct);
 inline static LSUP_rc lookup_3bound(MDBIterator *it, size_t *ct);
@@ -452,7 +455,8 @@ LSUP_mdbstore_add_iter (MDBIterator *it, const LSUP_BufferTriple *sspo)
                 &it->key, &it->data, MDB_NOOVERWRITE);
         if (db_rc != MDB_SUCCESS && db_rc != MDB_KEYEXIST) {
             log_error (
-                    "MDB error while inserting term: %s", LSUP_strerror(db_rc));
+                    "MDB error while inserting term: %s",
+                    LSUP_strerror(db_rc));
             return LSUP_DB_ERR;
         }
     }
@@ -669,11 +673,11 @@ LSUP_mdbstore_lookup(
 
 /** @brief Get next iterator key.
  *
- * The ck pointer is filled with an array of contexts that the triple appears
+ * ckset is filled with an array of contexts that the triple appears
  * in, if not NULL.
  */
 inline static LSUP_rc
-mdbiter_next_key (LSUP_MDBIterator *it, KeySet **ck_p)
+mdbiter_next_key (LSUP_MDBIterator *it, KeySet *ckset)
 {
     if (UNLIKELY (!it)) return LSUP_VALUE_ERR;
 
@@ -736,17 +740,15 @@ mdbiter_next_key (LSUP_MDBIterator *it, KeySet **ck_p)
 
     // Get all contexts for a triple if requested. Add up to previous
     // iterations if the same pointer is passed.
-    if (ck_p) {
+    if (ckset) {
         key.mv_data = it->spok;
         db_rc = mdb_cursor_get (it->ctx_cur, &key, &data, MDB_SET_KEY);
         if (db_rc == MDB_SUCCESS) {
             do {
-                KeySet *entry;
-                HASH_FIND (hh, *ck_p, data.mv_data, KLEN, entry);
-                if (!entry) {
-                    MALLOC_GUARD (entry, LSUP_MEM_ERR);
-                    entry->key = *(LSUP_Key *) data.mv_data;
-                    HASH_ADD (hh, *ck_p, key, KLEN, entry);
+                LSUP_Key entry;
+                if (!hashmap_get (ckset, data.mv_data)) {
+                    entry = *(LSUP_Key *) data.mv_data;
+                    hashmap_set (ckset, &entry);
                 }
             } while (
                     mdb_cursor_get (it->ctx_cur, &key, &data, MDB_NEXT_DUP)
@@ -763,9 +765,15 @@ LSUP_mdbiter_next (
         LSUP_MDBIterator *it, LSUP_BufferTriple *sspo, LSUP_Buffer **ctx_p)
 {
     LSUP_rc rc;
-    KeySet *ck = NULL;
 
-    rc = (ctx_p) ? mdbiter_next_key (it, &ck) : mdbiter_next_key (it, NULL);
+    KeySet *ckset = NULL;
+    if (ctx_p) {
+        KeySet *ckset = hashmap_new (
+                sizeof (LSUP_Key), 0, LSUP_HASH_SEED, 0,
+                key_hash_fn, key_cmp_fn, NULL, NULL);
+        if (UNLIKELY (hashmap_oom (ckset))) return LSUP_MEM_ERR;
+    }
+    rc = mdbiter_next_key (it, ckset);
 
     if (rc == LSUP_OK) {
         if (sspo) {
@@ -778,15 +786,20 @@ LSUP_mdbiter_next (
 
         // One-shot contexts for current triple.
         if (ctx_p) {
-            KeySet *ccur;
-            LSUP_Buffer *ctx = malloc (HASH_COUNT (ck) * sizeof (*ctx));
+            LSUP_Key *ck;
+            // 1 extra slot for sentinel.
+            LSUP_Buffer *ctx = calloc (
+                    (hashmap_count (ckset) + 1), sizeof (*ctx));
+            if (!ctx) return LSUP_MEM_ERR;
+
             size_t i = 0;
-            for (ccur = ck; ccur != NULL; ccur = ccur->hh.next)
-                key_to_sterm (it, ccur->key, ctx + (i++));
+            while (hashmap_iter (ckset, &i, (void **) &ck))
+                key_to_sterm (it, *ck, ctx + i);
 
             // TODO error handling.
         }
     }
+    if (ckset) hashmap_free (ckset);
 
     return rc;
 }