瀏覽代碼

Initial pass at store separation:

* Complete separation of memory and MDB back ends.
* Move most graph operations to back ends.
* Make signature of factory functions consistent (LMDB style).
* Add triple methods.
* Stop using LSUP_SerTerm.
* Many more small changes.
Stefano Cossu 3 年之前
父節點
當前提交
777d17e215
共有 14 個文件被更改,包括 1432 次插入982 次删除
  1. 9 0
      include/core.h
  2. 158 52
      include/graph.h
  3. 67 39
      include/htable.h
  4. 74 0
      include/store_htable.h
  5. 91 23
      include/store_mdb.h
  6. 22 45
      include/term.h
  7. 61 9
      include/triple.h
  8. 1 1
      src/buffer.c
  9. 293 368
      src/graph.c
  10. 109 138
      src/htable.c
  11. 236 108
      src/store_htable.c
  12. 203 166
      src/store_mdb.c
  13. 53 29
      src/term.c
  14. 55 4
      src/triple.c

+ 9 - 0
include/core.h

@@ -64,8 +64,17 @@ typedef enum {
     LSUP_VALUE_ERR      = -88803,
     LSUP_TXN_ERR        = -88804,
     LSUP_DB_ERR         = -88805,
+    LSUP_NOT_IMPL_ERR   = -88806,
 } LSUP_rc;
 
+typedef enum {
+    LSUP_BOOL_UNION,
+    LSUP_BOOL_SUBTRACTION,
+    LSUP_BOOL_INTERSECTION,
+    LSUP_BOOL_XOR,
+} LSUP_bool_op;
+
+
 typedef size_t LSUP_Key;
 typedef LSUP_Key LSUP_DoubleKey[2];
 typedef LSUP_Key LSUP_TripleKey[3];

+ 158 - 52
include/graph.h

@@ -1,7 +1,8 @@
 #ifndef _LSUP_GRAPH_H
 #define _LSUP_GRAPH_H
 
-#include "triple.h"
+#include "store_htable.h"
+#include "store_mdb.h"
 
 
 typedef enum LSUP_store_type {
@@ -9,87 +10,195 @@ typedef enum LSUP_store_type {
     LSUP_STORE_MDB
 } LSUP_store_type;
 
+
+/** @brief Graph object.
+ */
 typedef struct Graph LSUP_Graph;
 
+/** @brief Graph iterator.
+ *
+ * This opaque handle is generated by #LSUP_graph_lookup and is used to iterate
+ * over lookup results with #LSUP_graph_iter_next. It must be freed with
+ * #LSUP_graph_iter_free when done.
+ */
+typedef struct GraphIterator LSUP_GraphIterator;
 
-/**
- * Post-lookup callback type.
+
+/** @brief Create an empty graph.
  *
- * src is the graph that yielded a match. Its index ponts at the matched triple
- *  key and is accessible via `keyset_peek(ks)`.
+ * The new graph has zero capacity and a random URN. To change either one, use
+ * #LSUP_graph_resize and #LSUP_graph_set_uri, respectively.
  *
- * dest is an optional keyset that may be acted upon. It may be NULL.
+ * @param store_type[in] Type of store for the graph. One of the values of
+ *  #LSUP_store_type.
  *
- * cur is the cursor pointing to the matching record in the source.
+ * @param gr[out] Pointer to a pointer to the new graph. It must be freed with
+ *  #LSUP_graph_free when done.
  *
- * ctx is an optional arbitrary pointer to additional data that may be used
- *  by the callback.
-*/
-typedef int (*keyset_match_fn_t)(
-        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
-        void *ctx);
+ * @return LSUP_OK if the graph was created, or < 0 if an error occurred.
+ */
+LSUP_rc
+LSUP_graph_new(const LSUP_store_type store_type, LSUP_Graph **gr);
 
 
-int
-LSUP_graph_init(
-        LSUP_Graph *gr, size_t capacity, char *uri_str,
-        LSUP_store_type store_type);
+/** @brief copy a graph into a new one.
+ *
+ * The new graph is compacted to the minimum required size.
+ *
+ * @param src[in] Graph to be copied.
+ *
+ * @param uri URI of the destination graph. If NULL, a UUID4 URN is generated.
+ *
+ * @param dest[out] Pointer to a pointer to the destination graph. It must be
+ *  freed with #LSUP_graph_free when done.
+ *
+ * @return LSUP_OK if the graph was copied, or < 0 if an error occurred.
+ */
+LSUP_rc
+LSUP_graph_copy(const LSUP_Graph *src, LSUP_Graph **dest);
+
+
+/** Perform a boolean operation between two graphs.
+ *
+ * This method yields a new graph as the result of the operation.
+ *
+ * @param op[in] Operation to perform. One of #LSUP_bool_op.
+ *
+ * @param gr1[in] First operand.
+ *
+ * @param gr2[in] Second operand.
+ *
+ * @param res[out] Result graph. It must be freed with #LSUP_graph_free when
+ *  done.
+ */
+LSUP_rc
+LSUP_graph_bool_op(
+        const LSUP_bool_op op, const LSUP_Graph *gr1, const LSUP_Graph *gr2,
+        LSUP_Graph **res);
 
-LSUP_Graph *
-LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type);
 
-// TODO Make src const; invert operands.
-int
-LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src);
+/** @brief Free a graph.
+ */
+void
+LSUP_graph_free(LSUP_Graph *gr);
 
-int
-LSUP_graph_resize(LSUP_Graph *gr, size_t size);
 
+/** @brief Number of triples that can be stored without resizing the graph.
+ *
+ * @return Dynamic capacity of an in-memory graph or maximum allowed memory for
+ *  an MDB graph.
+ */
 size_t
 LSUP_graph_capacity(const LSUP_Graph *gr);
 
+
+/** @brief Number of triples in a graph.
+ */
 size_t
 LSUP_graph_size(const LSUP_Graph *gr);
 
+
+/** @brief Change the capacity of an in-memory graph.
+ *
+ * This is useful ahead of a bulk load to save multiple reallocs. Otherwise,
+ * the graph expands automatically on new inserts when capacity is reached.
+ *
+ * @param gr[in] Graph to be resized.
+ *
+ * @param size[in] New size. This will never be smaller than the current
+ *  occupied space. Therefore setting this value to 0 effectively compacts the
+ *  graph storage.
+ *
+ * @return LSUP_OK if the operation was successful; LSUP_VALUE_ERR if the store
+ *  type of the graph is not LSUP_STORE_MEM; <0 if an error occurs while
+ *  resizing.
+ */
+LSUP_rc
+LSUP_graph_resize(LSUP_Graph *gr, size_t size);
+
+
+/** @brief Read-only graph URI.
+ *
+ * To change the graph URI, use #LSUP_graph_set_uri.
+ */
 LSUP_Term *
 LSUP_graph_uri(const LSUP_Graph *gr);
 
+
+/** Set the URI of a graph.
+ *
+ * @param gr[in] Graph whose URI is to be changed.
+ *
+ * @param uri[in] New URI as a string. If NULL, a UUID4 URN is generated.
+ */
+LSUP_rc
+LSUP_graph_set_uri(LSUP_Graph *gr, const char *uri);
+
 bool
-LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *t);
+LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo);
 
 
-/**
- * Execute a custom function on a graph based on a match pattern.
+/** @brief Add triples and/or serialized triples to a graph.
+ *
+ * For API users it may be more convenient to use the more specialized
+ * #LSUP_graph_add_trp.
+ */
+LSUP_rc
+LSUP_graph_add(
+        LSUP_Graph *gr,
+        const LSUP_Triple trp[], size_t trp_ct,
+        const LSUP_SerTriple strp[], size_t strp_ct);
+
+#define LSUP_graph_add_trp(gr, trp, ct) LSUP_graph_add(gr, trp, ct, NULL, 0)
+
+
+/** @brief Delete triples by a matching pattern.
  *
- * This function executes an arbitrary callback on a graph, `res`, based on
- * triples matched by a pattern on graph `gr`. `res` must be initialized but
- * need not be empty. `res` can point to the same object as `gr` if changes
- * are to be done in place (e.g. removing triples).
+ * @param gr[in] Graph to delete triples from.
  *
- * @param[in] gr Graph to perform pattern matching.
- * @param[out] res Result graph to apply the callback to.
- * @param[in] spo Triple pattern. Each term of the triple members can be either
- *  a term pointer or NULL. If NULL, the term is unbound.
- * @param[in] callback_fn Callback function to apply.
- * @param[in] match_cond If true, apply the callback to each triple a match is
- *  found for. Otherwise, apply to each triple no match is found for.
- * @param[in|out] ctx Arbitrary context that may be handled in the callback
- *  function.
+ * @param spo[in] Matching pattern. Any and all of s, p, o can be NULL.
  *
- * @return LSUP_OK on match, LSUP_NOACTION on no match, <0 on error.
+ * @param ct[out] If not NULL it is populated with the number of triples
+ *  deleted.
  */
-int LSUP_graph_match_callback(
-        LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
-        keyset_match_fn_t callback_fn, bool match_cond, void *ctx);
+LSUP_rc
+LSUP_graph_remove(LSUP_Graph *gr, const LSUP_Triple *spo, size_t *ct);
 
 
-/**
- * Add triples to a graph.
+/** Look up triples by a matching pattern and yield an iterator.
+ *
+ * @param gr[in] Graph to look up.
+ *
+ * @param spo[in] Triple to look for. Any and all terms can be NULL, which
+ *  indicates unbound terms.
+ *
+ * @param it[out] Pointer to a #LSUP_GraphIterator to be generated. It must be
+ *  freed with #LSUP_graph_iter_free after use.
  */
-int
-LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size);
+LSUP_rc
+LSUP_graph_lookup(
+        const LSUP_Graph *gr, const LSUP_Triple *spo,
+        LSUP_GraphIterator **it_p);
+
+
+/** @brief Advance a cursor obtained by a lookup and return a matching triple.
+ *
+ * @param it[in] Iterator handle obtained through #LSUP_graph_lookup.
+ *
+ * @param spo[out] Triple to be populated with the next result.
+ *
+ * @return LSUP_OK if a result was found; LSUP_END if the end of the match list
+ *  was reached.
+ */
+LSUP_rc
+LSUP_graph_iter_next(LSUP_GraphIterator *it, LSUP_Triple *spo);
+
+
+/** @brief Free a graph iterator.
+ */
+void
+LSUP_graph_iter_free(LSUP_GraphIterator *it);
 
-int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo);
 
 /**
  * Set-theoretical union (gr1 ∪ gr2).
@@ -119,7 +228,4 @@ int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res);
  */
 int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res);
 
-void
-LSUP_graph_free(LSUP_Graph *gr);
-
 #endif

+ 67 - 39
include/htable.h

@@ -1,19 +1,19 @@
 /**
  * Hash table implementation.
  *
- * This code is hack...ahem, built upon Klib:
- * https://github.com/attractivechaos/klib/blob/master/khash.h
+ * This code is hack...ahem, built upon rhashmap:
+ * https://github.com/rmind/rhashmap
  *
  * After trying several hash map implementations, none met all the requirements
- * (small, single-file; accept arbitrarily-sized elements; not an unsightly
- * macro mess; reasonably fast), so I decided to expand a KLib macro and adapt
- * it to a data type agnostic model.
+ * (small, single-file; accept arbitrarily-sized elements; no undebuggable
+ * macro spaghetti; reasonably fast), so I decided to expand an existing
+ * library and adapt it to a data type agnostic model.
  *
- * This table stores keys and optionally values as unspecified null pointers of
+ * This table stores keys and optionally values in a contiguous array of
  * arbitrary, but fixed, data sizes. For small keys / values of unusual size,
  * this is convenient because it avoids creating (and having to manage) a
- * pointer for each key and value. Data are all stored inline. The data types
- * are set by casting on retrieval.
+ * pointer for each key and value. The data types are set by casting on
+ * retrieval.
  *
  * For larger or variably-sized keys or values, or ones that are not convenient
  * to copy into the table, pointers can obviously be used by specifying ptr_t
@@ -25,12 +25,32 @@
 
 #include "core.h"
 
-// Max number of entries in the table. With HTABLE_BIG_SIZE, it is SIZE_MAX.
-// Otherwise, UINT_MAX (4,294,967,295).
-#ifdef HTABLE_BIG_SIZE
+/* Max number of entries in the table and hash size. */
+
+/*
+ * This allows a table size limited to size_t, which is probably much more than
+ * any current system would want to handle in memory.
+ */
+#if defined(HTABLE_HUGE_SIZE)
+typedef size_t ht_hash_t;
 typedef size_t htsize_t;
 #define HTSIZE_MAX SIZE_MAX
+
+/*
+ * This allows max UINT_MAX entries (4,294,967,295) and a large hash size to
+ * take full advantage of a very large table.
+ */
+#elif defined(HTABLE_BIG_SIZE)
+typedef size_t ht_hash_t;
+typedef uint32_t htsize_t;
+#define HTSIZE_MAX UINT32_MAX
+
+/*
+ * This allows max UINT_MAX entries but the hash size is smaller, thus it is
+ * only recommended for up to a few million entries.
+ */
 #else
+typedef uint32_t ht_hash_t;
 typedef uint32_t htsize_t;
 #define HTSIZE_MAX UINT32_MAX
 #endif
@@ -43,7 +63,7 @@ typedef uint16_t ksize_t;
 typedef uint8_t ksize_t;
 #endif
 
-// Size of value entries. With HTABLE_BIG_KEY it is 65535 (64Kb). Otherwise,
+// Size of value entries. With HTABLE_BIG_VAL it is 65535 (64Kb). Otherwise,
 // it is 256 bytes. For values that may be larger than 64 Kb, use pointers.
 #ifdef HTABLE_BIG_VAL
 typedef uint16_t vsize_t;
@@ -52,12 +72,6 @@ typedef uint8_t vsize_t;
 #endif
 
 
-typedef enum {
-    HTABLE_NOCOPY           = 1 << 0,
-    HTABLE_IS_SET           = 1 << 1,
-} LSUP_HTFlag;
-
-
 /**
  * Key hashing function.
  *
@@ -77,26 +91,39 @@ typedef bool (*key_eq_fn_t)(const void *a, const void *b, ksize_t size);
 /**
  * Hash table type.
  *
- * Supports up to UINT_MAX entries (~4 billions on most modern machines).
+ * By default it should keep a good performance up to a few million entries
+ * due to its small hash size.
+ *
+ * If compiled with -DHTABLE_BIG_SIZE it supports up to UINT_MAX entries (~4
+ * billions on most modern machines) for very large in-memory graphs.
  *
- * If compiled with -DHTABLE_BIG_SIZE it supports up to size_t entries
- * for extremely large in-memory graphs.
+ * If compiled with -DHTABLE_HUGE_SIZE it supports up to SIZE_MAX entries
+ * (probably more than you will ever want to load in memory).
  */
 typedef struct htable_t LSUP_HTable;
 
-extern LSUP_HTable *LSUP_htable_new(
+LSUP_rc
+LSUP_htable_new(
         htsize_t size, ksize_t ksize, vsize_t vsize,
-        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags);
+        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, LSUP_HTable **ht);
 
-extern int LSUP_htable_resize(LSUP_HTable *ht, htsize_t newsize);
+LSUP_rc
+LSUP_htable_copy(const LSUP_HTable *src, LSUP_HTable **dest);
 
-extern htsize_t LSUP_htable_capacity(LSUP_HTable *ht);
+LSUP_rc
+LSUP_htable_resize(LSUP_HTable *ht, htsize_t newsize);
 
-extern htsize_t LSUP_htable_size(LSUP_HTable *ht);
+htsize_t
+LSUP_htable_capacity(LSUP_HTable *ht);
 
-extern int LSUP_htable_insert(LSUP_HTable *ht, const void *key, void *val);
+htsize_t
+LSUP_htable_size(LSUP_HTable *ht);
 
-extern int LSUP_htable_put(LSUP_HTable *ht, const void *key, void *val);
+LSUP_rc
+LSUP_htable_insert(LSUP_HTable *ht, const void *key, const void *val);
+
+LSUP_rc
+LSUP_htable_put(LSUP_HTable *ht, const void *key, const void *val);
 
 /**
  * @brief Test the existence of a given key and find its value.
@@ -106,14 +133,15 @@ extern int LSUP_htable_put(LSUP_HTable *ht, const void *key, void *val);
  * @param const void *key[in]: Key to look up.
  *
  * @param void *val[out]: Pointer to be set to the address of the value found
- * at the key address, if any. If NULL is passed, or if the hash table is a
- * set, the value is never populated.
+ * at the key address, if any. The memory pointed to is owned by the hash
+ * table. If NULL is passed, or if the hash table is a set, the value is never
+ * populated.
  *
  * @return int: LSUP_OK if the key is found; LSUP_NORESULT if the key is not
  *  found; a negative value on error.
  */
-extern int LSUP_htable_get(
-        const LSUP_HTable *ht, const void *key, void **valp);
+LSUP_rc
+LSUP_htable_get(const LSUP_HTable *ht, const void *key, void **val);
 
 /*
  * Remove the given key.
@@ -126,7 +154,8 @@ extern int LSUP_htable_get(
  *  found.
  *
  */
-extern int LSUP_htable_del(LSUP_HTable *ht, const void *key);
+LSUP_rc
+LSUP_htable_remove(LSUP_HTable *ht, const void *key);
 
 /**
  * Iterate over a hashmap or set.
@@ -147,16 +176,15 @@ extern int LSUP_htable_del(LSUP_HTable *ht, const void *key);
  * @return int: LSUP_OK if the key is found; LSUP_END if the end of the data
  *  is reached.
  */
-extern int LSUP_htable_iter(
-        LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp);
+LSUP_rc
+LSUP_htable_iter(LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp);
 
 /*
  * Free the memory used by the hash table.
  *
- * => It is the responsibility of the caller to remove elements if needed.
+ * It is the responsibility of the caller to free data pointed to if pointers
+ * were used for keys or values.
  */
-extern void LSUP_htable_done(LSUP_HTable *ht);
-
-extern void LSUP_htable_free(LSUP_HTable *ht);
+void LSUP_htable_free(LSUP_HTable *ht);
 
 #endif

+ 74 - 0
include/store_htable.h

@@ -1,9 +1,83 @@
+/** @file store_htable.h
+ *
+ * @brief Simple in-memory triple store back end based on hash tables.
+ *
+ * This is the simplest choice to do in-memory manipulation of RDF graphs and
+ * it has some limitations: most notably, it only supports triples without
+ * context (one graph per store) and it is not indexed. This means that it is
+ * optimized for fast writes and sequential lookups (iteration). Lookups on
+ * arbitrary terms are supported but require iterating over all the triples.
+ * This implementation is most convenient for graphs where retrieval is done
+ * via iteration.
+ *
+ * Also, as it may be obvious, this store is not persistent.
+ *
+ * For faster random lookups and persistence, the MDB backend is preferred. If
+ * persistence is not required (e.g. ingesting and manipulating a very large
+ * graph and outputting some results on a file) an ad-hoc MDB store located in
+ * RAM disk can be used, which is much faster.
+ */
 #ifndef _LSUP_STORE_HTABLE_H
 #define _LSUP_STORE_HTABLE_H
 
 #include "triple.h"
 #include "htable.h"
 
+
 typedef struct HTStore LSUP_HTStore;
+typedef struct HTIterator LSUP_HTIterator;
+
+LSUP_rc
+LSUP_htstore_new(size_t capacity, LSUP_HTStore **ht_p);
+
+void
+LSUP_htstore_free(LSUP_HTStore *ht);
+
+LSUP_rc
+LSUP_htstore_resize(LSUP_HTStore *ht, htsize_t size);
+
+LSUP_rc
+LSUP_htstore_add(LSUP_HTStore *store, const LSUP_SerTriple *sspo);
+
+LSUP_rc
+LSUP_htstore_remove(
+        LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct);
+
+LSUP_rc
+LSUP_htstore_lookup(
+        LSUP_HTStore *store, const LSUP_SerTriple *sspo,
+        LSUP_HTIterator **it_p, size_t *ct);
+
+htsize_t
+LSUP_htstore_size(LSUP_HTStore *ht);
+
+htsize_t
+LSUP_htstore_capacity(const LSUP_HTStore *ht);
+
+LSUP_rc
+LSUP_htiter_next(LSUP_HTIterator *it, LSUP_SerTriple *sspo);
+
+void
+LSUP_htiter_free(LSUP_HTIterator *it);
+
+
+/** @brief Boolean operation on hash table triples.
+ *
+ * The resulting store is compacted to the minimum size necessary to hold all
+ * results.
+ *
+ * @param op[in] Operation type. See #LSUP_bool_op
+ *
+ * @param s1[in] First store.
+ *
+ * @param s2[in] Second store.
+ *
+ * @param dest[out] Destination store. It must be freed with #LSUP_htstore_free
+ *  after use.
+ */
+LSUP_rc
+LSUP_htstore_bool_op(
+        const LSUP_bool_op op, const LSUP_HTStore *s1, const LSUP_HTStore *s2,
+        LSUP_HTStore **dest);
 
 #endif  // _LSUP_STORE_HTABLE_H

+ 91 - 23
include/store_mdb.h

@@ -50,12 +50,12 @@ typedef LSUP_rc (*store_match_fn_t)(const LSUP_TripleKey spok, void *data);
  *  in which case it will be set either to the environment variable
  *  LSUP_STORE_PATH, or if that is not set, a default local path.
  */
-LSUP_rc LSUP_store_setup(char **path/*, bool clear*/);
+LSUP_rc LSUP_mdbstore_setup(char **path/*, bool clear*/);
 
 
 /** @brief Open an MDB store.
  *
- * The store must have been set up with #LSUP_store_setup.
+ * The store must have been set up with #LSUP_mdbstore_setup.
  *
  * Some environment variables affect various store parameters:
  *
@@ -66,14 +66,16 @@ LSUP_rc LSUP_store_setup(char **path/*, bool clear*/);
  * @param[in,out] store Uninitialized store struct pointer.
  *
  * @param[in] path MDB environment path. This must be the path given by
- * #LSUP_store_setup.
+ * #LSUP_mdbstore_setup.
  *
  * @param[in] default_ctx Serialized URI to be used as a default context for
  *  triples inserted without a context specified. If NULL, the store operates
  *  in triple mode.
  */
-LSUP_MDBStore *
-LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx);
+LSUP_rc
+LSUP_mdbstore_new(
+        const char *path, const LSUP_Buffer *default_ctx,
+        LSUP_MDBStore **store);
 
 
 /** @brief Close a store and free its handle.
@@ -81,7 +83,7 @@ LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx);
  * @param[in] store Store pointer.
  *
  */
-void LSUP_store_free(LSUP_MDBStore *store);
+void LSUP_mdbstore_free(LSUP_MDBStore *store);
 
 
 /** @brief Print stats about a store and its databases.
@@ -90,7 +92,7 @@ void LSUP_store_free(LSUP_MDBStore *store);
  *
  * @param store[in] The store to get stats for.
  */
-LSUP_rc LSUP_store_stats(LSUP_MDBStore *store);
+LSUP_rc LSUP_mdbstore_stats(LSUP_MDBStore *store);
 
 
 /** @brief Store size.
@@ -99,10 +101,14 @@ LSUP_rc LSUP_store_stats(LSUP_MDBStore *store);
  *
  * @return Number of stored SPO triples across all contexts.
  */
-size_t LSUP_store_size(LSUP_MDBStore *store);
+size_t LSUP_mdbstore_size(LSUP_MDBStore *store);
 
 
-/** @brief Add a batch of triples with optional context to the store.
+/** @brief Initialize bulk triple load.
+ *
+ * This is the first step of a bulk load. It is best used when the data at hand
+ * need to be pre-processed, which can be done in the same loop as the next
+ * step to keep memory usage low.
  *
  * @param store[in] The store to add to.
  *
@@ -110,15 +116,72 @@ size_t LSUP_store_size(LSUP_MDBStore *store);
  *  default context is not NULL, triples will be added to the default context
  *  for the store, If the default context for the store is NULL, regardless of
  *  the value of sc, triples will be added with no context.
+ *
+ * @param it[out] Pointer to an iterator pointer to be passed to the following
+ *  load steps.
+ */
+void
+LSUP_mdbstore_add_init(
+        LSUP_MDBStore *store, const LSUP_Buffer *sc, LSUP_MDBIterator **it);
+
+
+/** @brief Add one triple into the store.
+ *
+ * This must be called after #LSUP_mdbstore_add_init, using the iterator
+ * yielded by that function. It may be called multiple times and must be
+ * followed by #LSUP_mdbstore_add_done.
+ *
+ * @param it[in] Iterator obtained by #LSUP_mdbstore_add_init.
+ *
+ * @param sspo[in] Serialized triple to be added.
+ */
+LSUP_rc
+LSUP_mdbstore_add_iter(struct MDBIterator *it, const LSUP_SerTriple *sspo);
+
+
+/** @brief Finalize an add loop.
+ *
+ * This must be called after #LSUP_mdbstore_add_iter.
+ *
+ * @param it[in] Iterator obtained by #LSUP_mdbstore_add_init.
+ *
+ * @param inserted[out] If not NULL this is populated with the number of
+ *  triples effectively inserted.
+ */
+LSUP_rc
+LSUP_mdbstore_add_done(LSUP_MDBIterator *it, size_t *inserted);
+
+
+/** @brief Add a batch of triples with optional context to the store.
+ *
+ * This is a shortcut for calling #LSUP_mdbstore_add_init,
+ * #LSUP_mdbstore_add_iter and #LSUP_mdbstore_add_done in a sequence
+ * when an array of pre-serialized triples is available.
+ *
+ * @param store[in] The store to add to.
+ *
+ * @param sc[in] Context as a serialized term. If this is NULL, and the
+ *  default context is not NULL, triples will be added to the default context
+ *  for the store. If the default context for the store is NULL, regardless of
+ *  the value of sc, triples will be added with no context.
 
  * @param data[in] Triples to be inserted as a 2D array of triples in the shape
  * of data[n][3], where n is the value of data_size.
  *
+ * @param inserted[out] If not NULL, it will be filled with the count of
+ *  effectively inserted triples.
+ *
  * @param data_size[in] Number of triples to be inserted.
  */
-LSUP_rc LSUP_store_add(
+LSUP_rc LSUP_mdbstore_add(
         struct MDBStore *store, const LSUP_Buffer *sc,
-        const LSUP_SerTriple *data, const size_t data_size);
+        const LSUP_SerTriple strp[], const size_t ct, size_t *inserted);
+
+
+LSUP_rc
+LSUP_mdbstore_remove(
+        LSUP_MDBStore *store, const LSUP_SerTriple *sspo,
+        const LSUP_Buffer *sc, size_t *ct);
 
 
 /** @brief Look up matching triples and optional context.
@@ -128,15 +191,20 @@ LSUP_rc LSUP_store_add(
  *
  * @param store[in] The store to be queried.
  *
- * @param sspoc Array of 4 serialized term pointers representing the s, p, o, c
+ * @param sspo Serialized triple representing the s, p, o
  * terms. Any and all of these may be NULL, which indicates an unbound query
  * term. Stores with context not set will always ignore the fourth term.
  *
+ * @param sc Serialized context to limit search to. It may be NULL, in which
+ *  case search is done in all contexts. Note that triples inserted without
+ *  context are assigned the *default* context, indicated by the "default_ctx"
+ *  member of the store struct.
+ *
  * @param it[out] Pointer to a pointer to an #LSUP_MDBIterator that will be
  * populated with a result iterator. This is always created even if no matches
- * are found and must be freed with #LSUP_store_it_free after use. If matches
+ * are found and must be freed with #LSUP_mdbiter_free after use. If matches
  * are found, the iterator points to the first result which can be retrieved
- * with #LSUP_store_it_next.
+ * with #LSUP_mdbiter_next.
  *
  * @param ct[out] If not NULL, this will be populated with the number of
  *  entries found. It is very inexpensive to set for lookups without context,
@@ -145,9 +213,9 @@ LSUP_rc LSUP_store_add(
  *
  * @return LSUP_OK if entries were found, LSUP_NORESULT if none were found.
  */
-LSUP_rc LSUP_store_lookup(
-        LSUP_MDBStore *store, LSUP_SerTerm *sspoc[],
-        LSUP_MDBIterator **it, size_t *ct);
+LSUP_rc LSUP_mdbstore_lookup(
+        LSUP_MDBStore *store, const LSUP_SerTriple *sspo,
+        const LSUP_Buffer *sc, LSUP_MDBIterator **it, size_t *ct);
 
 
 /** @brief Yield the matching triples and advance the iterator.
@@ -159,11 +227,11 @@ LSUP_rc LSUP_store_lookup(
  * NOTE: Iterators keep LMDB cursors and (read only) transactions open. Don't
  * hold on to them longer than necessary.
  *
- * NOTE: The memory pointed to by the individual LSUP_SerTerm pointers is
+ * NOTE: The memory pointed to by the individual LSUP_Buffer pointers is
  * owned by the database. It must not be written to or freed. To modify
  * the data or use them beyond the caller's scope, this memory must be copied.
  *
- * @param it[in] Opaque iterator handle obtained with #LSUP_store_lookup.
+ * @param it[in] Opaque iterator handle obtained with #LSUP_mdbstore_lookup.
  *
  * @param sspo[out] #LSUP_SerTriple to be populated with three serialized terms
  * if found, NULL if not found. Internal callers may pass NULL if they don't
@@ -172,19 +240,19 @@ LSUP_rc LSUP_store_lookup(
  * @return LSUP_OK if results were found; LSUP_END if no (more) results were
  * found; LSUP_DB_ERR if a MDB_* error occurred.
  */
-LSUP_rc LSUP_store_it_next(LSUP_MDBIterator *it, LSUP_SerTerm **sspo);
+LSUP_rc LSUP_mdbiter_next(LSUP_MDBIterator *it, LSUP_SerTriple *sspo);
 
 
 /** @brief Free an iterator allocated by a lookup.
  *
  * @param it[in] Iterator pointer. It will be set to NULL after freeing.
  */
-void LSUP_store_it_free(struct MDBIterator *it);
+void LSUP_mdbiter_free(struct MDBIterator *it);
 
 
 /** @brief Contexts that a triple key appears in.
  *
- * This function is most conveniently used by a callback to #LSUP_store_lookup
+ * This function is most conveniently used by a callback to #LSUP_mdbstore_lookup
  * because it handles triple keys.
  *
  * @param store[in] The store to be queried.
@@ -196,7 +264,7 @@ void LSUP_store_it_free(struct MDBIterator *it);
  *
  * @param ct[out] Number of contexts found.
  */
-LSUP_rc LSUP_store_triple_contexts(
+LSUP_rc LSUP_mdbstore_triple_contexts(
         LSUP_MDBStore *store, LSUP_Key spok[], LSUP_Key **ck, size_t *ct);
 
 #endif

+ 22 - 45
include/term.h

@@ -23,7 +23,6 @@
 #define NULL_TRP {NULL_KEY, NULL_KEY, NULL_KEY}
 
 
-typedef LSUP_Buffer LSUP_SerTerm;
 typedef XXH64_hash_t LSUP_TermHash64;
 typedef char langtag[LANG_SIZE];
 
@@ -78,49 +77,27 @@ LSUP_uri_new(const char *data)
 /**
  * Generate a random URN with the format: `urn:lsup:<uuid4>`.
  */
-char *
-LSUP_term_gen_random_str();
+inline LSUP_Term *
+LSUP_uri_random()
+{
+    uuid_t uuid;
+    uuid_generate_random(uuid);
+
+    uuid_str_t uuid_str;
+    uuid_unparse_lower(uuid, uuid_str);
+
+    char uri[UUIDSTR_SIZE + 10];
+    sprintf(uri, "urn:uuid4:%s", uuid_str);
+
+    return LSUP_uri_new(uri);
+}
+
+
 
 
 /** Simple ad-hoc serialization function.
  *
- * This function allocates and returns the following byte sequence:
- *
- * - `sizeof(char)` bytes for the term type;
- * - `LANG_SIZE` bytes for the language tag;
- * - Arbitrary bytes with NUL-terminated strings for data and datatype.
- *
- * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
- * index for `datatype` is found by the terminating NULL for `data`.
- *
- * Serialized representations of some RDF terms:
- *
- * <http://hello.org>
- *
- * 0      1                size=19
- * | \x01 | http://hello.org\x00 |
- * type   data
- *
- * "hello"
- *
- * 0      1      size=7
- * | \x03 | hello\x00 |
- * type   data
- *
- * "hello"^^xsd:string
- *
- * 0      1           7          size=18
- * | \x03 | hello\x00 | xsd:string\x00 |
- * type   data        datatype
- *
- * (note: the "xsd:" prefix is used for simplification here, it would be
- * normally be a fully qualified URI)
- *
- * "hello"@en-US
- *
- * 0      1           7               18             size=26
- * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
- * type   data        datatype         lang
+ * The resulting term must be freed with #LSUP_term_done after use.
  */
 LSUP_rc LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
 
@@ -128,11 +105,11 @@ LSUP_rc LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
 
 
 inline LSUP_Key
-LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
+LSUP_sterm_to_key(const LSUP_Buffer *sterm)
 {
-    if (sterm == NULL) return NULL_KEY;
+    if (UNLIKELY (sterm == NULL)) return NULL_KEY;
 
-    return (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
+    return XXH64(sterm->addr, sterm->size, SEED);
 }
 
 
@@ -142,13 +119,13 @@ LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
 inline LSUP_Key
 LSUP_term_to_key(const LSUP_Term *term)
 {
-    if (term == NULL) return NULL_KEY;
+    if (UNLIKELY (term == NULL)) return NULL_KEY;
 
     LSUP_Buffer sterm_s;
     LSUP_Buffer *sterm = &sterm_s;
 
     LSUP_term_serialize(term, sterm);
-    LSUP_Key key = LSUP_sterm_to_key(sterm);
+    LSUP_Key key = XXH64(sterm->addr, sterm->size, SEED);
 
     LSUP_buffer_done(sterm);
 

+ 61 - 9
include/triple.h

@@ -10,9 +10,9 @@ typedef struct LSUP_Triple {
 } LSUP_Triple;
 
 typedef struct LSUP_SerTriple {
-    LSUP_SerTerm *s;
-    LSUP_SerTerm *p;
-    LSUP_SerTerm *o;
+    LSUP_Buffer *s;
+    LSUP_Buffer *p;
+    LSUP_Buffer *o;
 } LSUP_SerTriple;
 
 typedef enum {
@@ -22,10 +22,60 @@ typedef enum {
 } LSUP_TriplePos;
 
 
+/** @brief Serialize a RDF triple into a buffer triple.
+ *
+ * The internal structure must be freed with #LSUP_striple_done after use.
+ *
+ * @param spo[in] Triple to be serialized.
+ * @param sspo[out] Buffer triple handle. It must point to an already allocated
+ *  structure.
+ *
+ * @return LSUP_OK or an error code resulting from #LSUP_term_serialize.
+ */
+LSUP_rc
+LSUP_triple_serialize(const LSUP_Triple *spo, LSUP_SerTriple *sspo);
+
+
+/** @brief Deserialize a buffer triple into a RDF triple.
+ *
+ * The internal structure must be freed with #LSUP_triple_done after use.
+ *
+ * @param sspo[in] Buffer triple to be deserialized.
+ * @param spo[out] RDF triple handle. It must point to an already allocated
+ *  structure.
+ *
+ * @return LSUP_OK or an error code resulting from #LSUP_term_deserialize.
+ */
+LSUP_rc
+LSUP_triple_deserialize(const LSUP_SerTriple *sspo, LSUP_Triple *spo);
+
+
+/** @brief Free the internal pointers of a triple.
+ *
+ * The triple structure itself is not freed, so it can be used with a stack-
+ * allocated structure.
+ *
+ * @param spo[in] Triple to be freed.
+ */
+void
+LSUP_triple_done(LSUP_Triple *spo);
+
+
+/** @brief Free the internal pointers of a buffer triple.
+ *
+ * The striple structure itself is not freed, so it can be used with a stack-
+ * allocated structure.
+ *
+ * @param sspo[in] Buffer triple to be freed.
+ */
+void
+LSUP_striple_done(LSUP_SerTriple *sspo);
+
+
 #define _FN_BODY \
-    if (n == 0) return trp->s; \
-    if (n == 1) return trp->p; \
-    if (n == 2) return trp->o; \
+    if (n == TRP_POS_S) return trp->s; \
+    if (n == TRP_POS_P) return trp->p; \
+    if (n == TRP_POS_O) return trp->o; \
     return NULL;
 
 
@@ -40,7 +90,7 @@ typedef enum {
  * @return Corresponding triple term or NULL if n is out of range.
  */
 inline LSUP_Term *
-LSUP_triple_term_by_pos(const LSUP_Triple *trp, LSUP_TriplePos n)
+LSUP_triple_pos(const LSUP_Triple *trp, LSUP_TriplePos n)
 { _FN_BODY }
 
 
@@ -54,10 +104,12 @@ LSUP_triple_term_by_pos(const LSUP_Triple *trp, LSUP_TriplePos n)
  *
  * @return Corresponding serialized term or NULL if n is out of range.
  */
-inline LSUP_SerTerm *
-LSUP_ser_triple_term_by_pos(const LSUP_SerTriple *trp, LSUP_TriplePos n)
+inline LSUP_Buffer *
+LSUP_striple_pos(const LSUP_SerTriple *trp, LSUP_TriplePos n)
 { _FN_BODY }
 
+#undef _FN_BODY
+
 
 // TODO Add constructors and destructors with term type checks.
 #endif

+ 1 - 1
src/buffer.c

@@ -21,7 +21,7 @@ LSUP_Buffer *LSUP_buffer_new(size_t size)
 int LSUP_buffer_init(LSUP_Buffer *buf, size_t size)
 {
     //TRACE("Buffer Size: %lu\n", size);
-    CRITICAL(buf->addr = malloc(size * sizeof(char)));
+    CRITICAL(realloc(buf->addr, size * sizeof(char)));
     buf->size = size;
 
     return 0;

+ 293 - 368
src/graph.c

@@ -4,6 +4,9 @@
 // Initial size of lookup graph. It will double each time capacity is reached.
 #define LOOKUP_GR_INIT_SIZE 64
 
+// Expand hash table memory by this factor to keep a good load factor.
+#define PREALLOC_FACTOR 1.4
+
 // Assume VERY coarsly that the number of unique terms will be in general
 // 1.7 times the number of triples. This is conservative to maintain load
 // factor low.
@@ -21,99 +24,34 @@ typedef enum KSetFlag {
  * Static handles.
  */
 static const char *default_ctx_label = "urn:lsup:default";
-
-
-/**
- * Identity hashing function.
- *
- * Since the key is already a strong hash, reuse it for bucket allocation.
- */
-static inline uint64_t id_hash_fn(const void *key, ksize_t size, uint64_t seed)
-{ return *(uint64_t*)key; }
-
-
-/**
- * General XX64 hash. Strong (non-crypto) and extremely fast.
- */
-static inline uint64_t xx64_hash_fn(
-        const void *key, ksize_t size, uint64_t seed)
-{ return XXH64(key, size, seed); }
-
-
-static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
-{ return memcmp(a, b, size) == 0; }
+static LSUP_Buffer *default_ctx = NULL;
 
 
 typedef struct Graph {
-    LSUP_store_type store_type;     // In-memory or MDB-backed
-    LSUP_Term *uri;                 // Graph "name" (URI)
-    LSUP_HTable *keys;
-    LSUP_HTable *idx;            // Dictionary of keys to serialized terms
+    LSUP_store_type         store_type;     // In-memory or MDB-backed
+    LSUP_Term               *uri;                 // Graph "name" (URI)
+    union {
+        LSUP_HTStore *      ht_store;
+        LSUP_MDBStore *     mdb_store;
+    };
 } Graph;
 
-/**
- * Extern inline functions.
- */
-size_t LSUP_graph_size(const LSUP_Graph *gr);
-
-size_t LSUP_graph_capacity(const LSUP_Graph *gr);
-
-
-/**
- * Callback type for key comparison.
- */
-typedef bool (*LSUP_key_cmp_fn_t)(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
-
-
-/**
- * Dummy callback for queries with all parameters unbound. Returns true.
-static bool lookup_none_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return true; }
-*/
 
-/**
- * Keyset lookup for S key.
- */
-static bool lookup_sk_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][0] == k1; }
-
-/**
- * Keyset lookup for P key.
- */
-static bool lookup_pk_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][1] == k1; }
-
-/**
- * Keyset lookup for O key.
- */
-static bool lookup_ok_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][2] == k1; }
-
-/**
- * Keyset lookup for S and P keys.
- */
-static bool lookup_skpk_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][0] == k1 && spok[0][1] == k2; }
+typedef struct GraphIterator {
+    const Graph *           graph;      // Parent graph.
+    union {                             // Internal store iterator.
+        LSUP_HTIterator *   ht_iter;
+        LSUP_MDBIterator *  mdb_iter;
+    };
+    size_t                  ct;         // Total matches.
+    size_t                  i;          // Cursor.
+} GraphIterator;
 
-/**
- * Keyset lookup for S and O keys.
- */
-static bool lookup_skok_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][0] == k1 && spok[0][2] == k2; }
 
 /**
- * Keyset lookup for P and O keys.
+ * Extern inline functions.
  */
-static bool lookup_pkok_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][1] == k1 && spok[0][2] == k2; }
+size_t LSUP_graph_size(const LSUP_Graph *gr);
 
 
 /* * * Post-lookup callback prototypes * * */
@@ -127,9 +65,13 @@ int match_rm_fn(
         LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
         void *ctx);
 
+static LSUP_rc
+graph_iter_next_buffer(GraphIterator *it, LSUP_SerTriple *sspo);
+
 
+/* Atexit functions. */
+void ctx_cleanup() { free(default_ctx); }
 
-/* * * KEYSETS * * */
 
 static inline bool is_null_trp(const LSUP_TripleKey *trp)
 {
@@ -142,403 +84,386 @@ static inline bool is_null_trp(const LSUP_TripleKey *trp)
 
 /* * * GRAPH * * */
 
-int
-LSUP_graph_init(
-        LSUP_Graph *gr, size_t capacity, char *uri_str,
-        LSUP_store_type store_type)
+LSUP_rc
+LSUP_graph_new(const LSUP_store_type store_type, Graph **gr_p)
 {
-    if (uri_str == NULL) {
-        gr->uri = LSUP_term_new(
-                LSUP_TERM_URI, LSUP_term_gen_random_str(), NULL, NULL);
-    } else {
-        gr->uri = LSUP_term_new(LSUP_TERM_URI, uri_str, NULL, NULL);
-    }
-
-    gr->keys = LSUP_htable_new(
-            capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, 0);
-
-    switch (store_type ) {
-        case LSUP_STORE_MEM:
-            gr->idx = LSUP_htable_new(
-                capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
-                xx64_hash_fn, buffer_eq_fn, 0);
-            break;
+    LSUP_Graph *gr;
+    CRITICAL(gr = malloc(sizeof(LSUP_Graph)));
 
-        case LSUP_STORE_MDB:
-            // TODO
+    *gr_p = gr;
 
-        default:
-            return -1;
+    // Initialize default context only once per process.
+    if(UNLIKELY(!default_ctx)) {
+        LSUP_Term *default_ctx_uri = LSUP_uri_new(default_ctx_label);
+        LSUP_term_serialize(default_ctx_uri, default_ctx);
+        LSUP_term_free(default_ctx_uri);
+        atexit(ctx_cleanup);
     }
 
-    return LSUP_OK;
-}
+    if (store_type == LSUP_STORE_MEM) {
+        LSUP_htstore_new(0, &gr->ht_store);
 
+    } else if (store_type == LSUP_STORE_MDB) {
+        LSUP_mdbstore_new(
+                getenv("LSUP_MDB_STORE_PATH"), default_ctx, &gr->mdb_store);
 
-LSUP_Graph *
-LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type)
-{
-    LSUP_Graph *gr;
-    CRITICAL(gr = malloc(sizeof(LSUP_Graph)));
+    } else return LSUP_VALUE_ERR;
 
-    LSUP_graph_init(gr, capacity, uri_str, store_type);
-
-    return gr;
+    return LSUP_OK;
 }
 
 
 /**
  * Copy triples from a source graph into a destination one.
  *
- * The destination graph is not initialized, so the copy is cumulative.
+ * The destination graph is not initialized here, so the copy is cumulative.
  */
-static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest)
+static LSUP_rc
+graph_copy_contents(const LSUP_Graph *src, LSUP_Graph *dest)
 {
-    LSUP_Triple trp;
-    trp.s = NULL;
-    trp.p = NULL;
-    trp.o = NULL;
+    LSUP_rc rc = LSUP_NOACTION;
+    const LSUP_Triple trp = {NULL, NULL, NULL};
+    GraphIterator *it;
 
-    return LSUP_graph_match_callback(
-            src, dest, &trp, &match_add_fn, true, NULL);
-}
+    LSUP_graph_lookup(src, &trp, &it);
 
+    LSUP_SerTriple sspo;
 
-int
-LSUP_graph_copy(LSUP_Graph *dest, LSUP_Graph *src)
-{
-    LSUP_graph_init(dest, LSUP_graph_size(src), NULL, src->store_type);
+    while (graph_iter_next_buffer(it, &sspo) != LSUP_END) {
+        TRACE("Inserting triple #%lu\n", it->i);
+        LSUP_rc add_rc = LSUP_graph_add(dest, NULL, 0, &sspo, 1);
+        if (LIKELY (add_rc == LSUP_OK)) rc = LSUP_OK;
+        else if (add_rc < 0) return add_rc;
+    }
 
-    return graph_copy_contents(src, dest);
+    return rc;
 }
 
 
-int
-LSUP_graph_resize(LSUP_Graph *gr, size_t size)
+LSUP_rc
+LSUP_graph_copy(const Graph *src, Graph **dest_p)
 {
-    LSUP_htable_resize(gr->keys, size);
-    LSUP_htable_resize(gr->idx, size * IDX_SIZE_RATIO);
-
-    return LSUP_OK;
-}
+    LSUP_rc rc;
+    LSUP_Graph *dest;
 
+    rc = LSUP_graph_new(src->store_type, &dest);
+    if (UNLIKELY (rc != LSUP_OK)) return rc;
 
-size_t
-LSUP_graph_capacity(const LSUP_Graph *gr)
-{ return LSUP_htable_capacity(gr->keys); }
-
-
-LSUP_Term *
-LSUP_graph_uri(const LSUP_Graph *gr) { return gr->uri; }
+    rc = graph_copy_contents(src, dest);
 
+    if (LIKELY (rc == LSUP_OK)) *dest_p = dest;
 
-size_t
-LSUP_graph_size(const LSUP_Graph *gr) { return LSUP_htable_size(gr->keys); }
+    return rc;
+}
 
 
-int
-LSUP_graph_add_triple(LSUP_Graph *gr, const LSUP_Triple *spo)
+LSUP_rc
+LSUP_graph_bool_op(
+        const LSUP_bool_op op, const Graph *gr1, const Graph *gr2,
+        Graph **res_p)
 {
-    LSUP_SerTerm sspo[3];
-
-    LSUP_term_serialize(spo->s, sspo);
-    LSUP_term_serialize(spo->p, sspo + 1);
-    LSUP_term_serialize(spo->o, sspo + 2);
-
-    LSUP_TripleKey spok = NULL_TRP;
-
-    // Add term to index.
-    for (int i = 0; i < 3; i++) {
-        spok[i] = LSUP_sterm_to_key(sspo + i);
-        TRACE("Indexing term key %lu\n", spok[i]);
-
-        // If term is already in the index, discard and free it.
-        if (LSUP_htable_get(gr->idx, spok + i, NULL) == LSUP_OK) {
-            //LSUP_SerTerm *sterm = sspo + i;
-            //CRITICAL(sterm = malloc(sizeof(LSUP_Buffer)));
-            LSUP_htable_put(gr->idx, spok + i, sspo + i);
-        } else {
-            TRACE("%s", "Term is already indexed.");
-            LSUP_buffer_done(sspo + i);
-        }
+    if (UNLIKELY (gr1->store_type != LSUP_STORE_MEM)) {
+        fprintf(
+                stderr,
+                "First operand %s is not an in-memory graph. "
+                "Cannot perform boolean operation.",
+                gr1->uri->data);
+        return LSUP_VALUE_ERR;
+    }
+    if (UNLIKELY (gr2->store_type != LSUP_STORE_MEM)) {
+        fprintf(
+                stderr,
+                "Second operand %s is not an in-memory graph. "
+                "Cannot perform boolean operation.",
+                gr2->uri->data);
+        return LSUP_VALUE_ERR;
     }
 
-    // Add triple.
-    TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
+    LSUP_Graph *res;
+    LSUP_graph_new(LSUP_STORE_MEM, &res);
 
-    return LSUP_htable_put(gr->keys, spok, NULL);
+    return LSUP_htstore_bool_op(
+            op, gr1->ht_store, gr2->ht_store, &res->ht_store);
 }
 
 
-int
-LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size)
+void
+LSUP_graph_free(LSUP_Graph *gr)
 {
-    // TODO Decouple this and build interface for memory and MDB integration.
+    if (LIKELY(gr != NULL)) {
+        LSUP_term_free(gr->uri);
 
-    // Resize all at once if needed.
-    if (LSUP_graph_capacity(gr) < LSUP_graph_size(gr) + data_size)
-        LSUP_graph_resize(gr, LSUP_graph_size(gr) + data_size);
+        if (gr->store_type == LSUP_STORE_MEM)
+            LSUP_htstore_free(gr->ht_store);
+        else
+            LSUP_mdbstore_free(gr->mdb_store);
 
-    int rc = LSUP_NOACTION;
-    for (size_t i = 0; i < data_size; i++) {
-        TRACE("Inserting triple #%lu\n", i);
-        if (LIKELY(LSUP_graph_add_triple(gr, data + i) == LSUP_OK))
-            rc = LSUP_OK;
+        free(gr);
     }
+}
 
-    return rc;
+
+LSUP_rc
+LSUP_graph_resize(LSUP_Graph *gr, size_t size)
+{
+    if (gr->store_type == LSUP_STORE_MEM)
+        return LSUP_htstore_resize(gr->ht_store, size);
+
+    return LSUP_VALUE_ERR;
 }
 
 
-bool
-LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
+LSUP_Term *
+LSUP_graph_uri(const LSUP_Graph *gr) { return gr->uri; }
+
+
+LSUP_rc
+LSUP_graph_set_uri(LSUP_Graph *gr, const char *uri)
 {
-    LSUP_TripleKey spok = {
-        LSUP_term_to_key(spo->s),
-        LSUP_term_to_key(spo->p),
-        LSUP_term_to_key(spo->o),
-    };
+    gr->uri = uri ? LSUP_uri_new(uri) : LSUP_uri_random();
 
-    return LSUP_htable_get(gr->keys, spok, NULL) == LSUP_OK;
+    return LSUP_OK;
 }
 
 
-int
-LSUP_graph_match_callback(
-        LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
-        keyset_match_fn_t callback_fn, bool match_cond, void *ctx)
+size_t
+LSUP_graph_capacity(const Graph *gr)
 {
-    if (LSUP_htable_size(gr->keys) == 0)
-        return LSUP_NOACTION;
-
-    htsize_t cur = 0;
-    LSUP_Key k1, k2;
-    LSUP_key_cmp_fn_t cmp_fn;
-    LSUP_TripleKey i_spok;
-
-    LSUP_TripleKey spok = {
-        LSUP_term_to_key(spo->s),
-        LSUP_term_to_key(spo->p),
-        LSUP_term_to_key(spo->o),
-    };
+    if(gr->store_type == LSUP_STORE_MEM)
+        return LSUP_htstore_capacity(gr->ht_store);
 
-    if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
-        if (match_cond == true) {
-            // Shortcut for 3-term match—only if match_cond is true.
-            LSUP_graph_init(res, 1, NULL, LSUP_STORE_MEM);
-            int rc = LSUP_htable_get(gr->keys, spok, NULL);
-            if(rc == LSUP_OK) {
-                callback_fn(gr, res, &spok, ctx);
-                return LSUP_OK;
-            } else {
-                return LSUP_NOACTION;
-            }
-        } else {
-            // For negative condition (i.e. "apply this function to all triples
-            // except the matching one")
-            int rc = LSUP_NOACTION;
-            while (LSUP_htable_iter(
-                        gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
-                if (LIKELY(
-                    i_spok[2] != spok[2] ||
-                    i_spok[0] != spok[0] ||
-                    i_spok[1] != spok[1]
-                )) {
-                    rc = callback_fn(gr, res, &i_spok, ctx);
-                }
-            }
+    return LSUP_NOT_IMPL_ERR;
+}
 
-            return rc;
-        }
 
-    } else if (spok[0] != NULL_KEY) {
-        k1 = spok[0];
+size_t
+LSUP_graph_size(const Graph *gr)
+{
+    if(gr->store_type == LSUP_STORE_MEM)
+        return LSUP_htstore_size(gr->ht_store);
+
+    return LSUP_mdbstore_size(gr->mdb_store);
+}
 
-        if (spok[1] != NULL_KEY) { // s p ?
-            k2 = spok[1];
-            cmp_fn = lookup_skpk_cmp_fn;
 
-        } else if (spok[2] != NULL_KEY) { // s ? o
-            k2 = spok[2];
-            cmp_fn = lookup_skok_cmp_fn;
+LSUP_rc
+LSUP_graph_add(
+        LSUP_Graph *gr,
+        const LSUP_Triple trp[], size_t trp_ct,
+        const LSUP_SerTriple strp[], size_t strp_ct)
+{
+    LSUP_rc rc;
 
-        } else { // s ? ?
-            cmp_fn = lookup_sk_cmp_fn;
+    /*
+     * NOTE It is possible to pass both sets of RDF triples and buffer triples.
+     */
 
+    if (gr->store_type == LSUP_STORE_MEM) {
+        // Resize all at once if needed.
+        htsize_t prealloc = LSUP_htstore_size(gr->ht_store) + trp_ct + strp_ct;
+        if (LSUP_htstore_capacity(gr->ht_store) < prealloc) {
+            rc = LSUP_htstore_resize(gr->ht_store, prealloc * PREALLOC_FACTOR);
+
+            if (UNLIKELY(rc != LSUP_OK)) return rc;
         }
 
-    } else if (spok[1] != NULL_KEY) {
-        k1 = spok[1];
+        rc = LSUP_NOACTION;
+
+        // Serialize and insert RDF triples.
+        if (trp_ct > 0) {
+            LSUP_SerTriple sspo;
 
-        if (spok[2] != NULL_KEY) { // ? p o
-            k2 = spok[2];
-            cmp_fn = lookup_pkok_cmp_fn;
+            for (size_t i = 0; i < trp_ct; i++) {
 
-        } else { // ? p ?
-            cmp_fn = lookup_pk_cmp_fn;
+                LSUP_term_serialize(trp[i].s, sspo.s);
+                LSUP_term_serialize(trp[i].p, sspo.p);
+                LSUP_term_serialize(trp[i].o, sspo.o);
+
+                TRACE("Inserting triple #%lu\n", i);
+                if (LIKELY(LSUP_htstore_add(gr->ht_store, &sspo) == LSUP_OK))
+                    rc = LSUP_OK;
+            }
+
+            LSUP_buffer_done(sspo.s);
+            LSUP_buffer_done(sspo.p);
+            LSUP_buffer_done(sspo.o);
         }
 
-    } else if (spok[2] != NULL_KEY) { // ? ? o
-        k1 = spok[2];
-        cmp_fn = lookup_ok_cmp_fn;
+        // Insert serialized triples.
+        for (size_t i = 0; i < strp_ct; i++) {
+            TRACE("Inserting triple #%lu\n", i);
+            LSUP_rc db_rc = LSUP_htstore_add(gr->ht_store, strp + i);
 
-    } else {
-        printf("WARNING: no bound terms, making a compact copy.\n");
-        return LSUP_graph_copy(res, gr);
-    }
+            if (UNLIKELY (db_rc < 0)) return db_rc;
+            if (LIKELY (db_rc == LSUP_OK)) rc = LSUP_OK;
+        }
 
-    while (LSUP_htable_iter(gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
-        if (cmp_fn(&i_spok, k1, k2) == match_cond)
-            callback_fn(gr, res, &i_spok, ctx);
+        return rc;
     }
 
-    return LSUP_OK;
-}
+    if (gr->store_type == LSUP_STORE_MDB) {
+        rc = LSUP_NOACTION;
 
+        LSUP_Buffer sc;
+        LSUP_term_serialize(gr->uri, &sc);
 
-int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
-{
-    LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM);
+        LSUP_MDBIterator *it;
+        LSUP_mdbstore_add_init(gr->mdb_store, &sc, &it);
 
-    return LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
-}
+        // Serialize and insert RDF triples.
+        if (trp_ct > 0) {
+            LSUP_SerTriple sspo;
 
+            for (size_t i = 0; i < trp_ct; i++) {
 
-int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
-{
-    LSUP_graph_copy(res, gr1);
-
-    return graph_copy_contents(gr2, res);
-}
+                LSUP_term_serialize(trp[i].s, sspo.s);
+                LSUP_term_serialize(trp[i].p, sspo.p);
+                LSUP_term_serialize(trp[i].o, sspo.o);
 
+                TRACE("Inserting triple #%lu\n", i);
+                LSUP_rc db_rc = LSUP_mdbstore_add_iter(it, &sspo);
 
-int LSUP_graph_subtract(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
-{
-    if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
+                if (UNLIKELY(db_rc < 0)) return db_rc;
+                if (LIKELY(db_rc == LSUP_OK)) rc = LSUP_OK;
+            }
 
-    LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
+            LSUP_buffer_done(sspo.s);
+            LSUP_buffer_done(sspo.p);
+            LSUP_buffer_done(sspo.o);
+        }
 
-    if (LSUP_htable_size(gr1->keys) == 0) return LSUP_OK;
+        // Insert serialized triples.
+        for (size_t i = 0; i < strp_ct; i++) {
+            TRACE("Inserting triple #%lu\n", i);
+            LSUP_rc db_rc = LSUP_mdbstore_add_iter(it, strp + i);
 
-    htsize_t cur = 0;
-    LSUP_TripleKey spok;
+            if (UNLIKELY (db_rc < 0)) return db_rc;
+            if (LIKELY (db_rc == LSUP_OK)) rc = LSUP_OK;
+        }
 
-    while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
-        if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
-            match_add_fn(res, gr1, &spok, NULL);
+        LSUP_mdbstore_add_done(it, NULL);
     }
 
-    return LSUP_OK;
+    return LSUP_VALUE_ERR;
 }
 
 
-int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
+LSUP_rc
+LSUP_graph_remove(Graph *gr, const LSUP_Triple *spo, size_t *ct)
 {
-    LSUP_graph_init(res, LSUP_graph_capacity(gr1), NULL, LSUP_STORE_MEM);
+    LSUP_rc rc;
+    LSUP_SerTriple sspo_s;
+    LSUP_SerTriple *sspo = &sspo_s;
+    LSUP_Buffer *sc;
 
-    if (LSUP_htable_size(gr1->keys) == 0 || LSUP_htable_size(gr2->keys) == 0)
-        return LSUP_OK;
+    LSUP_term_serialize(spo->s, sspo->s);
+    LSUP_term_serialize(spo->p, sspo->s);
+    LSUP_term_serialize(spo->o, sspo->s);
+    LSUP_term_serialize(gr->uri, sc);
 
-    htsize_t cur = 0;
-    LSUP_TripleKey spok;
+    if (gr->store_type == LSUP_STORE_MEM)
+        rc = LSUP_htstore_remove(gr->ht_store, sspo, ct);
+    else
+        rc = LSUP_mdbstore_remove(gr->mdb_store, sspo, sc, ct);
 
-    while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
-        if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_OK)
-            match_add_fn(res, gr1, &spok, NULL);
-    }
+    LSUP_striple_done(sspo);
+    LSUP_buffer_done(sc);
 
-    return LSUP_OK;
+    return rc;
 }
 
 
-int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
+LSUP_rc
+LSUP_graph_lookup(
+        const Graph *gr, const LSUP_Triple *spo, GraphIterator **it_p)
 {
-    if (LSUP_htable_size(gr1->keys) == 0) return LSUP_graph_copy(gr2, res);
-    if (LSUP_htable_size(gr2->keys) == 0) return LSUP_graph_copy(gr1, res);
+    LSUP_rc rc;
+    GraphIterator *it;
+    CRITICAL(it = malloc(sizeof(GraphIterator)));
+    *it_p = it;
 
-    LSUP_graph_init(
-            res, min(LSUP_graph_capacity(gr1), LSUP_graph_capacity(gr2)),
-            NULL, LSUP_STORE_MEM);
+    it->graph = gr;
 
-    htsize_t cur = 0;
-    LSUP_TripleKey spok;
+    LSUP_SerTriple sspo_s;
+    LSUP_SerTriple *sspo = &sspo_s;
+    LSUP_Buffer *sc;
 
-    while(LSUP_htable_iter(gr1->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
-        if (LSUP_htable_get(gr2->keys, (void**)&spok, NULL) == LSUP_NORESULT)
-            match_add_fn(res, gr1, &spok, NULL);
-    }
+    LSUP_term_serialize(spo->s, sspo->s);
+    LSUP_term_serialize(spo->p, sspo->s);
+    LSUP_term_serialize(spo->o, sspo->s);
+    LSUP_term_serialize(gr->uri, sc);
 
-    cur = 0;
+    if (gr->store_type == LSUP_STORE_MEM) {
+        rc = LSUP_htstore_lookup(gr->ht_store, sspo, &it->ht_iter, &it->ct);
 
-    while(LSUP_htable_iter(gr2->keys, &cur, (void**)&spok, NULL) == LSUP_OK) {
-        if (LSUP_htable_get(gr1->keys, (void**)&spok, NULL) == LSUP_NORESULT)
-            match_add_fn(res, gr2, &spok, NULL);
+    } else {
+        rc = LSUP_mdbstore_lookup(
+                gr->mdb_store, sspo, sc, &it->mdb_iter, &it->ct);
     }
 
-    return LSUP_OK;
+    LSUP_striple_done(sspo);
+    LSUP_buffer_done(sc);
+
+    return rc;
 }
 
 
-void
-LSUP_graph_free(LSUP_Graph *gr)
+/** @brief Advance iterator and return serialized triple.
+ *
+ * This is an internal function to pass raw buffers between higher-level
+ * functions without serializing and deserializing triples.
+ */
+static LSUP_rc
+graph_iter_next_buffer(GraphIterator *it, LSUP_SerTriple *sspo)
 {
-    if (LIKELY(gr != NULL)) {
-        LSUP_term_free(gr->uri);
+    LSUP_rc rc;
 
-        // Free up triples.
-        LSUP_htable_free(gr->keys);
-
-        // Free up index entries and index.
-        htsize_t cur = 0;
-        LSUP_TripleKey spok;
-        LSUP_Buffer *sterm;
-        while(LSUP_htable_iter(
-                    gr->idx, &cur, (void**)&spok, (void**)&sterm) == LSUP_OK) {
-            TRACE("Freeing indexed term buffer #%d at %p", cur, sterm);
-            LSUP_buffer_done(sterm);
-        }
+    if (it->graph->store_type == LSUP_STORE_MEM)
+        rc = LSUP_htiter_next(it->ht_iter, sspo);
+    else rc = LSUP_mdbiter_next(it->mdb_iter, sspo);
 
-        LSUP_htable_free(gr->idx);
+}
 
-        free(gr);
+
+LSUP_rc
+LSUP_graph_iter_next(GraphIterator *it, LSUP_Triple *spo)
+{
+    LSUP_SerTriple sspo;
+    LSUP_rc rc = graph_iter_next_buffer(it, &sspo);
+
+    if (rc == LSUP_OK) {
+        LSUP_term_deserialize(sspo.s, spo->s);
+        LSUP_term_deserialize(sspo.p, spo->p);
+        LSUP_term_deserialize(sspo.o, spo->o);
+
+        it->i++;
     }
-}
 
+    return rc;
+}
 
-/* * CALLBACKS * */
 
-/**
- * Callback for adding a matched triple.
- *
- * Adds the current triple in src to dest. No duplicate check.
- *
- * The source graph cursor must be set to the triple to be copied.
- */
-int match_add_fn(
-        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
-        void *ctx)
+void
+LSUP_graph_iter_free(GraphIterator *it)
 {
-    // Add term to index.
-    for (int i = 0; i < 3; i++) {
-        // Index terms if not yet presents in destination.
-        void *src_val, *dest_val;
-
-        if(LSUP_htable_get(src->idx, *spok + i, &src_val) == LSUP_OK) {
-            CRITICAL(dest_val = malloc(sizeof(LSUP_Buffer)));
-            LSUP_buffer_copy(dest_val, src_val);
-            LSUP_htable_put(dest->idx, *spok + i, dest_val);
-        }
-    }
+    if (it->graph->store_type == LSUP_STORE_MEM)
+        LSUP_htiter_free(it->ht_iter);
+    else
+        LSUP_mdbiter_free(it->mdb_iter);
 
-    // Add triple.
-    return LSUP_htable_put(dest->keys, spok, NULL);
+    free(it);
 }
 
 
-/**
- * Callback for removing a matched triple.
- */
-int match_rm_fn(
-        LSUP_Graph *src, LSUP_Graph *dest, const LSUP_TripleKey *spok,
-        void *ctx)
-{ return LSUP_htable_del(dest->keys, spok); }
+bool
+LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
+{
+    GraphIterator *it;
+
+    LSUP_graph_lookup(gr, spo, &it);
+    bool rc = LSUP_graph_iter_next(it, NULL) != LSUP_NORESULT;
+
+    LSUP_graph_iter_free(it);
+
+    return rc;
+}

+ 109 - 138
src/htable.c

@@ -22,64 +22,23 @@
 #define    APPROX_40_PERCENT(x)     (((x) * 409) >> 10)
 
 #define MIN_HT_SIZE         1 << 3
-#define MAX_KEY_SIZE        64
-
-/** @brief Bucket types.
- * Table columns are: bucket type tag; key type; value type.
- * NOTE This macro can be redefined BEFORE including this library in order to
- * redefine the bucket types for general purpose use.
- */
-#ifndef LSUP_HTABLE_BUCKET_TYPES
-#define LSUP_HTABLE_BUCKET_TYPES                                    \
-    ENTRY(  ks,         LSUP_Key,            void*              )   \
-    ENTRY(  kt,         LSUP_Key,            LSUP_TripleKey     )   \
-    ENTRY(  tk,         LSUP_TripleKey,      LSUP_Key           )   \
-
-#endif
-
-//#ifdef LSUP_BIG_HT // TODO
-typedef uint64_t ht_hash_t;
-//else
-//typedef uint32_t ht_hash_t;
-//#endif
 
 /** @brief Bucket structure.
  *
- * Note that the address of "key" is reliably findable across multiple bucket
- * types in a union, since hash and psl don't change. The address of the value,
- * however, cannot be determined without knowing the bucket type or key size.
+ * This struct contains only bucket metadata, which is of predictable size.
  */
-#define ENTRY(tag, k, v) \
-typedef struct {                \
-    ht_hash_t       hash;       \
-    uint16_t        psl;        \
-    k               key;        \
-    v               val;        \
-} bucket_##tag##_t;
-LSUP_HTABLE_BUCKET_TYPES
-#undef ENTRY
-
-typedef enum {
-#define ENTRY(tag, k, v) BT_##tag,
-LSUP_HTABLE_BUCKET_TYPES
-#undef ENTRY
-} BucketTypeTag;
-
-typedef union {
-#define ENTRY(tag, k, v) \
-    bucket_##tag##_t *  tag;
-LSUP_HTABLE_BUCKET_TYPES
-#undef ENTRY
+typedef struct {
+    ht_hash_t       hash;
+    uint16_t        psl;
 } bucket_t;
 
 typedef struct htable_t {
     htsize_t        size;
     htsize_t        nitems;
-    unsigned        flags;
     uint64_t        divinfo;
 
     bucket_t *      buckets;
-    BucketTypeTag   bucket_type;
+    void *          data;
     uint64_t        seed;
 
     key_hash_fn_t   key_hash_fn;
@@ -90,29 +49,24 @@ typedef struct htable_t {
 } HTable;
 
 // Fill and compare empty buckets.
-static const unsigned char del_marker[MAX_KEY_SIZE] = {0};
+static const unsigned char del_marker[sizeof(ksize_t)] = {0};
 
-/*
- * Byte offset for key address in a bucket.
+/** @brief Access bucket key address by bucket index.
  *
- * This is applicable to any type of bucket if handled as a null pointer.
+ * data is the address of the raw data array, i the bucket index.
  */
-static const unsigned int k_offset = sizeof(ht_hash_t) + sizeof(uint16_t);
+#define HT_KEY(data, i) data + ((ht->ksize + ht->vsize) * i)
 
-/** @brief Specific bucket access in union.
+/** @brief Access bucket value address by bucket index.
  *
- * Use: ht->HT_BUCKET_NAME(ht)
+ * data is the address of the raw data array, i the bucket index.
  */
-#define ENTRY(tag, k, v) if (ht->bucket_type == BT_##tag) return b->tag;
-static inline void *bkey(LSUP_HTable *ht, bucket_t *b) { LSUP_HTABLE_BUCKET_TYPES; }
-#undef ENTRY
-
-#define HT_BUCKET(tag, b, ht) b.ht->##tag
+#define HT_VAL(data, i) data + ((ht->ksize + ht->vsize) * i) + ht->ksize
 
 /* * * GENERIC UTILITIES * * */
 
-static inline bool is_empty_bucket(const HTable *ht, const void *bucket)
-{ return memcmp(bucket + k_offset, del_marker, ht->ksize) == 0; }
+static inline bool is_empty_bucket(const HTable *ht, htsize_t i)
+{ return memcmp(HT_KEY(ht, i), del_marker, ht->ksize) == 0; }
 
 /*
  * Find first bit.
@@ -168,69 +122,97 @@ fast_rem32(uint32_t v, uint32_t div, uint64_t divinfo)
 { return v - div * fast_div32(v, div, divinfo); }
 
 
-/*
 static int __attribute__((__unused__))
 //static int
 validate_psl_p(const HTable *ht, unsigned i)
 {
-    unsigned base_i = fast_rem32(bucket->hash, ht->size, ht->divinfo);
+    unsigned base_i = fast_rem32(ht->buckets[i].hash, ht->size, ht->divinfo);
     unsigned diff = (base_i > i) ? ht->size - base_i + i : i - base_i;
-    return is_empty_bucket(ht, ht->buckets + i) || diff == bucket->psl;
+    return is_empty_bucket(ht, i) || diff == ht->buckets[i].psl;
 }
-*/
 
 /* * * PUBLIC API * * */
 
-HTable *LSUP_htable_new(
+LSUP_rc LSUP_htable_new(
         htsize_t size, ksize_t ksize, vsize_t vsize,
-        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, unsigned flags)
+        key_hash_fn_t key_hash_fn, key_eq_fn_t key_eq_fn, LSUP_HTable **ht_p)
 {
-    HTable *ht;
-    CRITICAL(ht = calloc(1, sizeof(HTable)));
+    HTable *ht = calloc(1, sizeof(HTable));
+    if (!ht) return ENOMEM;
+    *ht_p = ht;
 
     ht->ksize = ksize;
     ht->vsize = vsize;
     ht->key_hash_fn = key_hash_fn;
     ht->key_eq_fn = key_eq_fn;
-    ht->flags = flags;
     ht->size = 0;
 
-    LSUP_htable_resize(ht, size);
+    return LSUP_htable_resize(ht, size);
+}
+
+
+LSUP_rc
+LSUP_htable_copy(const HTable *src, HTable **dest_p)
+{
+    HTable *dest;
+    CRITICAL(dest = calloc(1, sizeof(HTable)));
+
+    dest->size = src->size;
+    dest->nitems = src->nitems;
+    dest->divinfo = src->divinfo;
+
+    CRITICAL(dest->buckets = malloc(sizeof(src->buckets)));
+    memcpy(dest->buckets, src->buckets, sizeof(src->buckets));
+
+    CRITICAL(dest->data = malloc(sizeof(src->data)));
+    memcpy(dest->data, src->data, sizeof(src->data));
+
+    dest->seed = src->seed ^ random() | (random() << sizeof(ht_hash_t));
+
+    dest->key_hash_fn = src->key_hash_fn;
+    dest->key_eq_fn = src->key_eq_fn;
 
-    return ht;
+    dest->ksize = src->ksize;
+    dest->vsize = src->vsize;
+
+    *dest_p = dest;
+
+    return LSUP_OK;
 }
 
 
 /**
  * Resize a table.
  */
-int LSUP_htable_resize(HTable *ht, htsize_t newsize)
+LSUP_rc LSUP_htable_resize(HTable *ht, htsize_t newsize)
 {
-    TRACE("Resizing htable to %lu.", (size_t)newsize);
-
-    bucket_t *oldbuckets = ht->buckets;
+    void *old_data = ht->data;
     const htsize_t oldsize = ht->size;
 
     // Clip size to min & max limits.
     if (newsize < MIN_HT_SIZE) newsize = MIN_HT_SIZE;
     if (newsize > HTSIZE_MAX) newsize = HTSIZE_MAX;
 
+    TRACE("Resizing htable to %lu.", (size_t)newsize);
+
     CRITICAL(ht->buckets = calloc(newsize, sizeof(bucket_t)));
+    CRITICAL(ht->data = calloc(
+                (ht->ksize + ht->vsize) * newsize, sizeof(bucket_t)));
 
     ht->size = newsize;
     ht->nitems = 0;
 
     ht->divinfo = fast_div32_init(newsize);
-    ht->seed ^= random() | (random() << 32);
+    ht->seed ^= random() | (random() << sizeof(ht_hash_t));
 
     for (unsigned i = 0; i < oldsize; i++) {
-        const bucket_t *bucket = &oldbuckets[i];
-
         /* Skip the empty buckets. */
-        if (!is_empty_bucket(ht, bucket))
-            LSUP_htable_insert(ht, bucket->key, bucket->val);
+        if (!is_empty_bucket(old_data, i))
+            LSUP_htable_insert(
+                    ht, HT_KEY(old_data, i),
+                    HT_VAL(old_data, i));
     }
-    if (oldbuckets != NULL) free(oldbuckets);
+    free(old_data);
 
     return LSUP_OK;
 }
@@ -247,20 +229,18 @@ htsize_t LSUP_htable_size(LSUP_HTable *ht)
 /*
  * Insert without resizing (assuming resizing is already done).
  */
-int LSUP_htable_insert(HTable *ht, const void *key, void *val)
+LSUP_rc LSUP_htable_insert(HTable *ht, const void *key, const void *val)
 {
-    bucket_t *bucket, entry;
+    bucket_t entry_s;
+    bucket_t *bucket, *entry = &entry_s;
 
     ASSERT(key != NULL);
 
     /*
      * Setup the bucket entry.
      */
-    memcpy(entry.key, key, ht->ksize);
-    //memcpy(entry.val, val, ht->vsize);
-    entry.val = val;
-    entry.hash = ht->key_hash_fn(entry.key, ht->ksize, ht->seed);
-    entry.psl = 0;
+    entry->hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
+    entry->psl = 0;
 
     /*
      * From the paper: "when inserting, if a record probes a location
@@ -272,18 +252,18 @@ int LSUP_htable_insert(HTable *ht, const void *key, void *val)
      * being inserted is greater than PSL of the element in the bucket,
      * then swap them and continue.
      */
-    htsize_t i = fast_rem32(entry.hash, ht->size, ht->divinfo);
+    htsize_t i = fast_rem32(entry->hash, ht->size, ht->divinfo);
 
+    // Locate the index to insert the KV into.
     for(;;) {
         bucket = ht->buckets + i;
 
-        if(is_empty_bucket(ht, ht->buckets + i)) break;
+        if(is_empty_bucket(ht, i)) break;
 
-        //ASSERT(validate_psl_p(ht, i));
+        ASSERT(validate_psl_p(ht, i));
 
         // There is a key in the bucket.
-        TRACE("Entry key: {%lu, %lu, %lu}; bucket key: {%lu, %lu, %lu}", entry.key[0], entry.key[1], entry.key[2], bucket->key[0], bucket->key[1], bucket->key[2]);
-        if (ht->key_eq_fn(bucket->key, entry.key, ht->ksize)) {
+        if (ht->key_eq_fn(HT_KEY(ht, i), key, ht->ksize)) {
             // Duplicate key: do nothing.
             TRACE(STR, "Duplicate key.");
             return LSUP_NOACTION;
@@ -292,36 +272,35 @@ int LSUP_htable_insert(HTable *ht, const void *key, void *val)
         /*
          * We found a "rich" bucket.  Capture its location.
          */
-        if (entry.psl > bucket->psl) {
-            //TRACE("Entry PSL: %d; Bucket PSL: %d", entry.psl, bucket->psl);
-            bucket_t tmp;
-
+        if (entry->psl > bucket->psl) {
+            //TRACE("Entry PSL: %d; Bucket PSL: %d", entry->psl, bucket->psl);
             TRACE(STR, "SWAP");
             /*
              * Place our key-value pair by swapping the "rich"
              * bucket with our entry.  Copy the structures.
              */
-            tmp = entry;
-            entry = *bucket;
-            *bucket = tmp;
+            bucket_t *tmp = entry;
+            entry = bucket;
+            bucket = tmp;
         }
 
-        entry.psl++;
+        entry->psl++;
 
         /* Continue to the next bucket. */
-        //ASSERT(validate_psl_p(ht, bucket, i));
+        ASSERT(validate_psl_p(ht, i));
         i = fast_rem32(i + 1, ht->size, ht->divinfo);
     }
 
     /*
      * Found a free bucket: insert the entry.
      */
-    TRACE("Inserting {%lu, %lu, %lu} in bucket #%d", entry.key[0], entry.key[1], entry.key[2], i);
-    //*bucket = entry; // copy
-    memcpy(bucket, &entry, sizeof(bucket_t)); // copy
+    TRACE("Inserting into bucket #%d", i);
+    memcpy(bucket, entry, sizeof(bucket_t)); // copy
+    memcpy(HT_KEY(ht->data, i), key, ht->ksize);
+    memcpy(HT_VAL(ht->data, i), val, ht->vsize);
     ht->nitems++;
 
-    //ASSERT(validate_psl_p(ht, bucket, i));
+    ASSERT(validate_psl_p(ht, i));
 
     return LSUP_OK;
 }
@@ -333,7 +312,7 @@ int LSUP_htable_insert(HTable *ht, const void *key, void *val)
  * => If the key is already present, return its associated value.
  * => Otherwise, on successful insert, return the given value.
  */
-int LSUP_htable_put(HTable *ht, const void *key, void *val)
+LSUP_rc LSUP_htable_put(HTable *ht, const void *key, const void *val)
 {
     const size_t threshold = APPROX_85_PERCENT(ht->size);
 
@@ -354,7 +333,7 @@ int LSUP_htable_put(HTable *ht, const void *key, void *val)
 }
 
 
-int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
+int LSUP_htable_get(const HTable *ht, const void *key, void **val_p)
 {
     const uint64_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
     htsize_t n = 0, i = fast_rem32(hash, ht->size, ht->divinfo);
@@ -365,13 +344,11 @@ int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
      * Lookup is a linear probe.
      */
     for(;;) {
-        bucket_t *bucket = ht->buckets + i;
-        //ASSERT(validate_psl_p(ht, bucket, i));
+        ASSERT(validate_psl_p(ht, i));
 
-        if (ht->key_eq_fn(bucket->key, key, ht->ksize)) {
+        if (ht->key_eq_fn(HT_KEY(ht->data, i), key, ht->ksize)) {
             // Key found within max probe length.
-            if (valp != NULL)
-                *valp = bucket->val;
+            if (val_p) *val_p = HT_VAL(ht->data, i);
 
             return LSUP_OK;
         }
@@ -383,8 +360,8 @@ int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
          * have been captured, if the key was inserted -- see the central
          * point of the algorithm in the insertion function.
          */
-        if (is_empty_bucket(ht, bucket) || n > bucket->psl) {
-            valp = NULL;
+        if (is_empty_bucket(ht, i) || n > ht->buckets[i].psl) {
+            if (val_p) *val_p = NULL;
 
             return LSUP_NORESULT;
         }
@@ -397,7 +374,7 @@ int LSUP_htable_get(const HTable *ht, const void *key, void **valp)
 }
 
 
-int LSUP_htable_del(HTable *ht, const void *key)
+int LSUP_htable_remove(HTable *ht, const void *key)
 {
     const size_t threshold = APPROX_40_PERCENT(ht->size);
     const uint32_t hash = ht->key_hash_fn(key, ht->ksize, ht->seed);
@@ -411,12 +388,12 @@ int LSUP_htable_del(HTable *ht, const void *key)
          * The same probing logic as in the lookup function.
          */
         bucket_t *bucket = ht->buckets + i;
-        if (is_empty_bucket(ht, bucket) || n > bucket->psl)
+        if (is_empty_bucket(ht, i) || n > bucket->psl)
             return LSUP_NOACTION;
 
-        //ASSERT(validate_psl_p(ht, bucket, i));
+        ASSERT(validate_psl_p(ht, i));
 
-        if (!ht->key_eq_fn(bucket->key, key, ht->ksize)) {
+        if (!ht->key_eq_fn(HT_KEY(ht, i), key, ht->ksize)) {
             /* Continue to the next bucket. */
             i = fast_rem32(i + 1, ht->size, ht->divinfo);
             n++;
@@ -430,21 +407,20 @@ int LSUP_htable_del(HTable *ht, const void *key)
      * Use the backwards-shifting method to maintain low variance.
      */
 
-    while(1) {
+    for(;;) {
         bucket_t *nbucket;
 
-        memcpy(bucket->key, del_marker, ht->ksize);
+        memcpy(HT_KEY(ht, i), del_marker, ht->ksize);
 
         i = fast_rem32(i + 1, ht->size, ht->divinfo);
         nbucket = ht->buckets + i;
-        //ASSERT(validate_psl_p(ht, nbucket, i));
+        ASSERT(validate_psl_p(ht, i));
 
         /*
          * Stop if we reach an empty bucket or hit a key which
          * is in its base (original) location.
          */
-        if (is_empty_bucket(ht, nbucket) || nbucket->psl == 0)
-            break;
+        if (is_empty_bucket(ht, i) || nbucket->psl == 0) break;
 
         nbucket->psl--;
         *bucket = *nbucket;
@@ -464,22 +440,22 @@ int LSUP_htable_del(HTable *ht, const void *key)
 }
 
 
-extern int LSUP_htable_iter(
+LSUP_rc LSUP_htable_iter(
         LSUP_HTable *ht, htsize_t *cur, void **keyp, void **valp)
 {
     while (*cur < ht->size) {
-        bucket_t *bucket = ht->buckets + *cur;
-
         (*cur)++;
 
-        if (is_empty_bucket(ht, bucket)) {
+        if (is_empty_bucket(ht, *cur)) {
             TRACE("Empty bucket: %d. Skipping.", (*cur) - 1);
             continue;
         }
 
         // Copy key, and if relevant, value.
-        *keyp = bucket->key;
-        if (valp != NULL && ht->vsize > 0) *valp = bucket->val;
+        if(!keyp) return LSUP_VALUE_ERR;
+        *keyp = HT_KEY(ht, *cur);
+
+        if (valp != NULL && ht->vsize > 0) *valp = HT_VAL(ht, *cur);
 
         return LSUP_OK;
     }
@@ -487,16 +463,11 @@ extern int LSUP_htable_iter(
 }
 
 
-void LSUP_htable_done(HTable *ht)
-{
-    if(LIKELY(ht->buckets != NULL)) free(ht->buckets);
-}
-
-
+/**
+ * Release a hash table: its bucket array, its data array and the table
+ * handle itself. A NULL handle is ignored.
+ */
 void LSUP_htable_free(HTable *ht)
 {
-    if(LIKELY(ht != NULL)) {
-        LSUP_htable_done(ht);
+    if(LIKELY(ht)) {
+        free(ht->buckets);
+        free(ht->data);
         free(ht);
     }
 }

+ 236 - 108
src/store_htable.c

@@ -1,5 +1,4 @@
 #include "store_htable.h"
-#include "khash.h"
 
 // Assume VERY coarsely that the number of unique terms will be in general
 // 1.7 times the number of triples. This is conservative to maintain load
@@ -7,6 +6,12 @@
 #define IDX_SIZE_RATIO 1.7
 
 
+/**
+ * Callback type for key comparison.
+ *
+ * spok is the stored triple key under test and luk holds the lookup keys it
+ * is compared against; the callback returns true when the triple matches.
+ */
+typedef bool (*LSUP_key_eq_fn_t)(
+        const LSUP_Key spok[], const LSUP_Key luk[]);
+
 
 typedef struct HTStore {
     LSUP_HTable *keys;
@@ -14,8 +19,13 @@ typedef struct HTStore {
 } HTStore;
 
+/*
+ * Iterator state for a hash table store lookup. It is allocated by
+ * LSUP_htstore_lookup and released with LSUP_htiter_free.
+ */
 typedef struct HTIterator {
-    LSUP_HTable *ht;
-    size_t *cur;
+    HTStore *           store;      // Store being iterated.
+    LSUP_HTable *       ht;         // Hash table to look up.
+    htsize_t            cur;        // Lookup cursor.
+    LSUP_Key            luk[3];     // 0÷3 lookup keys.
+    LSUP_key_eq_fn_t    eq_fn;      // Equality function to test triples.
+    int                 rc;         // Return code for *next* result.
+    LSUP_Key *          spok;       // Retrieved SPO key.
 } HTIterator;
 
 
@@ -42,61 +52,59 @@ static inline bool buffer_eq_fn(const void *a, const void *b, ksize_t size)
 
 /* * * CALLBACKS * * */
 
-/**
- * Callback type for key comparison.
- */
-typedef bool (*LSUP_key_cmp_fn_t)(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
-
-
 /**
  * Dummy callback for queries with all parameters unbound. Neither argument
  * is read: it always returns true, so every triple matches.
  */
-static bool lookup_none_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+static bool lookup_none_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
 { return true; }
 
 /**
  * Keyset lookup for S key: true when spok[0] equals luk[0].
  */
-static bool lookup_sk_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][0] == k1; }
+static bool lookup_sk_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
+{ return spok[0] == luk[0]; }
 
 /**
  * Keyset lookup for P key: true when spok[1] equals luk[0].
  */
-static bool lookup_pk_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][1] == k1; }
+static bool lookup_pk_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
+{ return spok[1] == luk[0]; }
 
 /**
  * Keyset lookup for O key: true when spok[2] equals luk[0].
  */
-static bool lookup_ok_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][2] == k1; }
+static bool lookup_ok_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
+{ return spok[2] == luk[0]; }
 
 /**
  * Keyset lookup for S and P keys: true when spok[0] equals luk[0] and
  * spok[1] equals luk[1].
  */
-static bool lookup_skpk_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][0] == k1 && spok[0][1] == k2; }
+static bool lookup_spk_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
+{ return spok[0] == luk[0] && spok[1] == luk[1]; }
 
 /**
  * Keyset lookup for S and O keys: true when spok[0] equals luk[0] and
  * spok[2] equals luk[1].
  */
-static bool lookup_skok_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][0] == k1 && spok[0][2] == k2; }
+static bool lookup_sok_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
+{ return spok[0] == luk[0] && spok[2] == luk[1]; }
 
 /**
  * Keyset lookup for P and O keys: true when spok[1] equals luk[0] and
  * spok[2] equals luk[1].
  */
-static bool lookup_pkok_cmp_fn(
-        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
-{ return spok[0][1] == k1 && spok[0][2] == k2; }
+static bool lookup_pok_eq_fn(
+        const LSUP_Key spok[], const LSUP_Key luk[])
+{ return spok[1] == luk[0] && spok[2] == luk[1]; }
+
+
+/* * * Other prototypes. * * */
+
+static inline LSUP_rc htiter_next_key(HTIterator *it);
 
 
 /* * * API * * */
@@ -108,11 +116,15 @@ LSUP_htstore_new(size_t capacity, HTStore **ht_p)
     CRITICAL(ht = malloc(sizeof(HTStore)));
     *ht_p = ht;
 
-    ht->keys = LSUP_htable_new(
-            capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, 0);
-    ht->idx = LSUP_htable_new(
+    LSUP_rc rc = LSUP_htable_new(
+            capacity, TRP_KLEN, 0, xx64_hash_fn, buffer_eq_fn, &ht->keys);
+    if (rc != LSUP_OK) return rc;
+
+    rc = LSUP_htable_new(
         capacity * IDX_SIZE_RATIO, sizeof(uint64_t), sizeof(uintptr_t),
-        xx64_hash_fn, buffer_eq_fn, 0);
+        xx64_hash_fn, buffer_eq_fn, &ht->idx);
+
+    return rc;
 }
 
 void
@@ -137,111 +149,227 @@ LSUP_htstore_free(HTStore *ht)
 }
 
 
+/** @brief Size of the store, as reported by its triple key table.
+ *
+ * @param ht Store handle.
+ *
+ * @return Result of LSUP_htable_size() on the key table.
+ */
+htsize_t
+LSUP_htstore_size(LSUP_HTStore *ht)
+{
+    LSUP_HTable *triple_keys = ht->keys;
+
+    return LSUP_htable_size(triple_keys);
+}
+
+
+/** @brief Capacity of the store, as reported by its triple key table.
+ *
+ * @param ht Store handle.
+ *
+ * @return Result of LSUP_htable_capacity() on the key table.
+ */
+htsize_t
+LSUP_htstore_capacity(const LSUP_HTStore *ht)
+{
+    LSUP_HTable *triple_keys = ht->keys;
+
+    return LSUP_htable_capacity(triple_keys);
+}
+
+
+/** @brief Resize both tables of a store.
+ *
+ * The key table is resized to size; if that succeeds, the term index is
+ * resized to size * IDX_SIZE_RATIO.
+ *
+ * @param ht Store handle.
+ * @param size New size for the key table.
+ *
+ * @return The return code of the key table resize if it is not LSUP_OK;
+ *  otherwise the return code of the index resize.
+ */
+LSUP_rc
+LSUP_htstore_resize(HTStore *ht, htsize_t size)
+{
+    const LSUP_rc keys_rc = LSUP_htable_resize(ht->keys, size);
+
+    return keys_rc == LSUP_OK
+        ? LSUP_htable_resize(ht->idx, size * IDX_SIZE_RATIO)
+        : keys_rc;
+}
+
+
+/** @brief Add one serialized triple to the store.
+ *
+ * Each of the three terms is hashed into a key and, unless that key is
+ * already in the term index, stored in the index. The resulting triple key is
+ * then put in the key table.
+ *
+ * @param store Store handle.
+ * @param sspo Serialized triple to add.
+ *
+ * @return The return code of LSUP_htable_put() on the key table.
+ */
+LSUP_rc
+LSUP_htstore_add(HTStore *store, const LSUP_SerTriple *sspo)
+{
+    LSUP_TripleKey spok = NULL_TRP;
+
+    // Add term to index.
+    for (int i = 0; i < 3; i++) {
+        spok[i] = LSUP_sterm_to_key(LSUP_striple_pos(sspo, i));
+        TRACE("Indexing term key %lu\n", spok[i]);
+
+        // Only index terms that are not in the index yet. The test was
+        // inverted before, so new terms were never indexed.
+        if (LSUP_htable_get(store->idx, spok + i, NULL) != LSUP_OK)
+            LSUP_htable_put(store->idx, spok + i, LSUP_striple_pos(sspo, i));
+    }
+
+    // Add triple.
+    TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
+
+    return LSUP_htable_put(store->keys, spok, NULL);
+}
+
+
+/** @brief Remove all triples matching a pattern.
+ *
+ * Matching keys are collected first and removed afterwards, so the key table
+ * is never modified while the iterator still refers to it.
+ *
+ * @param store Store handle.
+ * @param sspo Serialized triple pattern, passed to LSUP_htstore_lookup().
+ * @param ct Receives the number of removal calls that did not fail. Must not
+ *  be NULL.
+ *
+ * @return The lookup return code if it is not LSUP_OK; a negative code from
+ *  LSUP_htable_remove() on failure; LSUP_OK otherwise.
+ */
+LSUP_rc
+LSUP_htstore_remove(
+        LSUP_HTStore *store, const LSUP_SerTriple *sspo, size_t *ct)
+{
+    // Start from NULL: the lookup can return before allocating the iterator.
+    LSUP_HTIterator *it = NULL;
+    LSUP_rc rc = LSUP_htstore_lookup(store, sspo, &it, ct);
+    if (UNLIKELY (rc != LSUP_OK)) {
+        LSUP_htiter_free(it);
+        return rc;
+    }
+
+    *ct = 0;
+
+    // There cannot be more matches than entries in the key table.
+    const size_t max_ct = LSUP_htable_size(store->keys);
+    LSUP_TripleKey *found;
+    CRITICAL(found = malloc(max_ct * sizeof(LSUP_TripleKey)));
+
+    size_t found_ct = 0;
+    while (found_ct < max_ct && htiter_next_key (it) == LSUP_OK) {
+        memcpy(found[found_ct++], it->spok, sizeof(LSUP_TripleKey));
+
+        // htiter_next_key() stays on a match until the cursor is advanced.
+        it->rc = LSUP_htable_iter(
+                store->keys, &it->cur, (void**)&it->spok, NULL);
+    }
+    LSUP_htiter_free(it);
+
+    rc = LSUP_OK;
+    for (size_t i = 0; i < found_ct; i++) {
+        const LSUP_rc rm_rc = LSUP_htable_remove(store->keys, found[i]);
+        if (UNLIKELY (rm_rc < 0)) {
+            rc = rm_rc;
+            break;
+        }
+
+        (*ct) ++;
+    }
+    free(found);
+    // TODO clean up orphan indices in separate function.
+
+    return rc;
+}
+
+
+/*
+ * Set up an iterator over the triples of a store that match a pattern.
+ *
+ * The three terms of sspo are converted to keys; the terms whose key is not
+ * NULL_KEY are the bound ones. The bound keys are copied to the iterator's
+ * luk array and the matching eq_fn is selected, then the iterator is primed
+ * with the first entry of the key table. The iterator is allocated here and
+ * handed back through it_p.
+ *
+ * Returns LSUP_NOACTION, without allocating an iterator, if the key table is
+ * empty; a negative code if priming the iterator failed; LSUP_OK otherwise.
+ *
+ * NOTE(review): eq_fn is set to NULL when all three terms are bound, and ct
+ * is not written anywhere in this function. Confirm both are intended.
+ */
 LSUP_rc
 LSUP_htstore_lookup(
-        HTStore *gr, const LSUP_Triple *spo, HTIterator **it_p, size_t *ct)
+        HTStore *store, const LSUP_SerTriple *sspo,
+        HTIterator **it_p, size_t *ct)
 {
-    if (LSUP_htable_size(gr->keys) == 0)
+    if (LSUP_htable_size(store->keys) == 0)
         return LSUP_NOACTION;
 
-    htsize_t cur = 0;
-    LSUP_Key k1, k2;
-    LSUP_key_cmp_fn_t cmp_fn;
-    LSUP_TripleKey i_spok;
-
     LSUP_TripleKey spok = {
-        LSUP_term_to_key(spo->s),
-        LSUP_term_to_key(spo->p),
-        LSUP_term_to_key(spo->o),
+        LSUP_sterm_to_key(sspo->s),
+        LSUP_sterm_to_key(sspo->p),
+        LSUP_sterm_to_key(sspo->o),
     };
 
     HTIterator *it;
     CRITICAL(it = malloc(sizeof(HTIterator)));
+    it->store = store;
+    it->cur = 0;
     *it_p = it;
 
+    // s p o
     if (spok[0] != NULL_KEY && spok[1] != NULL_KEY && spok[2] != NULL_KEY) {
-        int rc = LSUP_htable_get(gr->keys, spok, NULL);
-        /* * * /
-        if (match_cond == true) {
-            // Shortcut for 3-term match—only if match_cond is true.
-            LSUP_graph_init(res, 1, NULL, LSUP_STORE_MEM);
-            int rc = LSUP_htable_get(gr->keys, spok, NULL);
-            if(rc == LSUP_OK) {
-                callback_fn(gr, res, &spok, ctx);
-                return LSUP_OK;
-            } else {
-                return LSUP_NOACTION;
-            }
-        } else {
-            // For negative condition (i.e. "apply this function to all triples
-            // except the matching one")
-            int rc = LSUP_NOACTION;
-            while (LSUP_htable_iter(
-                        gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
-                if (LIKELY(
-                    i_spok[2] != spok[2] ||
-                    i_spok[0] != spok[0] ||
-                    i_spok[1] != spok[1]
-                )) {
-                    rc = callback_fn(gr, res, &i_spok, ctx);
-                }
-            }
-
-            return rc;
-        }
-        */
+        memcpy(it->luk, spok, sizeof(LSUP_TripleKey));
+        it->eq_fn = NULL;
 
     } else if (spok[0] != NULL_KEY) {
-        k1 = spok[0];
-
-        if (spok[1] != NULL_KEY) { // s p ?
-            k2 = spok[1];
-            cmp_fn = lookup_skpk_cmp_fn;
+        it->luk[0] = spok[0];
 
-        } else if (spok[2] != NULL_KEY) { // s ? o
-            k2 = spok[2];
-            cmp_fn = lookup_skok_cmp_fn;
+        // s p ?
+        if (spok[1] != NULL_KEY) {
+            it->luk[1] = spok[1];
+            it->eq_fn = lookup_spk_eq_fn;
 
-        } else { // s ? ?
-            cmp_fn = lookup_sk_cmp_fn;
+        // s ? o
+        } else if (spok[2] != NULL_KEY) {
+            it->luk[1] = spok[2];
+            it->eq_fn = lookup_sok_eq_fn;
 
+        // s ? ?
+        } else {
+            it->eq_fn = lookup_sk_eq_fn;
         }
 
     } else if (spok[1] != NULL_KEY) {
-        k1 = spok[1];
+        it->luk[0] = spok[1];
 
-        if (spok[2] != NULL_KEY) { // ? p o
-            k2 = spok[2];
-            cmp_fn = lookup_pkok_cmp_fn;
+        // ? p o
+        if (spok[2] != NULL_KEY) {
+            it->luk[1] = spok[2];
+            it->eq_fn = lookup_pok_eq_fn;
 
-        } else { // ? p ?
-            cmp_fn = lookup_pk_cmp_fn;
-        }
+        // ? p ?
+        } else it->eq_fn = lookup_pk_eq_fn;
 
-    } else if (spok[2] != NULL_KEY) { // ? ? o
-        k1 = spok[2];
-        cmp_fn = lookup_ok_cmp_fn;
+    // ? ? o
+    } else if (spok[2] != NULL_KEY) {
+        it->luk[0] = spok[2];
+        it->eq_fn = lookup_ok_eq_fn;
 
-    } else {
-        printf("WARNING: no bound terms, making a compact copy.\n");
-        return LSUP_graph_copy(res, &gr);
-    }
+    // ? ? ?
+    } else it->eq_fn = lookup_none_eq_fn;
 
-    while (LSUP_htable_iter(gr->keys, &cur, (void**)&i_spok, NULL) == LSUP_OK) {
-        if (cmp_fn(&i_spok, k1, k2) == match_cond)
-            callback_fn(gr, res, &i_spok, ctx);
-    }
+    it->rc = LSUP_htable_iter(
+            it->store->keys, &it->cur, (void**)&it->spok, NULL);
 
-    return LSUP_OK;
+    return it->rc >= 0 ? LSUP_OK : it->rc;
 }
 
 
-/*
-int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
+/*
+ * Position the iterator on the next key that matches the lookup.
+ *
+ * Starting from the current entry, entries are tested against the lookup keys
+ * until one matches (LSUP_OK) or the underlying table iteration returns
+ * anything other than LSUP_OK, which is then returned as is. The cursor is
+ * not moved past a match.
+ */
+static inline LSUP_rc
+htiter_next_key(HTIterator *it)
 {
-    LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM);
+    for (;;) {
+        if (it->rc != LSUP_OK) return it->rc;
+
+        // A fully bound lookup has no eq_fn: its three lookup keys are a whole
+        // triple key, so compare it directly instead of calling through NULL.
+        const bool match = it->eq_fn
+            ? it->eq_fn(it->spok, it->luk)
+            : memcmp(it->spok, it->luk, sizeof(LSUP_TripleKey)) == 0;
+        if (match) return LSUP_OK;
 
-    return LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
+        it->rc = LSUP_htable_iter(
+                it->store->keys, &it->cur, (void**)&it->spok, NULL);
+    }
 }
-*/
 
+
+/** @brief Yield the next triple matching a lookup.
+ *
+ * @param it Iterator obtained from LSUP_htstore_lookup().
+ * @param sspo Serialized triple whose three terms receive the results of the
+ *  term index look-ups.
+ *
+ * @return LSUP_OK if a triple was yielded; otherwise the code returned by the
+ *  key iteration.
+ *
+ * NOTE(review): the index look-up result is written through the term pointers
+ * of sspo cast to void**. Confirm this is the intended way to hand terms back.
+ */
+LSUP_rc
+LSUP_htiter_next(HTIterator *it, LSUP_SerTriple *sspo)
+{
+    LSUP_rc rc = htiter_next_key(it);
+    if (UNLIKELY (rc != LSUP_OK)) return rc;
+
+    // The table expects the address of a key, not the key value itself.
+    for (int i = 0; i < 3; i++)
+        LSUP_htable_get(
+                it->store->idx, it->spok + i,
+                (void**)(LSUP_striple_pos(sspo, i)));
+
+    // Move past the triple just yielded. Without this, the next call would
+    // find the same match again.
+    it->rc = LSUP_htable_iter(
+            it->store->keys, &it->cur, (void**)&it->spok, NULL);
+
+    return rc;
+}
+
+
+/** @brief Release an iterator obtained from a store lookup.
+ *
+ * @param it Iterator handle.
+ */
+void
+LSUP_htiter_free(LSUP_HTIterator *it)
+{
+    free(it);
+}
+
+
+/** @brief Combine two stores with a boolean operation.
+ *
+ * A new store is created and handed back through dest_p, then filled
+ * according to op:
+ * - union: copy of s1 plus every entry of s2;
+ * - subtraction: keys of s1 not found in s2;
+ * - intersection: keys of s1 also found in s2;
+ * - XOR: keys of s2 not found in s1, plus keys of s1 not found in s2.
+ *
+ * @param op Operation to perform.
+ * @param s1 First operand.
+ * @param s2 Second operand.
+ * @param dest_p Receives the new store.
+ *
+ * @return LSUP_OK on success; LSUP_VALUE_ERR for an unknown operation; the
+ *  code of a failed store creation or key look-up otherwise.
+ *
+ * NOTE(review): outside of the union, the term index is looked up with the
+ * triple key taken from the key table. Confirm this is the intended key.
+ */
+LSUP_rc
+LSUP_htstore_bool_op(
+        const LSUP_bool_op op, const HTStore *s1, const HTStore *s2,
+        HTStore **dest_p)
+{
+    HTStore *dest;
+    htsize_t cur;
+    void *key, *val;
+
+    LSUP_rc rc = LSUP_htstore_new(0, &dest);
+    *dest_p = dest;
+    if (UNLIKELY (rc != LSUP_OK)) return rc;
+
+    if (UNLIKELY (
+            op != LSUP_BOOL_UNION
+            && op != LSUP_BOOL_SUBTRACTION
+            && op != LSUP_BOOL_INTERSECTION
+            && op != LSUP_BOOL_XOR)) return LSUP_VALUE_ERR;
+
+    if (op == LSUP_BOOL_UNION) {
+        // The copies replace the empty tables created above: free those
+        // first, or they leak.
+        LSUP_htable_free(dest->keys);
+        LSUP_htable_copy(s1->keys, &dest->keys);
+        // Every scan starts from the first bucket: reset the cursor each time.
+        cur = 0;
+        while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END)
+            LSUP_htable_put(dest->keys, key, NULL);
+
+        LSUP_htable_free(dest->idx);
+        LSUP_htable_copy(s1->idx, &dest->idx);
+        cur = 0;
+        while (LSUP_htable_iter(s2->idx, &cur, &key, &val) != LSUP_END)
+            LSUP_htable_put(dest->idx, key, val);
+
+    } else {
+        if (op == LSUP_BOOL_XOR) {
+            cur = 0;
+            while (LSUP_htable_iter(s2->keys, &cur, &key, NULL) != LSUP_END) {
+                LSUP_rc get_rc = LSUP_htable_get(s1->keys, key, NULL);
+                if (get_rc == LSUP_NORESULT) {
+                    LSUP_htable_put(dest->keys, key, NULL);
+
+                    if (LSUP_htable_get(s2->idx, key, &val) == LSUP_OK)
+                        LSUP_htable_put(dest->idx, key, val);
+
+                } else if (UNLIKELY(get_rc < 0)) return get_rc;
+            }
+
+        }
+
+        // Intersection keeps the keys of s1 found in s2; subtraction and XOR
+        // keep the keys of s1 that are missing from s2.
+        const LSUP_rc keep_rc =
+            op == LSUP_BOOL_INTERSECTION ? LSUP_OK : LSUP_NORESULT;
+
+        cur = 0;
+        while (LSUP_htable_iter(s1->keys, &cur, &key, NULL) != LSUP_END) {
+            LSUP_rc get_rc = LSUP_htable_get(s2->keys, key, NULL);
+            if (get_rc == keep_rc) {
+                LSUP_htable_put(dest->keys, key, NULL);
+
+                if (LSUP_htable_get(s1->idx, key, &val) == LSUP_OK)
+                    LSUP_htable_put(dest->idx, key, val);
+
+            } else if (UNLIKELY(get_rc < 0)) return get_rc;
+        }
+    }
+
+    return LSUP_OK;
+}

+ 203 - 166
src/store_mdb.c

@@ -29,6 +29,7 @@ typedef char DbLabel[8];
 typedef enum {
     LSSTORE_INIT         = 1, // Is the store environment set up on disk?
     LSSTORE_OPEN         = 3, // Is the environment open? Assumes init is set.
+    LSSTORE_DIRTY_TXN    = 4, // Main txn was opened in a subroutine.
 } StoreState;
 
 
@@ -38,13 +39,13 @@ typedef enum {
 } StoreOp;
 
 
+/** @brief LMDB-backed store handle.
+ */
-struct MDBStore {
+typedef struct MDBStore {
     MDB_env *           env;        // Environment handle.
     MDB_txn *           txn;        // Current transaction.
     MDB_dbi             dbi[N_DB];  // DB handles. Refer to DbIdx enum.
-    LSUP_Buffer *       default_ctx;// Default context as a serialized URI.
-    StoreState          state;      // Store state (initialized, open etc.)
-};
+    LSUP_Buffer *       default_ctx;// Default ctx as a serialized URI.
+    StoreState          state;      // Store state.
+} MDBStore;
 
 
 /** @brief Iterator operation.
@@ -62,19 +63,20 @@ typedef void (*iter_op_fn_t)(struct MDBIterator *it);
 
 /** @brief Triple iterator.
  *
  * Besides look-ups, this struct also carries the state of an add loop
  * (LSUP_mdbstore_add_init / _add_iter / _add_done), which uses the store,
  * key, data, ck, i, rc and state members.
  */
-struct MDBIterator {
-    struct MDBStore *store;     // MDB store pointer.
-    MDB_txn *txn;               // MDB transaction.
-    MDB_cursor *cur;            // MDB cursor.
-    MDB_val key, data;          // Internal data handlers.
-    LSUP_TripleKey *spok;       // Triple to be populated with match.
-    LSUP_Key ck;                // Context key to filter by. May be NULL_TRP.
-    iter_op_fn_t iter_op_fn;    // Function used to look up next match.
-    const uint8_t *term_order;  // Term order used in 1-2bound look-ups.
-    LSUP_Key luk[3];            // 0÷3 lookup keys.
-    size_t i;                   // Internal counter for paged lookups.
-    int rc;                     // MDB_* return code for the next result.
-};
+typedef struct MDBIterator {
+    MDBStore *          store;      // MDB store pointer.
+    MDB_txn *           txn;        // MDB transaction.
+    MDB_cursor *        cur;        // MDB cursor.
+    MDB_val             key, data;  // Internal data handlers.
+    LSUP_TripleKey      spok;       // Triple to be populated with match.
+    LSUP_Key            ck;         // Ctx key to filter by. May be NULL_TRP.
+    iter_op_fn_t        iter_op_fn; // Function used to look up next match.
+    const uint8_t *     term_order; // Term order used in 1-2bound look-ups.
+    LSUP_Key            luk[3];     // 0÷3 lookup keys.
+    size_t              i;          // Internal counter for paged lookups.
+    int                 rc;         // MDB_* return code for the next result.
+    StoreState          state;      // State flags.
+} MDBIterator;
 
 
 /*
@@ -200,15 +202,14 @@ static int index_triple(
         LSUP_TripleKey spok, LSUP_Key ck);
 
 inline static LSUP_rc lookup_0bound(
-        struct MDBStore *store, struct MDBIterator *it, size_t *ct);
+        MDBStore *store, MDBIterator *it, size_t *ct);
 inline static LSUP_rc lookup_1bound(
-        struct MDBStore *store, uint8_t idx0,
-        struct MDBIterator *it, size_t *ct);
+        MDBStore *store, uint8_t idx0, MDBIterator *it, size_t *ct);
 inline static LSUP_rc lookup_2bound(
-        struct MDBStore *store, uint8_t idx0, uint8_t idx1,
-        struct MDBIterator *it, size_t *ct);
+        MDBStore *store, uint8_t idx0, uint8_t idx1,
+        MDBIterator *it, size_t *ct);
 inline static LSUP_rc lookup_3bound(
-        struct MDBStore *store, struct MDBIterator *it, size_t *ct);
+        MDBStore *store, MDBIterator *it, size_t *ct);
 /* TODO
 inline static int check_txn_open(MDB_txn *txn, bool write);
 */
@@ -224,7 +225,7 @@ static int rmrf(char *path);
  */
 
 LSUP_rc
-LSUP_store_setup(char **path/*, bool clear*/) // TODO clear
+LSUP_mdbstore_setup(char **path/*, bool clear*/) // TODO clear
 {
     int rc;
 
@@ -276,12 +277,15 @@ LSUP_store_setup(char **path/*, bool clear*/) // TODO clear
 }
 
 
-LSUP_MDBStore *
-LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx)
+LSUP_rc
+LSUP_mdbstore_new(
+        const char *path, const LSUP_Buffer *default_ctx,
+        LSUP_MDBStore **store_p)
 {
     int rc;
     LSUP_MDBStore *store;
     CRITICAL(store = malloc(sizeof(LSUP_MDBStore)));
+    *store_p = NULL;
 
     rc = mdb_env_create(&store->env);
     TRACE("create rc: %d", rc);
@@ -299,10 +303,10 @@ LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx)
     else sscanf(env_mapsize, "%lu", &mapsize);
 
     rc = mdb_env_set_maxdbs(store->env, N_DB);
-    if(rc != MDB_SUCCESS) return NULL;
+    if(rc != MDB_SUCCESS) return LSUP_DB_ERR;
 
     rc = mdb_env_open(store->env, path, 0, ENV_FILE_MODE);
-    if (rc != MDB_SUCCESS) return NULL;
+    if (rc != MDB_SUCCESS) return LSUP_DB_ERR;
 
     // Assign DB handles to store->dbi.
     MDB_txn *txn;
@@ -312,7 +316,7 @@ LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx)
                 txn, db_labels[i], db_flags[i], store->dbi + i);
         if (rc != MDB_SUCCESS) {
             mdb_txn_abort(txn);
-            return NULL;
+            return LSUP_DB_ERR;
         }
     }
 
@@ -321,12 +325,14 @@ LSUP_store_new(const char *path, const LSUP_Buffer *default_ctx)
     store->state |= LSSTORE_OPEN;
     store->txn = NULL;
 
-    return store;
+    *store_p = store;
+
+    return LSUP_OK;
 }
 
 
 void
-LSUP_store_free(LSUP_MDBStore *store)
+LSUP_mdbstore_free(LSUP_MDBStore *store)
 {
     if (store->state & LSSTORE_OPEN) {
         TRACE(STR, "Closing MDB env.\n");
@@ -343,7 +349,7 @@ LSUP_store_free(LSUP_MDBStore *store)
 
 
 LSUP_rc
-LSUP_store_stats(LSUP_MDBStore *store)
+LSUP_mdbstore_stats(LSUP_MDBStore *store)
 {
     // TODO
     // MDB_stat env_stat, db_stats[N_DB];
@@ -352,7 +358,7 @@ LSUP_store_stats(LSUP_MDBStore *store)
 
 
 size_t
-LSUP_store_size(LSUP_MDBStore *store)
+LSUP_mdbstore_size(LSUP_MDBStore *store)
 {
     if(!(store->state & LSSTORE_INIT)) return 0;
 
@@ -376,109 +382,146 @@ LSUP_store_size(LSUP_MDBStore *store)
 }
 
 
-LSUP_rc
-LSUP_store_add(
-        LSUP_MDBStore *store, const LSUP_Buffer *sc,
-        const LSUP_SerTriple *data, const size_t data_size)
+/** @brief Start an add loop.
+ *
+ * Allocates the iterator that carries the loop state, opens the store's main
+ * transaction if none is open, and stores the context term if the store has a
+ * default context.
+ *
+ * @param store Store handle.
+ * @param sc Serialized context; if NULL, the store's default context is used.
+ * @param it_p Receives the iterator. Its rc member is LSUP_DB_ERR if storing
+ *  the context failed, LSUP_NOACTION otherwise.
+ */
+void
+LSUP_mdbstore_add_init(
+        LSUP_MDBStore *store, const LSUP_Buffer *sc, MDBIterator **it_p)
 {
-    MDB_val key_v, data_v;
+    /* An iterator is used here. Some members are a bit misused but it does
+     * its job without having to define a very similar struct.
+     */
+    MDBIterator *it;
+    CRITICAL(it= malloc(sizeof(MDBIterator)));
+    it->i = 0;
+    // The following steps of the add loop read these members: set them all,
+    // as malloc leaves the memory uninitialized.
+    it->store = store;
+    it->rc = LSUP_NOACTION;
+    it->state = 0;
 
-    bool txn_pending = false;
     if (!store->txn) {
         mdb_txn_begin(store->env, NULL, 0, &store->txn);
-        txn_pending = true;
+        // We are starting the main DB txn and we need to close it afterwards.
+        it->state = LSSTORE_DIRTY_TXN;
     }
 
     // Take care of context first.
     // Serialize and hash.
-    LSUP_Key ck = NULL_KEY;
+    it->ck = NULL_KEY;
 
     if (store->default_ctx != NULL) {
         if (sc == NULL) sc = store->default_ctx;
 
-        ck = LSUP_sterm_to_key(sc);
+        it->ck = LSUP_sterm_to_key(sc);
 
         // Insert t:st for context.
         //TRACE("Adding context: %s", sc);
-        key_v.mv_data = &ck;
-        key_v.mv_size = KLEN;
-        data_v.mv_data = sc->addr;
-        data_v.mv_size = sc->size;
+        it->key.mv_data = &it->ck;
+        it->key.mv_size = KLEN;
+        it->data.mv_data = sc->addr;
+        it->data.mv_size = sc->size;
 
-        mdb_put(
+        const int db_rc = mdb_put(
                 store->txn, store->dbi[IDX_T_ST],
-                &key_v, &data_v, MDB_NOOVERWRITE);
+                &it->key, &it->data, MDB_NOOVERWRITE);
+        // With MDB_NOOVERWRITE, a context that is already stored is reported
+        // as MDB_KEYEXIST: that is not a failure.
+        if (db_rc != MDB_SUCCESS && db_rc != MDB_KEYEXIST)
+            it->rc = LSUP_DB_ERR;
     }
 
-    LSUP_rc rc = LSUP_NOACTION;
+    *it_p = it;
+}
+
+
+/** @brief Add one triple within an add loop.
+ *
+ * The three serialized terms are stored under their keys in IDX_T_ST, the
+ * triple key is stored in IDX_SPO_C with the iterator's context key as data,
+ * and the triple is indexed with index_triple(). The iterator's counter is
+ * incremented when the resulting code is LSUP_OK.
+ *
+ * @param it Iterator set up by LSUP_mdbstore_add_init().
+ * @param sspo Serialized triple to add.
+ *
+ * @return The iterator's rc member: LSUP_DB_ERR if a put failed with anything
+ *  other than MDB_KEYEXIST, otherwise the return value of index_triple().
+ */
+LSUP_rc
+LSUP_mdbstore_add_iter(MDBIterator *it, const LSUP_SerTriple *sspo)
+{
     int db_rc;
-    for (size_t i = 0; i < data_size; i++) {
-        const LSUP_SerTriple *sspo = data + i;
-        LSUP_TripleKey spok = NULL_TRP;
-
-        // Add triple.
-        for (int j = 0; j < 3; j++) {
-            LSUP_SerTerm *st = LSUP_ser_triple_term_by_pos(sspo, j);
-
-            printf("Inserting term: ");
-            LSUP_buffer_print(st);
-            printf("\n");
-
-            spok[j] = LSUP_sterm_to_key(st);
-
-            key_v.mv_data = spok + j;
-            key_v.mv_size = KLEN;
-            data_v.mv_data = st->addr;
-            data_v.mv_size = st->size;
-
-            db_rc = mdb_put(
-                    store->txn, store->dbi[IDX_T_ST],
-                    &key_v, &data_v, MDB_NOOVERWRITE);
-            if (db_rc == MDB_SUCCESS) rc = LSUP_OK;
-            else if (db_rc != MDB_KEYEXIST) goto _add_close_txn;
-        }
+    LSUP_TripleKey spok = NULL_TRP;
 
-        TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
+    // Add triple.
+    for (int j = 0; j < 3; j++) {
+        LSUP_Buffer *st = LSUP_striple_pos(sspo, j);
 
-        // Insert spo:c.
-        key_v.mv_data = spok;
-        key_v.mv_size = TRP_KLEN;
+        printf("Inserting term: ");
+        LSUP_buffer_print(st);
+        printf("\n");
 
-        // In triple mode, data is empty (= NULL_KEY).
-        data_v.mv_data = &ck;
-        data_v.mv_size = ck == NULL_KEY ? 0 : KLEN;
+        spok[j] = LSUP_sterm_to_key(st);
 
+        it->key.mv_data = spok + j;
+        it->key.mv_size = KLEN;
+        it->data.mv_data = st->addr;
+        it->data.mv_size = st->size;
 
         db_rc = mdb_put(
-                store->txn, store->dbi[IDX_SPO_C],
-                &key_v, &data_v, MDB_NODUPDATA);
-        if (db_rc == MDB_SUCCESS) rc = LSUP_OK;
-        else if (db_rc != MDB_KEYEXIST) goto _add_close_txn;
+                it->store->txn, it->store->dbi[IDX_T_ST],
+                &it->key, &it->data, MDB_NOOVERWRITE);
+        if (db_rc == MDB_SUCCESS) it->rc = LSUP_OK;
+        else if (db_rc != MDB_KEYEXIST) {
+            it->rc = LSUP_DB_ERR;
+            return it->rc;
+        }
+    }
+
+    TRACE("Inserting spok: {%lx, %lx, %lx}", spok[0], spok[1], spok[2]);
+
+    // Insert spo:c.
+    it->key.mv_data = spok;
+    it->key.mv_size = TRP_KLEN;
+
+    // In triple mode, data is empty (= NULL_KEY).
+    it->data.mv_data = &it->ck;
+    it->data.mv_size = it->ck == NULL_KEY ? 0 : KLEN;
 
-        // Index.
-        PCHECK(index_triple(store, OP_ADD, spok, ck), db_rc, _add_close_txn);
+    db_rc = mdb_put(
+            it->store->txn, it->store->dbi[IDX_SPO_C],
+            &it->key, &it->data, MDB_NODUPDATA);
+    if (db_rc == MDB_SUCCESS) it->rc = LSUP_OK;
+    else if (db_rc != MDB_KEYEXIST) {
+        it->rc = LSUP_DB_ERR;
+        return it->rc;
     }
 
-_add_close_txn:
+    // Index.
+    it->rc = index_triple(it->store, OP_ADD, spok, it->ck);
+
+    if(it->rc == LSUP_OK) it->i++;
+
+    return it->rc;
+}
+
+
+/** @brief Close an add loop.
+ *
+ * If the add loop opened the store's main transaction, that transaction is
+ * committed when the loop's last code is LSUP_OK and aborted otherwise. The
+ * iterator itself is not released here.
+ *
+ * @param it Iterator used for the add loop.
+ * @param inserted If not NULL, receives the iterator's counter of triples
+ *  added with LSUP_OK, or 0 if the transaction opened by the loop was not
+ *  committed.
+ *
+ * @return The loop's last code, or LSUP_DB_ERR if the commit failed.
+ */
+LSUP_rc
+LSUP_mdbstore_add_done(MDBIterator *it, size_t *inserted)
+{
+    size_t added = it->i;
+
     // Only return commit rc if it fails.
-    if (txn_pending) {
-        if (rc == LSUP_OK) {
-            if((db_rc = mdb_txn_commit(store->txn)) != MDB_SUCCESS) {
-                mdb_txn_abort(store->txn);
-                rc = db_rc;
+    if (it->state & LSSTORE_DIRTY_TXN) {
+        if (it->rc == LSUP_OK) {
+            // The txn handle is freed by mdb_txn_commit even if it fails: it
+            // must not be passed to mdb_txn_abort afterwards.
+            if(mdb_txn_commit(it->store->txn) != MDB_SUCCESS) {
+                it->rc = LSUP_DB_ERR;
+                added = 0;
             }
-        } else mdb_txn_abort(store->txn);
+        } else {
+            mdb_txn_abort(it->store->txn);
+            added = 0;
+        }
 
-        store->txn = NULL;
+        it->store->txn = NULL;
     }
 
-    return rc;
+    // This out parameter was never written before.
+    if (inserted) *inserted = added;
+
+    return it->rc;
+}
+
+
+/** @brief Add an array of triples to the store in one go.
+ *
+ * Runs a complete add loop: LSUP_mdbstore_add_init(), one
+ * LSUP_mdbstore_add_iter() per triple (stopping at the first negative code),
+ * and LSUP_mdbstore_add_done().
+ *
+ * @param store Store handle.
+ * @param sc Serialized context, passed to LSUP_mdbstore_add_init().
+ * @param strp Array of serialized triples.
+ * @param ct Number of triples in strp.
+ * @param inserted Passed to LSUP_mdbstore_add_done().
+ *
+ * @return The return code of LSUP_mdbstore_add_done().
+ */
+LSUP_rc
+LSUP_mdbstore_add(
+        LSUP_MDBStore *store, const LSUP_Buffer *sc,
+        const LSUP_SerTriple strp[], const size_t ct, size_t *inserted)
+{
+    MDBIterator *it;
+    LSUP_mdbstore_add_init(store, sc, &it);
+
+    // Feed the triples unless initialization failed (negative code). The
+    // test was inverted before, so triples were only added after a failure.
+    if (it->rc >= 0) {
+        for (size_t i = 0; i < ct; i++)
+            if (LSUP_mdbstore_add_iter(it, strp + i) < 0) break;
+    }
+
+    const LSUP_rc rc = LSUP_mdbstore_add_done(it, inserted);
+    // The iterator is private to this call and is not released by the add
+    // loop functions.
+    free(it);
+
+    return rc;
+}
 
 
 LSUP_Key
-LSUP_store_sterm_to_key(
-        LSUP_MDBStore *store, const LSUP_SerTerm *sterm)
+LSUP_mdbstore_sterm_to_key(
+        LSUP_MDBStore *store, const LSUP_Buffer *sterm)
 {
     // TODO this will be replaced by a lookup when 128-bit hash is introduced.
     return LSUP_sterm_to_key(sterm);
@@ -486,8 +529,8 @@ LSUP_store_sterm_to_key(
 
 
 LSUP_rc
-LSUP_store_key_to_sterm(
-        LSUP_MDBStore *store, const LSUP_Key key, LSUP_SerTerm *sterm)
+LSUP_mdbstore_key_to_sterm(
+        LSUP_MDBStore *store, const LSUP_Key key, LSUP_Buffer *sterm)
 {
     LSUP_rc rc = LSUP_NORESULT;
 
@@ -515,22 +558,22 @@ LSUP_store_key_to_sterm(
 
 
 LSUP_rc
-LSUP_store_lookup(
-        LSUP_MDBStore *store, LSUP_SerTerm *sspoc[],
-        struct MDBIterator **itp, size_t *ct)
+LSUP_mdbstore_lookup(
+        LSUP_MDBStore *store, const LSUP_SerTriple *sspo,
+        const LSUP_Buffer *sc, MDBIterator **it_p, size_t *ct)
 {
     LSUP_TripleKey spok = {
-        LSUP_sterm_to_key(sspoc[0]),
-        LSUP_sterm_to_key(sspoc[1]),
-        LSUP_sterm_to_key(sspoc[2]),
+        LSUP_sterm_to_key(sspo->s),
+        LSUP_sterm_to_key(sspo->p),
+        LSUP_sterm_to_key(sspo->o),
     };
 
     LSUP_MDBIterator *it;
-    CRITICAL(it = malloc(sizeof(struct MDBIterator)));
-    *itp = it;
+    CRITICAL(it = malloc(sizeof(MDBIterator)));
+    *it_p = it;
 
     it->store = store;
-    it->ck = store->default_ctx ? LSUP_sterm_to_key(sspoc[3]) : NULL_KEY;
+    it->ck = store->default_ctx ? LSUP_sterm_to_key(sc) : NULL_KEY;
 
     if(ct) *ct = 0;
 
@@ -587,7 +630,7 @@ LSUP_store_lookup(
 
 
 LSUP_rc
-LSUP_store_it_next(LSUP_MDBIterator *it, LSUP_SerTerm **sspo)
+mdbiter_next_key(LSUP_MDBIterator *it)
 {
     // Only advance if the previous it->rc wasn't already at the end.
     if(it->rc == MDB_NOTFOUND) return LSUP_END;
@@ -646,14 +689,21 @@ LSUP_store_it_next(LSUP_MDBIterator *it, LSUP_SerTerm **sspo)
 
     } else rc = LSUP_OK;
 
-    if (sspo) {
-        if (rc == LSUP_OK) {
-            LSUP_store_key_to_sterm(it->store, *it->spok[0], *sspo);
-            LSUP_store_key_to_sterm(it->store, *it->spok[1], *sspo + 1);
-            LSUP_store_key_to_sterm(it->store, *it->spok[2], *sspo + 2);
+    return rc;
+}
+
+
+LSUP_rc
+LSUP_mdbiter_next(LSUP_MDBIterator *it, LSUP_SerTriple *sspo)
+{
+    LSUP_rc rc = mdbiter_next_key(it);
+
+    if (sspo && rc == LSUP_OK) {
+        LSUP_mdbstore_key_to_sterm(it->store, it->spok[0], sspo->s);
+        LSUP_mdbstore_key_to_sterm(it->store, it->spok[1], sspo->p);
+        LSUP_mdbstore_key_to_sterm(it->store, it->spok[2], sspo->o);
 
         // TODO error handling.
-        } else *sspo = NULL;
     }
 
     return rc;
@@ -661,7 +711,7 @@ LSUP_store_it_next(LSUP_MDBIterator *it, LSUP_SerTerm **sspo)
 
 
 void
-LSUP_store_it_free(struct MDBIterator *it)
+LSUP_mdbiter_free(MDBIterator *it)
 {
     if (it) {
         mdb_cursor_close(it->cur);
@@ -674,9 +724,9 @@ LSUP_store_it_free(struct MDBIterator *it)
 
 
 LSUP_rc
-LSUP_store_remove(
-        LSUP_MDBStore *store, const LSUP_Buffer *sc,
-        LSUP_TripleKey data[], size_t data_size)
+LSUP_mdbstore_remove(
+        MDBStore *store, const LSUP_SerTriple *sspo,
+        const LSUP_Buffer *sc, size_t *ct)
 {
     LSUP_rc rc = LSUP_NOACTION;
 
@@ -700,8 +750,11 @@ LSUP_store_remove(
     spok_v.mv_size = TRP_KLEN;
     ck_v.mv_size = KLEN;
 
-    for(size_t i = 0; i < data_size; i++) {
-        spok_v.mv_data = data + i;
+    LSUP_MDBIterator *it;
+    LSUP_mdbstore_lookup(store, sspo, sc, &it, ct);
+
+    while (mdbiter_next_key(it)) {
+        spok_v.mv_data = it->spok;
 
         rc = mdb_cursor_get(dcur, &spok_v, &ck_v, MDB_GET_BOTH);
         if (rc == MDB_NOTFOUND) continue;
@@ -719,7 +772,7 @@ LSUP_store_remove(
         if (UNLIKELY(rc != MDB_SUCCESS)) goto _remove_abort;
 
         mdb_cursor_del(icur, 0);
-        spok_v.mv_data = data + i;
+        spok_v.mv_data = it->spok;
 
         // If there are no more contexts associated with this triple,
         // remove from indices.
@@ -727,7 +780,7 @@ LSUP_store_remove(
         if (rc == MDB_SUCCESS) continue;
         if (UNLIKELY(rc != MDB_NOTFOUND)) goto _remove_abort;
 
-        index_triple(store, OP_REMOVE, data[i], ck);
+        index_triple(store, OP_REMOVE, it->spok, ck);
     }
 
     if(UNLIKELY(mdb_txn_commit(txn) != MDB_SUCCESS)) {
@@ -765,20 +818,6 @@ static int rmrf(char *path)
 */
 
 
-/* TODO
-inline static int
-check_txn_open(MDB_txn *txn, bool write)
-{
-    if (txn == NULL) {
-        mdb_txn_begin(LSUP_mdbenv, NULL, write ? 0 : MDB_RDONLY, &txn);
-
-        return LSUP_OK;
-    }
-
-    return LSUP_NOACTION;
-}
-*/
-
 static LSUP_rc
 index_triple(
         LSUP_MDBStore *store, StoreOp op,
@@ -891,9 +930,9 @@ index_triple(
  * Cursor: spo:c
  */
 inline static void
-it_next_0bound(struct MDBIterator *it)
+it_next_0bound(MDBIterator *it)
 {
-    it->spok = (LSUP_TripleKey*)&it->data.mv_data;
+    // On the spo:c cursor the triple key is the record KEY (the data part is
+    // the context), and the advance call below only refreshes it->key. Copy
+    // the current triple from the key before moving the cursor forward.
+    memcpy(it->spok, it->key.mv_data, sizeof(LSUP_TripleKey));
 
     it->rc = mdb_cursor_get(it->cur, &it->key, NULL, MDB_NEXT);
 }
@@ -906,17 +945,17 @@ it_next_0bound(struct MDBIterator *it)
  * Cursor: s:po, p:so, or o:sp.
  */
 inline static void
-it_next_1bound(struct MDBIterator *it)
+it_next_1bound(MDBIterator *it)
 {
     LSUP_DoubleKey *lu_dset = it->data.mv_data;
 
-    it->spok[0][it->term_order[0]] = it->luk[0];
-    it->spok[0][it->term_order[1]] = lu_dset[it->i][0];
-    it->spok[0][it->term_order[2]] = lu_dset[it->i][1];
+    it->spok[it->term_order[0]] = it->luk[0];
+    it->spok[it->term_order[1]] = lu_dset[it->i][0];
+    it->spok[it->term_order[2]] = lu_dset[it->i][1];
 
     TRACE(
             "Composed triple: {%lu %lu %lu}",
-            it->spok[0][0], it->spok[0][1], it->spok[0][2]);
+            it->spok[0], it->spok[1], it->spok[2]);
 
     // Ensure next block within the same page is not beyond the last.
     if(it->i < it->data.mv_size / DBL_KLEN - 1) {
@@ -942,13 +981,13 @@ it_next_1bound(struct MDBIterator *it)
  * Cursor: po:s, so:p, or sp:o.
  */
 inline static void
-it_next_2bound(struct MDBIterator *it)
+it_next_2bound(MDBIterator *it)
 {
     LSUP_Key *lu_dset = it->data.mv_data;
 
-    it->spok[0][it->term_order[0]] = it->luk[0];
-    it->spok[0][it->term_order[1]] = it->luk[1];
-    it->spok[0][it->term_order[2]] = lu_dset[it->i];
+    it->spok[it->term_order[0]] = it->luk[0];
+    it->spok[it->term_order[1]] = it->luk[1];
+    it->spok[it->term_order[2]] = lu_dset[it->i];
 
     // Ensure next block within the same page is not beyond the last.
     if(it->i < it->data.mv_size / KLEN - 1)
@@ -969,7 +1008,7 @@ it_next_2bound(struct MDBIterator *it)
  * already MDB_NOTFOUND and this function will not be called.
  */
 inline static void
-it_next_3bound(struct MDBIterator *it)
+it_next_3bound(MDBIterator *it)
 {
+    // Flag the iterator as exhausted: mdbiter_next_key() returns LSUP_END as
+    // soon as it finds it->rc set to MDB_NOTFOUND.
     it->rc = MDB_NOTFOUND;
 }
@@ -978,7 +1017,7 @@ it_next_3bound(struct MDBIterator *it)
 /* * * Term-specific lookups. * * */
 
 inline static LSUP_rc
-lookup_0bound(struct MDBStore *store, struct MDBIterator *it, size_t *ct)
+lookup_0bound(MDBStore *store, MDBIterator *it, size_t *ct)
 {
     if(store->txn) it->txn = store->txn;
     else {
@@ -1023,9 +1062,7 @@ lookup_0bound(struct MDBStore *store, struct MDBIterator *it, size_t *ct)
 
 
 inline static LSUP_rc
-lookup_1bound(
-        struct MDBStore *store, uint8_t idx0,
-        struct MDBIterator *it, size_t *ct)
+lookup_1bound(MDBStore *store, uint8_t idx0, MDBIterator *it, size_t *ct)
 {
     it->term_order = (const uint8_t*)lookup_ordering_1bound[idx0];
 
@@ -1048,12 +1085,12 @@ lookup_1bound(
         // If a context is specified, the only way to count triples matching
         // the context is to loop over them.
         if (it->ck != NULL_KEY) {
-            struct MDBIterator *ct_it;
-            CRITICAL(ct_it = malloc(sizeof(struct MDBIterator)));
+            MDBIterator *ct_it;
+            CRITICAL(ct_it = malloc(sizeof(MDBIterator)));
 
             ct_it->luk[0] = it->luk[0];
             LSUP_TripleKey ct_spok;
-            ct_it->spok = &ct_spok;
+            memcpy(ct_it->spok, ct_spok, sizeof(LSUP_TripleKey));
             ct_it->ck = it->ck;
             ct_it->store = it->store;
             ct_it->txn = it->txn;
@@ -1062,7 +1099,7 @@ lookup_1bound(
             ct_it->i = 0;
             lookup_1bound(store, idx0, ct_it, NULL);
 
-            while (LSUP_store_it_next(ct_it, NULL) != LSUP_END) {
+            while (LSUP_mdbiter_next(ct_it, NULL) != LSUP_END) {
                 ct[0] ++;
                 TRACE("Counter increased to %lu.", *ct);
             }
@@ -1094,8 +1131,8 @@ lookup_1bound(
 
 inline static LSUP_rc
 lookup_2bound(
-        struct MDBStore *store, uint8_t idx0, uint8_t idx1,
-        struct MDBIterator *it, size_t *ct)
+        MDBStore *store, uint8_t idx0, uint8_t idx1,
+        MDBIterator *it, size_t *ct)
 {
     uint8_t luk1_offset, luk2_offset;
     MDB_dbi dbi = 0;
@@ -1158,19 +1195,19 @@ lookup_2bound(
         // If a context is specified, the only way to count triples matching
         // the context is to loop over them.
         if (it->ck != NULL_KEY) {
-            struct MDBIterator *ct_it;
-            CRITICAL(ct_it = malloc(sizeof(struct MDBIterator)));
+            MDBIterator *ct_it;
+            CRITICAL(ct_it = malloc(sizeof(MDBIterator)));
 
             ct_it->luk[0] = it->luk[0];
             ct_it->luk[1] = it->luk[1];
             LSUP_TripleKey ct_spok;
-            ct_it->spok = &ct_spok;
+            memcpy(ct_it->spok, ct_spok, sizeof(LSUP_TripleKey));
             ct_it->ck = it->ck;
             ct_it->store = it->store;
             ct_it->txn = it->txn;
             lookup_2bound(store, idx0, idx1, ct_it, NULL);
 
-            while (LSUP_store_it_next(ct_it, NULL) != LSUP_END) {
+            while (LSUP_mdbiter_next(ct_it, NULL) != LSUP_END) {
                 ct[0] ++;
             }
             if (ct_it->cur) mdb_cursor_close(ct_it->cur);
@@ -1199,7 +1236,7 @@ lookup_2bound(
 
 
 inline static LSUP_rc
-lookup_3bound(struct MDBStore *store, struct MDBIterator *it, size_t *ct)
+lookup_3bound(MDBStore *store, MDBIterator *it, size_t *ct)
 {
     TRACE(
             "Looking up 3 bound: {%lx, %lx, %lx}",
@@ -1232,7 +1269,7 @@ lookup_3bound(struct MDBStore *store, struct MDBIterator *it, size_t *ct)
     if(ct && it->rc == MDB_SUCCESS) *ct = 1;
 
     it->iter_op_fn = it_next_3bound;
-    it->spok = &it->luk;
+    memcpy(it->spok, it->luk, sizeof(LSUP_TripleKey));
 
     if (it->rc != MDB_SUCCESS && it->rc != MDB_NOTFOUND) {
         fprintf(stderr, "Database error: %s", mdb_strerror(it->rc));

+ 53 - 29
src/term.c

@@ -9,7 +9,8 @@ static bool ptn_init = false;
 
 /* Global inline prototypes. */
 
-LSUP_Term * LSUP_uri_new(const char *data);
+LSUP_Term *LSUP_uri_new(const char *data);
+LSUP_Term *LSUP_uri_random();
 
 
 /**
@@ -18,13 +19,13 @@ LSUP_Term * LSUP_uri_new(const char *data);
 void term_cleanup() { if (ptn_init) regfree(&ptn); }
 
 
-int
+LSUP_rc
 LSUP_term_init(
         LSUP_Term *term, LSUP_term_type type,
         const char *data, char *datatype, char *lang)
 {
     term->type = type;
-    if (data == NULL) return -1;
+    if (data == NULL) return LSUP_VALUE_ERR;
 
     if (term->type == LSUP_TERM_URI) {
         if (UNLIKELY(!ptn_init)) {
@@ -34,22 +35,22 @@ LSUP_term_init(
         }
 
         if (regexec(&ptn, data, 0, NULL, 0) != 0) {
-            printf("Error matching URI pattern.\n");
+            fprintf(stderr, "Error matching URI pattern.\n");
 
-            return -1;
+            return LSUP_VALUE_ERR;
         }
     }
 
-    term->data = malloc(strlen(data) + 1);
+    CRITICAL(term->data = malloc(strlen(data) + 1));
     strcpy(term->data, data);
 
-    if (datatype != NULL) {
+    if (datatype) {
         term->datatype = malloc(strlen(datatype) + 1);
         strcpy(term->datatype, datatype);
     } else {
         term->datatype = NULL;
     }
-    if (lang != NULL) {
+    if (lang) {
         // TODO validate language and country code
         //char lsize = 5 ? lang[2] == "-" : 2;
         memcpy(term->lang, lang, LANG_SIZE);
@@ -61,8 +62,8 @@ LSUP_term_init(
 }
 
 
-LSUP_Term
-*LSUP_term_new(
+LSUP_Term *
+LSUP_term_new(
         LSUP_term_type type, const char *data, char *datatype, char *lang) {
 
     LSUP_Term *term;
@@ -74,22 +75,45 @@ LSUP_Term
 }
 
 
-char *
-LSUP_term_gen_random_str()
-{
-    uuid_t uuid;
-    uuid_generate_random(uuid);
-
-    uuid_str_t uuid_str;
-    uuid_unparse_lower(uuid, uuid_str);
-
-    static char uri[UUIDSTR_SIZE + 10];
-    sprintf(uri, "urn:uuid4:%s", uuid_str);
-
-    return uri;
-}
-
-
+/*
+ * This function allocates and returns the following byte sequence:
+ *
+ * - `sizeof(char)` bytes for the term type;
+ * - `LANG_SIZE` bytes for the language tag;
+ * - Arbitrary bytes with NUL-terminated strings for data and datatype.
+ *
+ * The index for `data` is consistently `LANG_SIZE + sizeof(char)`. The
+ * index for `datatype` is found by the terminating NUL for `data`.
+ *
+ * Serialized representations of some RDF terms:
+ *
+ * <http://hello.org>
+ *
+ * 0      1                size=18
+ * | \x01 | http://hello.org\x00 |
+ * type   data
+ *
+ * "hello"
+ *
+ * 0      1      size=7
+ * | \x03 | hello\x00 |
+ * type   data
+ *
+ * "hello"^^xsd:string
+ *
+ * 0      1           7          size=18
+ * | \x03 | hello\x00 | xsd:string\x00 |
+ * type   data        datatype
+ *
+ * (note: the "xsd:" prefix is used for simplification here, it would
+ * normally be a fully qualified URI)
+ *
+ * "hello"@en-US
+ *
+ * 0      1           7               18             size=26
+ * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
+ * type   data        datatype         lang
+ */
 LSUP_rc
 LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm)
 {
@@ -136,7 +160,7 @@ LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm)
 }
 
 
-int
+LSUP_rc
 LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term)
 {
     size_t cur;
@@ -161,7 +185,7 @@ LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term)
 
     LSUP_term_init(term, type, data, datatype, lang);
 
-    return 0;
+    return LSUP_OK;
 }
 
 
@@ -221,7 +245,7 @@ void LSUP_term_free(LSUP_Term *term)
 
 // Extern inline functions.
 
-LSUP_Key LSUP_sterm_to_key(const LSUP_SerTerm *sterm);
+LSUP_Key LSUP_sterm_to_key(const LSUP_Buffer *sterm);
 
 LSUP_Key LSUP_term_to_key(const LSUP_Term *term);
 

+ 55 - 4
src/triple.c

@@ -1,8 +1,59 @@
 #include "triple.h"
 
 // Extern inline prototypes.
-LSUP_Term *LSUP_triple_term_by_pos(
-        const LSUP_Triple *trp, LSUP_TriplePos n);
-LSUP_SerTerm *LSUP_ser_triple_term_by_pos(
-        const LSUP_SerTriple *trp, LSUP_TriplePos n);
+LSUP_Term *LSUP_triple_pos(const LSUP_Triple *trp, LSUP_TriplePos n);
+LSUP_Buffer *LSUP_striple_pos(const LSUP_SerTriple *trp, LSUP_TriplePos n);
 
+
+/**
+ * Serialize each term of a triple into the matching buffer of a serialized
+ * triple: s into s, p into p, o into o.
+ *
+ * @param spo Source triple.
+ * @param sspo Destination serialized triple. Its s, p and o members are passed
+ *  as-is to LSUP_term_serialize(), so they presumably must already point to
+ *  usable buffers -- confirm against LSUP_term_serialize().
+ *
+ * @return LSUP_OK on success; otherwise the return code of the first term
+ *  serialization that failed. Terms following the failing one are skipped.
+ */
+LSUP_rc
+LSUP_triple_serialize(const LSUP_Triple *spo, LSUP_SerTriple *sspo)
+{
+    LSUP_rc rc;
+
+    rc = LSUP_term_serialize(spo->s, sspo->s);
+    if (UNLIKELY(rc != LSUP_OK)) return rc;
+    rc = LSUP_term_serialize(spo->p, sspo->p);
+    if (UNLIKELY(rc != LSUP_OK)) return rc;
+    rc = LSUP_term_serialize(spo->o, sspo->o);
+    if (UNLIKELY(rc != LSUP_OK)) return rc;
+
+    return LSUP_OK;
+}
+
+
+/**
+ * Deserialize each buffer of a serialized triple into the matching term of a
+ * triple: s into s, p into p, o into o.
+ *
+ * @param sspo Source serialized triple.
+ * @param spo Destination triple. Its s, p and o members are passed as-is to
+ *  LSUP_term_deserialize(), so they presumably must already point to term
+ *  structures -- confirm against LSUP_term_deserialize().
+ *
+ * @return LSUP_OK on success; otherwise the return code of the first term
+ *  deserialization that failed. Terms following the failing one are skipped.
+ */
+LSUP_rc
+LSUP_triple_deserialize(const LSUP_SerTriple *sspo, LSUP_Triple *spo)
+{
+    LSUP_rc rc;
+
+    rc = LSUP_term_deserialize(sspo->s, spo->s);
+    if (UNLIKELY(rc != LSUP_OK)) return rc;
+    rc = LSUP_term_deserialize(sspo->p, spo->p);
+    if (UNLIKELY(rc != LSUP_OK)) return rc;
+    rc = LSUP_term_deserialize(sspo->o, spo->o);
+    if (UNLIKELY(rc != LSUP_OK)) return rc;
+
+    return LSUP_OK;
+}
+
+
+/**
+ * Call LSUP_term_done() on the s, p and o terms of a triple, in that order.
+ *
+ * @param spo Triple whose terms are passed to LSUP_term_done(). A NULL
+ *  pointer is accepted and ignored.
+ */
+void
+LSUP_triple_done(LSUP_Triple *spo)
+{
+    if (UNLIKELY(!spo)) return;
+
+    LSUP_Term *terms[] = {spo->s, spo->p, spo->o};
+    for (size_t i = 0; i < sizeof(terms) / sizeof(terms[0]); i++) {
+        LSUP_term_done(terms[i]);
+    }
+}
+
+
+/**
+ * Call LSUP_buffer_done() on the s, p and o buffers of a serialized triple,
+ * in that order.
+ *
+ * @param sspo Serialized triple whose buffers are passed to
+ *  LSUP_buffer_done(). A NULL pointer is accepted and ignored.
+ */
+void
+LSUP_striple_done(LSUP_SerTriple *sspo)
+{
+    if (UNLIKELY(!sspo)) return;
+
+    LSUP_Buffer *bufs[] = {sspo->s, sspo->p, sspo->o};
+    for (size_t i = 0; i < sizeof(bufs) / sizeof(bufs[0]); i++) {
+        LSUP_buffer_done(bufs[i]);
+    }
+}