Browse Source

Merge keyset and index modules in graph.

Stefano Cossu 4 years ago
parent
commit
798dade9f7
6 changed files with 665 additions and 101 deletions
  1. 3 0
      .gitignore
  2. 31 11
      include/core.h
  3. 51 22
      include/graph.h
  4. 8 2
      include/term.h
  5. 567 64
      src/graph.c
  6. 5 2
      test/test_graph.c

+ 3 - 0
.gitignore

@@ -51,6 +51,9 @@ Module.symvers
 Mkfile.old
 dkms.conf
 
+# Keep files
+!.keep
+
 # Compiled binary files
 bin/*
 

+ 31 - 11
include/core.h

@@ -7,6 +7,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <uuid/uuid.h>
 
 #ifdef DEBUG
 #define DEBUG_TEST 1
@@ -26,22 +27,41 @@
 // TODO Handle memory errors better.
 #define CRITICAL(exp)   if (UNLIKELY(((exp) == NULL))) { abort(); }
 
-// NOTE This may change in the future, e.g. if a different key size is to
-// be forced.
-typedef size_t LSUP_Key;
-typedef LSUP_Key LSUP_DoubleKey[2];
-typedef LSUP_Key LSUP_TripleKey[3];
-typedef LSUP_Key LSUP_QuadKey[4];
-
-
+#define KLEN sizeof(LSUP_Key)
+#define DBL_KLEN sizeof(LSUP_DoubleKey)
+#define TRP_KLEN sizeof(LSUP_TripleKey)
+#define QUAD_KLEN sizeof(LSUP_QuadKey)
 // "NULL" key, a value that is never user-provided. Used to mark special
 // values (e.g. deleted records).
 #define NULL_KEY 0
 // Value of first key inserted in an empty term database.
 #define FIRST_KEY 1
-// "NULL" triple, a value that is never user-provided. Used to fill deleted
-// triples in a keyset.
-extern LSUP_TripleKey NULL_TRP;
+
+# define UUIDSTR_SIZE 37
+
+
+/* * * RETURN CODES * * */
+
+/**
+ * 0 is success, positive integers (>88800) are warnings, and negative integers
+ * (<-88800) are errors.
+ */
+#define LSUP_OK             0
+
+#define LSUP_NOACTION       88801
+#define LSUP_NORESULT       88802
+#define LSUP_END            88803
+
+#define LSUP_ERROR          (-88801)
+#define LSUP_PARSE_ERR      (-88802)
+
+
+typedef size_t LSUP_Key;
+typedef LSUP_Key LSUP_DoubleKey[2];
+typedef LSUP_Key LSUP_TripleKey[3];
+typedef LSUP_Key LSUP_QuadKey[4];
+
+typedef char uuid_str_t[UUIDSTR_SIZE];
 
 // Don't use MIN and MAX macros: see
 // https://dustri.org/b/min-and-max-macro-considered-harmful.html

+ 51 - 22
include/graph.h

@@ -1,8 +1,6 @@
 #ifndef _LSUP_GRAPH_H
 #define _LSUP_GRAPH_H
 
-#include "keyset.h"
-#include "index.h"
 #include "triple.h"
 
 
@@ -11,17 +9,21 @@ typedef enum LSUP_store_type {
     LSUP_STORE_MDB
 } LSUP_store_type;
 
-typedef struct LSUP_Graph {
-    LSUP_store_type store_type;
-    LSUP_Keyset *keys;
-    LSUP_Term *uri;
-    LSUP_Index *idx;
-} LSUP_Graph;
+typedef struct Graph LSUP_Graph;
 
 
-typedef void (*lookup_callback_fn_t)(
-    LSUP_Graph gr, const LSUP_TripleKey* spok_p, void* ctx
-);
+/**
+ * Post-lookup callback type.
+ *
+ * src is the graph that yielded a match. Its cursor points at the matched triple
+ *  key and is accessible via `keyset_peek(ks)`.
+ *
+ * dest is an optional keyset that may be acted upon. It may be NULL.
+ *
+ * ctx is an optional arbitrary pointer to additional data that may be used
+ *  by the callback.
+*/
+typedef int (*keyset_match_fn_t)(LSUP_Graph *src, LSUP_Graph *dest, void *ctx);
 
 
 int
@@ -32,14 +34,51 @@ LSUP_graph_init(
 LSUP_Graph *
 LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type);
 
+int
+LSUP_graph_copy(LSUP_Graph *src, LSUP_Graph *dest);
+
+size_t
+LSUP_graph_capacity(LSUP_Graph *gr);
+
+size_t
+LSUP_graph_size(LSUP_Graph *gr);
+
 bool
 LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *t);
 
+
+/**
+ * Execute a custom function on a graph based on a match pattern.
+ *
+ * This function executes an arbitrary callback on a graph, `res`, based on
+ * triples matched by a pattern on graph `gr`. `res` must be initialized but
+ * need not be empty. `res` can point to the same object as `gr` if changes
+ * are to be done in place (e.g. removing triples).
+ *
+ * @param[in] gr Graph to perform pattern matching.
+ * @param[out] res Result graph to apply the callback to.
+ * @param[in] spo Triple pattern. Each term of the triple members can be either
+ *  a term pointer or NULL. If NULL, the term is unbound.
+ * @param[in] callback_fn Callback function to apply.
+ * @param[in] match_cond If true, apply the callback to each triple a match is
+ *  found for. Otherwise, apply to each triple no match is found for.
+ * @param[in,out] ctx Arbitrary context that may be handled in the callback
+ *  function.
+ *
+ * @return LSUP_OK on match, LSUP_NOACTION on no match, <0 on error.
+ */
+int LSUP_graph_match_callback(
+        LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
+        keyset_match_fn_t callback_fn, bool match_cond, void *ctx);
+
+
 /**
  * Add triples to a graph.
  */
 int
-LSUP_graph_add(LSUP_Graph *gr, LSUP_Triple data[], size_t data_size);
+LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size);
+
+int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo);
 
 /**
  * Set-theoretical union (gr1 ∪ gr2).
@@ -69,17 +108,7 @@ int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res);
  */
 int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res);
 
-
 void
 LSUP_graph_free(LSUP_Graph *gr);
 
-
-/** Extern inline functions. */
-
-inline size_t
-LSUP_graph_capacity(LSUP_Graph *gr) { return gr->keys->capacity; }
-
-inline size_t
-LSUP_graph_size(LSUP_Graph *gr) { return gr->keys->free_i; }
-
 #endif

+ 8 - 2
include/term.h

@@ -87,8 +87,8 @@ LSUP_term_new(LSUP_term_type type, char *data, char *datatype, char *lang);
  *
  * "hello"@en-US
  *
- * 0      1           7               18     size=24
- * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00 |
+ * 0      1           7               18             size=26
+ * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
  * type   data        datatype         lang
  */
 int LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
@@ -106,9 +106,15 @@ LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
 }
 
 
+/**
+ * Hash a term into a key. If NULL is passed, the result is NULL_KEY.
+ */
 inline LSUP_Key
 LSUP_term_to_key(const LSUP_Term *term)
 {
+    if (term == NULL)
+        return NULL_KEY;
+
     LSUP_Buffer sterm_s;
     LSUP_Buffer *sterm = &sterm_s;
 

+ 567 - 64
src/graph.c

@@ -1,10 +1,44 @@
 #include "graph.h"
 
-// Max size of random graph name.
-#define RANDOM_NAME_SIZE 64
 // Initial size of lookup graph. It will double each time capacity is reached.
 #define LOOKUP_GR_INIT_SIZE 64
 
+typedef enum KSetFlag {
+    LSUP_KS_NONE        = 0,
+    LSUP_KS_CHECK_CAP   = 1 << 0,
+    LSUP_KS_CHECK_DUP   = 1 << 1
+} KSetFlag;
+
+typedef struct Keyset {
+    LSUP_TripleKey  *data;
+    size_t          capacity;
+    size_t          cur;
+    size_t          free_i;
+} Keyset;
+
+struct IndexEntry {
+    LSUP_Key key;
+    LSUP_SerTerm *val;
+};
+
+typedef struct Index {
+    size_t free_i;
+    size_t capacity;
+    struct IndexEntry *entries;
+} Index;
+
+typedef struct Graph {
+    LSUP_store_type store_type;
+    Keyset *keys;
+    LSUP_Term *uri;
+    Index *idx;
+} Graph;
+
+
+// "NULL" triple, a value that is never user-provided. Used to fill deleted
+// triples in a keyset.
+LSUP_TripleKey NULL_TRP = {NULL_KEY, NULL_KEY, NULL_KEY};
+
 /**
  * Extern inline functions.
  */
@@ -13,24 +47,324 @@ size_t LSUP_graph_size(LSUP_Graph *gr);
 size_t LSUP_graph_capacity(LSUP_Graph *gr);
 
 
+/**
+ * Callback type for key comparison.
+ */
+typedef bool (*LSUP_key_cmp_fn_t)(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
+
+
+//  Keyset lookup for S key.
+static bool lookup_sk_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return spok[0][0] == k1; }
+
+//  Keyset lookup for P key.
+static bool lookup_pk_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return spok[0][1] == k1; }
+
+//  Keyset lookup for O key.
+static bool lookup_ok_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return spok[0][2] == k1; }
+
+//  Keyset lookup for S and P keys.
+static bool lookup_skpk_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return spok[0][0] == k1 && spok[0][1] == k2; }
+
+//  Keyset lookup for S and O keys.
+static bool lookup_skok_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return spok[0][0] == k1 && spok[0][2] == k2; }
+
+//  Keyset lookup for P and O keys.
+static bool lookup_pkok_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return spok[0][1] == k1 && spok[0][2] == k2; }
+
+// Dummy callback for queries with all parameters unbound. Returns true.
+static bool lookup_none_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{ return true; }
+
+
+/* * * Post-lookup callbacks * * */
+
+int match_add_fn(LSUP_Graph *src, LSUP_Graph *dest, void *ctx);
+
+int match_rm_fn(LSUP_Graph *src, LSUP_Graph *dest, void *ctx);
+
+
+/* * * KEYSETS * * */
+
+static inline bool is_null_trp(const LSUP_TripleKey *trp)
+{
+    return (
+            (*trp)[0] == NULL_TRP[0]
+            && (*trp)[1] == NULL_TRP[1]
+            && (*trp)[2] == NULL_TRP[2]);
+}
+
+
+static int keyset_init(Keyset *ks, size_t capacity)
+{
+    CRITICAL (ks->data = malloc(capacity * TRP_KLEN));
+    ks->capacity = capacity;
+    ks->cur = 0;
+    ks->free_i = 0;
+
+    return LSUP_OK;
+}
+
+
+/**
+ * Move cursor to a non-empty position.
+ */
+static inline bool keyset_seek(Keyset* ks, size_t idx)
+{
+    if (idx >= ks->free_i) return false;
+
+    ks->cur = idx;
+
+    return true;
+}
+
+
+static inline LSUP_TripleKey *keyset_peek(Keyset *ks) {
+    return ks->data + ks->cur;
+}
+
+
+static inline bool keyset_contains(
+        const Keyset *ks, const LSUP_TripleKey *val) {
+
+    for (size_t i = 0; i < ks->free_i; i++) {
+        // scan from the least to the most probable to match.
+        if (
+                (*val)[2] == ks->data[i][2] &&
+                (*val)[0] == ks->data[i][0] &&
+                (*val)[1] == ks->data[i][1]) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+
+static inline bool keyset_next(Keyset *ks)
+{
+    if (ks->free_i > 0 && ks->cur < ks->free_i - 1) {
+        ks->cur ++;
+        return true;
+    }
+    return false;
+}
+
+
+/**
+ * Resize the triple key array of a keyset.
+ *
+ * The requested size is clamped so that it never drops below the number of
+ * occupied slots (free_i) and is never zero. The resulting size is recorded
+ * in ks->capacity so that later capacity checks see the real allocation.
+ *
+ * Aborts (via CRITICAL) if the reallocation fails.
+ *
+ * @param[in,out] ks Keyset to resize.
+ * @param[in] new_size Requested number of triple key slots.
+ *
+ * @return LSUP_OK.
+ */
+static int keyset_resize(Keyset *ks, size_t new_size) {
+    // Never truncate slots that are in use.
+    if (new_size < ks->free_i) new_size = ks->free_i;
+
+    // realloc() with size 0 may return NULL, which CRITICAL treats as fatal.
+    if (new_size == 0) new_size = 1;
+
+    CRITICAL (ks->data = realloc(ks->data, new_size * TRP_KLEN));
+
+    ks->capacity = new_size;
+
+    return LSUP_OK;
+}
+
+
+static int keyset_add(
+        Keyset *ks, const LSUP_TripleKey *val, const KSetFlag flags)
+{
+    if((flags & LSUP_KS_CHECK_DUP) && keyset_contains(ks, val))
+        return LSUP_NOACTION;
+
+    if((flags & LSUP_KS_CHECK_CAP) && ks->free_i >= ks->capacity)
+            keyset_resize(ks, ks->capacity * 2);
+
+    memcpy(ks->data + ks->free_i, val, TRP_KLEN);
+
+    ks->cur = ks->free_i;
+    ks->free_i ++;
+
+    return LSUP_OK;
+}
+
+
+/**
+ * Remove a triple key from a keyset.
+ *
+ * The first slot whose contents equal val is overwritten with NULL_TRP; the
+ * slot itself is not reclaimed. The scan starts at position 0, so the first
+ * element is tested as well. The keyset cursor is left on the removed slot,
+ * or on the last slot if nothing matched.
+ *
+ * @param[in,out] ks Keyset to remove from.
+ * @param[in] val Triple key to look for.
+ *
+ * @return LSUP_OK, whether or not a match was found.
+ */
+static int keyset_remove(Keyset *ks, const LSUP_TripleKey *val) {
+
+    // keyset_seek() fails on an empty keyset: nothing to scan.
+    if (!keyset_seek(ks, 0))
+        return LSUP_OK;
+
+    // do/while so the slot under the cursor is tested before advancing.
+    do {
+        if (memcmp(val, ks->data + ks->cur, TRP_KLEN) == 0) {
+            memcpy(ks->data + ks->cur, &NULL_TRP, TRP_KLEN);
+
+            break;
+        }
+    } while (keyset_next(ks));
+
+    return LSUP_OK;
+}
+
+
+static int keyset_copy(const Keyset *src, Keyset *dest) {
+
+    keyset_init(dest, src->capacity);
+
+    memcpy(dest->data, src->data, src->capacity * TRP_KLEN);
+
+    keyset_seek(dest, 0);
+    dest->free_i = src->free_i;
+
+    return LSUP_OK;
+}
+
+
+static int keyset_sparse_copy(Keyset *src, Keyset *dest) {
+
+    keyset_init(dest, src->capacity);
+
+    if (keyset_seek(src, 0)) {
+        do {
+            if (LIKELY(memcmp(
+                            keyset_peek(src),
+                            &NULL_TRP, TRP_KLEN) != 0)) {
+                keyset_add(dest, keyset_peek(src), 0);
+            }
+        } while (keyset_next(src));
+
+        keyset_seek(dest, 0);
+    }
+
+    return LSUP_OK;
+}
+
+
+static void keyset_free(Keyset *ks)
+{
+    if(LIKELY(ks != NULL)) {
+        if(LIKELY(ks->data != NULL))
+            free(ks->data);
+
+        free(ks);
+    }
+}
+
+
+/* * * INDEX * * */
+
+/**
+ * Allocate an empty term index.
+ *
+ * @param[in] capacity Number of entries to allocate room for.
+ *
+ * @return New index handle, or NULL if capacity is 0. Aborts (via CRITICAL)
+ *  if either allocation fails.
+ */
+static Index *index_new(size_t capacity)
+{
+    // Check before allocating so the zero-capacity path does not leak.
+    if (capacity == 0) return NULL;
+
+    Index *idx;
+    CRITICAL (idx = malloc(sizeof(*idx)));
+    CRITICAL (idx->entries = malloc(sizeof(struct IndexEntry) * capacity));
+
+    idx->free_i = 0;
+    idx->capacity = capacity;
+
+    return idx;
+}
+
+
+static void index_resize(Index *idx, size_t capacity)
+{
+    CRITICAL (idx->entries = (struct IndexEntry*)realloc(
+            idx->entries,
+            sizeof(struct IndexEntry) * capacity));
+
+    idx->capacity = capacity;
+}
+
+
+static LSUP_SerTerm *index_lookup(Index *idx, LSUP_Key key)
+{
+    LSUP_SerTerm *match = NULL;
+
+    for (size_t i = 0; i < idx->free_i; i++) {
+        if (idx->entries[i].key == key) {
+            match = idx->entries[i].val;
+            break;
+        }
+    }
+
+    return match;
+}
+
+
+/**
+ * Add a key / serialized term pair to an index.
+ *
+ * If the key is already present nothing is added. Otherwise the term buffer
+ * is copied into a new buffer stored in the index, growing the entry array
+ * first when it is full.
+ *
+ * @param[in,out] idx Index to add to.
+ * @param[in] key Key to store the term under.
+ * @param[in] sterm Serialized term to copy.
+ *
+ * @return LSUP_OK on success or if the key exists already; LSUP_ERROR if a
+ *  new entry is needed but sterm is NULL.
+ */
+static int index_add_pair(Index *idx, LSUP_Key key, LSUP_SerTerm *sterm)
+{
+    // Fail quietly if key exists already.
+    if (index_lookup(idx, key) != NULL)
+        return LSUP_OK;
+
+    // Nothing to copy from: report instead of dereferencing NULL.
+    if (sterm == NULL)
+        return LSUP_ERROR;
+
+    if (idx->free_i >= idx->capacity) {
+        // Grow by half plus one. The extra slot guarantees progress for
+        // very small capacities, where capacity * 1.5 truncates back to
+        // the same value.
+        index_resize(idx, idx->capacity + idx->capacity / 2 + 1);
+        TRACE("Capacity now at %zu\n", idx->capacity);
+    }
+
+    struct IndexEntry *entry = idx->entries + idx->free_i;
+
+    entry->key = key;
+    entry->val = LSUP_buffer_new(sterm->size);
+    memcpy(entry->val->addr, sterm->addr, sterm->size);
+
+    idx->free_i ++;
+    TRACE("Size now at %zu\n", idx->free_i);
+
+    return LSUP_OK;
+}
+
+
+static int index_add(Index *idx, LSUP_SerTerm *sterm)
+{
+    LSUP_Key key = LSUP_sterm_to_key(sterm);
+
+    return index_add_pair(idx, key, sterm);
+}
+
+
+/**
+ * Free an index together with all the term buffers stored in it.
+ *
+ * A NULL handle is accepted and ignored; index_new() returns NULL for a
+ * zero capacity, so graphs may legitimately carry a NULL index.
+ *
+ * @param[in] idx Index to free. May be NULL.
+ */
+static void index_free(Index *idx)
+{
+    if (idx == NULL) return;
+
+    for (size_t i = 0; i < idx->free_i; i++) {
+        LSUP_buffer_done(idx->entries[i].val);
+        free(idx->entries[i].val);
+    }
+
+    free(idx->entries);
+    free(idx);
+}
+
+
+/* * * GRAPH * * */
+
 int
 LSUP_graph_init(
         LSUP_Graph *gr, size_t capacity, char *uri_str,
         LSUP_store_type store_type)
 {
     if (uri_str == NULL) {
-        char gr_name[RANDOM_NAME_SIZE];
-        sprintf(gr_name, "urn:tmp:%d", rand());
+        char gr_name[UUIDSTR_SIZE + 9];
+
+        uuid_t uuid;
+        uuid_generate_random(uuid);
+
+        uuid_str_t uuid_str;
+        uuid_unparse_lower(uuid, uuid_str);
+        sprintf(gr_name, "urn:lsup:%s", uuid_str);
         gr->uri = LSUP_term_new(LSUP_TERM_URI, gr_name, NULL, NULL);
     } else {
         gr->uri = LSUP_term_new(LSUP_TERM_URI, uri_str, NULL, NULL);
     }
 
-    gr->keys = LSUP_keyset_new(capacity);
+    gr->keys = malloc(sizeof(Keyset));
+    keyset_init(gr->keys, capacity);
 
     switch (store_type ) {
         case LSUP_STORE_MEM:
-            gr->idx = LSUP_index_new(gr->keys->capacity);
+            gr->idx = index_new(gr->keys->capacity);
             break;
 
         case LSUP_STORE_MDB:
@@ -40,7 +374,7 @@ LSUP_graph_init(
             return -1;
     }
 
-    return 0;
+    return LSUP_OK;
 }
 
 
@@ -56,14 +390,48 @@ LSUP_graph_new(size_t capacity, char *uri_str, LSUP_store_type store_type)
 }
 
 
+/**
+ * Copy triples from a source graph into a destination one.
+ *
+ * The destination graph is not initialized, so the copy is cumulative.
+ */
+static int graph_copy_contents(LSUP_Graph *src, LSUP_Graph *dest)
+{
+    LSUP_Triple trp;
+    trp.s = NULL;
+    trp.p = NULL;
+    trp.o = NULL;
+
+    return LSUP_graph_match_callback(
+            src, dest, &trp, match_add_fn, true, NULL);
+}
+
+
 int
-LSUP_graph_add(LSUP_Graph *gr, LSUP_Triple data[], size_t data_size)
+LSUP_graph_copy(LSUP_Graph *src, LSUP_Graph *dest)
+{
+    LSUP_graph_init(dest, src->keys->capacity, NULL, src->store_type);
+
+    return graph_copy_contents(src, dest);
+}
+
+
+size_t
+LSUP_graph_capacity(LSUP_Graph *gr) { return gr->keys->capacity; }
+
+
+size_t
+LSUP_graph_size(LSUP_Graph *gr) { return gr->keys->free_i; }
+
+
+int
+LSUP_graph_add(LSUP_Graph *gr, const LSUP_Triple data[], size_t data_size)
 {
     // TODO Decouple this and build interface for memory and MDB integration.
 
     // Resize all at once if needed.
     if (gr->keys->capacity < gr->keys->free_i + data_size)
-        LSUP_keyset_resize(gr->keys, gr->keys->free_i + data_size);
+        keyset_resize(gr->keys, gr->keys->free_i + data_size);
 
     for (size_t i = 0; i < data_size; i++) {
         LSUP_SerTerm sspo[3];
@@ -79,9 +447,9 @@ LSUP_graph_add(LSUP_Graph *gr, LSUP_Triple data[], size_t data_size)
         // Add terms to index.
         // TODO Optimize. This copies sterm data twice: once to serialize,
         // once to add to index.
-        LSUP_index_add_pair(gr->idx, sk, sspo);
-        LSUP_index_add_pair(gr->idx, pk, sspo + 1);
-        LSUP_index_add_pair(gr->idx, ok, sspo + 2);
+        index_add_pair(gr->idx, sk, sspo);
+        index_add_pair(gr->idx, pk, sspo + 1);
+        index_add_pair(gr->idx, ok, sspo + 2);
 
         LSUP_buffer_done(sspo);
         LSUP_buffer_done(sspo + 1);
@@ -89,10 +457,10 @@ LSUP_graph_add(LSUP_Graph *gr, LSUP_Triple data[], size_t data_size)
 
         // Add triple.
         LSUP_TripleKey trp = {sk, pk, ok};
-        LSUP_keyset_add(gr->keys, &trp, LSUP_KS_CHECK_DUP);
+        keyset_add(gr->keys, &trp, LSUP_KS_CHECK_DUP);
     }
 
-    return 0;
+    return LSUP_OK;
 }
 
 
@@ -105,25 +473,116 @@ LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
 
     LSUP_TripleKey spok = {sk, pk, ok};
 
-    return LSUP_keyset_contains(gr->keys, &spok);
+    return keyset_contains(gr->keys, &spok);
 }
 
 
-int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
+/**
+ * Run a callback on the triples of a graph selected by a pattern.
+ *
+ * Each term of spo that is NULL is unbound. The keyset of gr is scanned
+ * from the start; for every non-deleted triple whose match result equals
+ * match_cond, callback_fn is invoked with the cursor of gr positioned on
+ * that triple. With a fully bound pattern and match_cond true the scan
+ * stops at the first match. res is passed through to the callback and is
+ * never re-initialized here.
+ *
+ * @return LSUP_OK if the callback ran at least once, LSUP_NOACTION if gr is
+ *  empty or no triple was selected.
+ */
+int LSUP_graph_match_callback(
+        LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo,
+        keyset_match_fn_t callback_fn, bool match_cond, void *ctx)
 {
-    LSUP_keyset_sparse_copy(ks1, res);
+    if (!keyset_seek(gr->keys, 0))
+        return LSUP_NOACTION;
 
-    if (LSUP_keyset_seek(ks2, 0)) {
-        do {
-            const LSUP_TripleKey *spok = LSUP_keyset_peek(ks2);
-            if (!is_null_trp(spok)) {
-                LSUP_keyset_add(
-                        res, spok, LSUP_KS_CHECK_DUP | LSUP_KS_CHECK_CAP);
+    // k2 is only meaningful for two-term patterns; it is initialized so
+    // that single-term comparisons never read an indeterminate value.
+    LSUP_Key k1 = NULL_KEY, k2 = NULL_KEY;
+    LSUP_key_cmp_fn_t cmp_fn;
+    int rc = LSUP_NOACTION;
+
+    LSUP_Key sk = LSUP_term_to_key(spo->s);
+    LSUP_Key pk = LSUP_term_to_key(spo->p);
+    LSUP_Key ok = LSUP_term_to_key(spo->o);
+
+    if (sk != NULL_KEY && pk != NULL_KEY && ok != NULL_KEY) { // s p o
+        LSUP_TripleKey spok = {sk, pk, ok};
+
+        // Scan with the cursor so that the callback finds it on the
+        // selected triple.
+        do {
+            const LSUP_TripleKey *cur = keyset_peek(gr->keys);
+            bool is_match = memcmp(cur, &spok, TRP_KLEN) == 0;
+
+            if (!is_null_trp(cur) && is_match == match_cond) {
+                callback_fn(gr, res, ctx);
+                rc = LSUP_OK;
+                // Shortcut for a positive 3-term match: stop at first hit.
+                if (match_cond) break;
             }
-        } while (LSUP_keyset_next(ks2));
+        } while (keyset_next(gr->keys));
+
+        return rc;
+
+    } else if (sk != NULL_KEY) {
+        k1 = sk;
+
+        if (pk != NULL_KEY) { // s p ?
+            k2 = pk;
+            cmp_fn = lookup_skpk_cmp_fn;
+
+        } else if (ok != NULL_KEY) { // s ? o
+            k2 = ok;
+            cmp_fn = lookup_skok_cmp_fn;
+
+        } else { // s ? ?
+            cmp_fn = lookup_sk_cmp_fn;
+
+        }
+
+    } else if (pk != NULL_KEY) {
+        k1 = pk;
+
+        if (ok != NULL_KEY) { // ? p o
+            k2 = ok;
+            cmp_fn = lookup_pkok_cmp_fn;
+
+        } else { // ? p ?
+            cmp_fn = lookup_pk_cmp_fn;
+        }
+
+    } else if (ok != NULL_KEY) { // ? ? o
+        k1 = ok;
+        cmp_fn = lookup_ok_cmp_fn;
+
+    } else { // ? ? ?
+        // No bound terms: every triple matches. This must not call
+        // LSUP_graph_copy(), which itself relies on this function with an
+        // unbound pattern and would recurse without end.
+        cmp_fn = lookup_none_cmp_fn;
     }
 
-    return 0;
+    do {
+        const LSUP_TripleKey *cur = keyset_peek(gr->keys);
+
+        // Deleted slots hold NULL_TRP and are never handed to the callback.
+        if (!is_null_trp(cur) && cmp_fn(cur, k1, k2) == match_cond) {
+            callback_fn(gr, res, ctx);
+            rc = LSUP_OK;
+        }
+    } while (keyset_next(gr->keys));
+
+    return rc;
+}
+
+
+int LSUP_graph_lookup(LSUP_Graph *gr, LSUP_Graph *res, const LSUP_Triple *spo)
+{
+    LSUP_graph_init(res, LOOKUP_GR_INIT_SIZE, NULL, LSUP_STORE_MEM);
+
+    LSUP_graph_match_callback(gr, res, spo, &match_add_fn, true, NULL);
+
+    return LSUP_OK;
+}
+
+
+int LSUP_graph_join(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
+{
+    LSUP_graph_copy(gr1, res);
+
+    return graph_copy_contents(gr2, res);
 }
 
 
@@ -131,16 +590,17 @@ int LSUP_graph_subtract(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
     LSUP_graph_init(res, gr1->keys->capacity, NULL, LSUP_STORE_MEM);
 
-    if (LSUP_keyset_seek(ks1, 0)) {
-        do {
-            const LSUP_TripleKey *spok = LSUP_keyset_peek(ks1);
-            if (!is_null_trp(spok) && !LSUP_keyset_contains(ks2, spok)) {
-                LSUP_keyset_add(res, spok, 0);
-            }
-        } while (LSUP_keyset_next(ks1));
-    }
+    if (!keyset_seek(gr1->keys, 0))
+        return LSUP_OK;
+
+    do {
+        const LSUP_TripleKey *spok = keyset_peek(gr1->keys);
+        if (!is_null_trp(spok) && !keyset_contains(gr2->keys, spok)) {
+            match_add_fn(res, gr1, NULL);
+        }
+    } while (keyset_next(gr1->keys));
 
-    return 0;
+    return LSUP_OK;
 }
 
 
@@ -148,53 +608,96 @@ int LSUP_graph_intersect(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
     LSUP_graph_init(res, gr1->keys->capacity, NULL, LSUP_STORE_MEM);
 
-    if (LSUP_keyset_seek(ks1, 0)) {
-        do {
-            const LSUP_TripleKey *spok = LSUP_keyset_peek(ks1);
-            if (!is_null_trp(spok) && LSUP_keyset_contains(ks2, spok)) {
-                LSUP_keyset_add(res, spok, 0);
-            }
-        } while (LSUP_keyset_next(ks1));
-    }
+    if (!keyset_seek(gr1->keys, 0) || gr2->keys->free_i == 0)
+        return LSUP_OK;
 
-    return 0;
+    do {
+        const LSUP_TripleKey *spok = keyset_peek(gr1->keys);
+        if (!is_null_trp(spok) && keyset_contains(gr2->keys, spok)) {
+            match_add_fn(res, gr1, NULL);
+        }
+    } while (keyset_next(gr1->keys));
+
+    return LSUP_OK;
 }
 
 
 int LSUP_graph_xor(LSUP_Graph *gr1, LSUP_Graph *gr2, LSUP_Graph *res)
 {
-    LSUP_graph_init(res, gr1->keys->capacity, NULL, LSUP_STORE_MEM);
+    if (!keyset_seek(gr1->keys, 0))
+        return LSUP_graph_copy(gr2, res);
 
-    if (LSUP_keyset_seek(ks1, 0)) {
-        do {
-            const LSUP_TripleKey *spok = LSUP_keyset_peek(ks1);
-            if (!is_null_trp(spok) && !LSUP_keyset_contains(ks2, spok)) {
-                LSUP_keyset_add(res, spok, 0);
-            }
-        } while (LSUP_keyset_next(ks1));
-    }
+    if (!keyset_seek(gr2->keys, 0))
+        return LSUP_graph_copy(gr1, res);
 
-    if (LSUP_keyset_seek(ks2, 0)) {
-        do {
-            const LSUP_TripleKey *spok = LSUP_keyset_peek(ks2);
-            if (!is_null_trp(spok) && !LSUP_keyset_contains(ks1, spok)) {
-                LSUP_keyset_add(res, spok, 0);
-            }
-        } while (LSUP_keyset_next(ks2));
-    }
+    LSUP_graph_init(res, gr1->keys->capacity, NULL, LSUP_STORE_MEM);
 
-    return 0;
+    do {
+        const LSUP_TripleKey *spok = keyset_peek(gr1->keys);
+        if (!is_null_trp(spok) && !keyset_contains(gr2->keys, spok)) {
+            match_add_fn(gr1, res, NULL);
+        }
+    } while (keyset_next(gr1->keys));
+
+    do {
+        const LSUP_TripleKey *spok = keyset_peek(gr2->keys);
+        if (!is_null_trp(spok) && !keyset_contains(gr1->keys, spok)) {
+            match_add_fn(gr2, res, NULL);
+        }
+    } while (keyset_next(gr2->keys));
+
+    return LSUP_OK;
 }
 
 
 void
 LSUP_graph_free(LSUP_Graph *gr)
 {
-    if(LIKELY(gr != NULL)) {
+    if (LIKELY(gr != NULL)) {
         LSUP_term_free(gr->uri);
-        LSUP_keyset_free(gr->keys);
-        LSUP_index_free(gr->idx);
+        keyset_free(gr->keys);
+        index_free(gr->idx);
         free(gr);
     }
 }
 
+
+/* * CALLBACKS * */
+
+/**
+ * Callback for adding a matched triple.
+ *
+ * Appends the triple under the cursor of src to the keyset of dest, growing
+ * dest when it is full, and copies the three terms from the index of src to
+ * the index of dest. No duplicate check is made on the triple. The cursor
+ * of dest is left on the newly added triple.
+ *
+ * The source graph cursor must be set to the triple to be copied.
+ *
+ * @param[in] src Graph whose current triple is copied.
+ * @param[in,out] dest Graph receiving the triple.
+ * @param[in] ctx Unused.
+ *
+ * @return LSUP_OK.
+ */
+int match_add_fn(LSUP_Graph *src, LSUP_Graph *dest, void *ctx)
+{
+    (void) ctx;
+
+    Keyset *dks = dest->keys;
+
+    // It is the destination that must have room for one more triple.
+    if (dks->free_i >= dks->capacity) {
+        size_t new_cap = dks->capacity > 0 ? dks->capacity * 2 : 1;
+
+        keyset_resize(dks, new_cap);
+        dks->capacity = new_cap;
+    }
+
+    // Take a private copy after resizing: src and dest may be the same
+    // graph, in which case the key array may just have moved.
+    LSUP_TripleKey spok;
+    memcpy(spok, keyset_peek(src->keys), TRP_KLEN);
+
+    memcpy(dks->data + dks->free_i, spok, TRP_KLEN);
+
+    // index_new() returns NULL for a zero capacity; skip the term copy then.
+    if (src->idx != NULL && dest->idx != NULL) {
+        for (size_t i = 0; i < 3; i++) {
+            LSUP_SerTerm *sterm = index_lookup(src->idx, spok[i]);
+
+            if (sterm != NULL)
+                index_add_pair(dest->idx, spok[i], sterm);
+        }
+    }
+
+    dks->cur = dks->free_i;
+    dks->free_i ++;
+
+    return LSUP_OK;
+}
+
+
+/**
+ * Callback for removing a matched triple.
+ *
+ * Overwrites the triple under the cursor of src with NULL_TRP. The slot is
+ * not reclaimed.
+ *
+ * @param[in,out] src Graph whose current triple is removed.
+ * @param[in] dest Unused.
+ * @param[in] ctx Unused.
+ *
+ * @return LSUP_OK.
+ */
+int match_rm_fn(LSUP_Graph *src, LSUP_Graph *dest, void *ctx)
+{
+    (void) dest;
+    (void) ctx;
+
+    memcpy(keyset_peek(src->keys), &NULL_TRP, TRP_KLEN);
+
+    return LSUP_OK;
+}

+ 5 - 2
test/test_graph.c

@@ -5,6 +5,8 @@
 
 static int _create_triples(LSUP_Triple *trp)
 {
+    // These constitute overall 10 individual triples, 8 unique.
+
     trp[0].s = LSUP_term_new(LSUP_TERM_URI, "urn:s:0", NULL, NULL);
     trp[0].p = LSUP_term_new(LSUP_TERM_URI, "urn:p:0", NULL, NULL);
     trp[0].o = LSUP_term_new(LSUP_TERM_URI, "urn:o:0", NULL, NULL);
@@ -36,17 +38,18 @@ static int _create_triples(LSUP_Triple *trp)
     trp[6].o = LSUP_term_new(
             LSUP_TERM_LITERAL, "String 3", "xsd:string", "es-ES");
 
-    // Let's reuse pointers. They should not double-free.
+    // Let's reuse pointers. Do not double-free.
     trp[7].s = trp[0].s; // <urn:s:0>
     trp[7].p = trp[2].p; // <urn:p:2>
     trp[7].o = trp[5].o; // "String 2"^^xsd:string
 
-    // Duplicate of trp[7]
+    // Duplicate of trp[7]. Do not double-free.
     trp[8].s = trp[0].s;
     trp[8].p = trp[2].p;
     trp[8].o = trp[5].o;
 
     // Duplicate of trp[7] from different pointers with same value.
+    // Do not double-free.
     trp[9].s = trp[5].s;
     trp[9].p = trp[4].p;
     trp[9].o = trp[5].o;