Browse Source

Initial commit, most test coverage.

Stefano Cossu 4 years ago
commit
79ddd34850

+ 58 - 0
.gitignore

@@ -0,0 +1,58 @@
+# Prerequisites
+*.d
+
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Linker output
+*.ilk
+*.map
+*.exp
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
+*.su
+*.idb
+*.pdb
+
+# Kernel Module Compile Results
+*.mod*
+*.cmd
+.tmp_versions/
+modules.order
+Module.symvers
+Mkfile.old
+dkms.conf
+
+# Compiled binary files
+bin/*
+
+# Valgrind data
+vgcore.*

+ 3 - 0
.gitmodules

@@ -0,0 +1,3 @@
+[submodule "ext/xxHash"]
+	path = ext/xxHash
+	url = https://github.com/Cyan4973/xxHash.git

+ 31 - 0
Makefile

@@ -0,0 +1,31 @@
+# None of these targets names a file it produces. `test` in particular shares
+# its name with the test/ directory, so without .PHONY make would consider it
+# up to date and never run the recipe.
+.PHONY: default all check build test
+
+default: all
+
+all: test
+
+check:
+	splint \
+	-Iinclude -Iinclude/structures -Iinclude/model/rdf -Iext/binn/src \
+	-Iext/xxHash \
+	-preproc \
+	test.c
+
+# bin/ is ignored by git, so it does not exist in a fresh checkout.
+build:
+	mkdir -p bin
+	gcc -g -Wall \
+		-std=c99 \
+		-DDEBUG \
+		-fPIC -shared \
+		-Iinclude -Iinclude/structures -Iinclude/model/rdf \
+		-Iext/xxHash \
+		ext/xxHash/xxhash.c \
+		src/*.c src/structures/*.c src/model/rdf/*.c \
+		-o bin/lsup_rdf.so
+
+test:
+	mkdir -p bin
+	gcc -g -Wall \
+		-std=c99 \
+		-DDEBUG \
+		-Iinclude -Iinclude/structures -Iinclude/model/rdf \
+		-Iext/xxHash \
+		-Itest \
+		ext/xxHash/xxhash.c \
+		src/*.c src/structures/*.c src/model/rdf/*.c test.c \
+		-o bin/test

+ 1 - 0
ext/xxHash

@@ -0,0 +1 @@
+Subproject commit dda230e4dd269374cbc3ea4f1b41d681fec2d380

+ 52 - 0
include/core.h

@@ -0,0 +1,52 @@
+#ifndef _LSUP_CORE_H
+#define _LSUP_CORE_H
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef DEBUG
+#define DEBUG_TEST 1
+#else
+#define DEBUG_TEST 0
+#endif
+// Format string for TRACE calls that log a single string, e.g.
+// TRACE(STR, "message"). Note that TRACE appends a newline of its own.
+#define STR "%s\n"
+// Print a diagnostic line to stderr, prefixed with the source file, line
+// number and function name. `DEBUG_TEST` is a compile-time constant, so the
+// fprintf call is always compiled but only produces output when DEBUG is
+// defined. At least one argument after `fmt` is required, because
+// `__VA_ARGS__` follows an unconditional comma.
+#define TRACE(fmt, ...) \
+        do {\
+            if (DEBUG_TEST) \
+                fprintf(stderr, "%s:%d:%s(): " fmt "\n", \
+                        __FILE__,  __LINE__, __func__, __VA_ARGS__); \
+        } while (0)
+
+// Branch prediction hints: tell the compiler which outcome of `x` to expect.
+#define LIKELY(x)       __builtin_expect(!!(x), true)
+#define UNLIKELY(x)     __builtin_expect(!!(x), false)
+// TODO Handle memory errors better.
+// Abort the process if `exp` evaluates to NULL. `exp` is typically an
+// assignment from an allocation call. NOTE: this expands to a bare `if`
+// block, not `do { } while (0)`, so it must not be used as the unbraced body
+// of an `if` that has an `else`.
+#define CRITICAL(exp)   if (UNLIKELY(((exp) == NULL))) { abort(); }
+
+// NOTE This may change in the future, e.g. if a different key size is to
+// be forced.
+typedef size_t LSUP_Key;
+typedef LSUP_Key LSUP_DoubleKey[2];
+typedef LSUP_Key LSUP_TripleKey[3];
+typedef LSUP_Key LSUP_QuadKey[4];
+
+
+// "NULL" key, a value that is never user-provided. Used to mark special
+// values (e.g. deleted records).
+#define NULL_KEY 0
+// Value of first key inserted in an empty term database.
+#define FIRST_KEY 1
+// "NULL" triple, a value that is never user-provided. Used to fill deleted
+// triples in a keyset.
+extern LSUP_TripleKey NULL_TRP;
+
+// Don't use MIN and MAX macros: see
+// https://dustri.org/b/min-and-max-macro-considered-harmful.html
+/** Return the smaller of two ints (`y` when they compare equal). */
+inline int min(int x, int y)
+{
+    if (x < y)
+        return x;
+
+    return y;
+}
+
+/** Return the larger of two ints (`y` when they compare equal). */
+inline int max(int x, int y)
+{
+    if (x > y)
+        return x;
+
+    return y;
+}
+
+#endif

+ 7 - 0
include/lsup_rdf.h

@@ -0,0 +1,7 @@
+#ifndef _LSUP_RDF_H
+#define _LSUP_RDF_H
+
+#include "model/rdf/graph.h"
+#include "structures/keyset.h"
+
+#endif

+ 58 - 0
include/model/rdf/graph.h

@@ -0,0 +1,58 @@
+#ifndef _LSUP_GRAPH_H
+#define _LSUP_GRAPH_H
+
+#include "structures/keyset.h"
+#include "structures/index.h"
+#include "model/rdf/triple.h"
+
+
+// Storage back end selected for a graph.
+typedef enum LSUP_store_type {
+    LSUP_STORE_MEM, // In-memory keyset and term index.
+    LSUP_STORE_MDB // Not implemented yet; LSUP_graph_init() rejects it.
+} LSUP_store_type;
+
+// An RDF graph: a set of triple keys plus an index that maps each key back
+// to its serialized term.
+typedef struct LSUP_Graph {
+    LSUP_store_type store_type; // Back end requested at initialization.
+    LSUP_Keyset *keys;          // Triples, stored as (s, p, o) term keys.
+    const LSUP_Term *uri;       // Graph name; only the pointer is stored.
+    LSUP_Index *idx;            // Key -> serialized term lookup table.
+} LSUP_Graph;
+
+
+typedef void (*lookup_callback_fn_t)(
+    LSUP_Graph gr, const LSUP_TripleKey* spok_p, void* ctx
+);
+
+
+int
+LSUP_graph_init(
+        LSUP_Graph *gr, size_t capacity, const LSUP_Term *uri,
+        LSUP_store_type store_type);
+
+LSUP_Graph *
+LSUP_graph_new(
+        size_t capacity, const LSUP_Term *uri,
+        LSUP_store_type store_type);
+
+bool
+LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *t);
+
+/**
+ * Add triples to a graph.
+ */
+int
+LSUP_graph_add(LSUP_Graph *gr, LSUP_Triple data[], size_t data_size);
+
+void
+LSUP_graph_free(LSUP_Graph *gr);
+
+
+/** Extern inline functions. */
+
+/** Number of triples the graph's keyset has room for. */
+inline size_t
+LSUP_graph_capacity(LSUP_Graph *gr)
+{
+    const LSUP_Keyset *ks = gr->keys;
+
+    return ks->capacity;
+}
+
+/** Number of occupied slots in the graph's keyset (its `free_i` index). */
+inline size_t
+LSUP_graph_size(LSUP_Graph *gr)
+{
+    const LSUP_Keyset *ks = gr->keys;
+
+    return ks->free_i;
+}
+
+#endif

+ 140 - 0
include/model/rdf/term.h

@@ -0,0 +1,140 @@
+#ifndef LSUP_TERM_H
+#define LSUP_TERM_H
+
+#include <assert.h>
+#include <regex.h>
+
+#include "xxhash.h"
+
+#include "buffer.h"
+
+// URI parsing regular expression. Conforms to RFC3986.
+#define URI_REGEX_STR \
+    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
+#define SEED 0 // TODO Make configurable.
+#define LANG_SIZE 8 // Size in chars of lang tag
+
+
+typedef LSUP_Buffer LSUP_SerTerm;
+typedef XXH64_hash_t LSUP_TermHash64;
+typedef char langtag[LANG_SIZE];
+
+typedef enum LSUP_term_type {
+    LSUP_TERM_UNDEFINED,
+    LSUP_TERM_URI,
+    LSUP_TERM_BNODE,
+    LSUP_TERM_LITERAL
+} LSUP_term_type;
+
+// An RDF term: URI, blank node or literal.
+typedef struct LSUP_Term {
+    LSUP_term_type type;
+    // This language variable currently supports a 2-letter ISO 639 language
+    // code and a 2-character ISO 3166-1 country code, separated by a hyphen.
+    // See https://tools.ietf.org/html/bcp47#section-2.1
+    // An all-zero tag means "no language".
+    langtag lang;
+    // Heap-allocated copy of the datatype string, or NULL if none was given.
+    char *datatype;
+    // Heap-allocated copy of the term's lexical value.
+    char *data;
+} LSUP_Term;
+
+
+/*
+ * Initialize a pre-allocated term structure.
+ *
+ * the structure can be an already initialized term, and can be reused
+ * without freeing it.
+ */
+int
+LSUP_term_init(
+        LSUP_Term *term, LSUP_term_type type,
+        char *data, char *datatype, char *lang);
+
+LSUP_Term *
+LSUP_term_new(LSUP_term_type type, char *data, char *datatype, char *lang);
+
+/** Simple ad-hoc serialization function.
+ *
+ * This function allocates and returns the following byte sequence:
+ *
+ * - `sizeof(char)` bytes for the term type;
+ * - Arbitrary bytes with NUL-terminated strings for data and datatype;
+ * - `LANG_SIZE` bytes for the language tag, if present, after the datatype.
+ *
+ * The index for `data` is consistently `sizeof(char)`. The index for
+ * `datatype` is found by the terminating NUL for `data`.
+ *
+ * Serialized representations of some RDF terms:
+ *
+ * <http://hello.org>
+ *
+ * 0      1                size=18
+ * | \x01 | http://hello.org\x00 |
+ * type   data
+ *
+ * "hello"
+ *
+ * 0      1      size=7
+ * | \x03 | hello\x00 |
+ * type   data
+ *
+ * "hello"^^xsd:string
+ *
+ * 0      1           7          size=18
+ * | \x03 | hello\x00 | xsd:string\x00 |
+ * type   data        datatype
+ *
+ * (note: the "xsd:" prefix is used for simplification here, it would
+ * normally be a fully qualified URI)
+ *
+ * "hello"@en-US
+ *
+ * 0      1           7               18                 size=26
+ * | \x03 | hello\x00 | xsd:string\x00 | en-US\x00\x00\x00 |
+ * type   data        datatype         lang
+ */
+int LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm);
+
+int
+LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term);
+
+
+/** Hash a serialized term into a key with XXH64, using the fixed SEED. */
+inline LSUP_Key
+LSUP_sterm_to_key(const LSUP_SerTerm *sterm)
+{
+    return (LSUP_Key)XXH64(sterm->addr, sterm->size, SEED);
+}
+
+
+/**
+ * Compute the key of a term: serialize it into a scratch buffer, hash the
+ * buffer, then release the scratch storage.
+ */
+inline LSUP_Key
+LSUP_term_to_key(const LSUP_Term *term)
+{
+    LSUP_Buffer scratch;
+
+    LSUP_term_serialize(term, &scratch);
+    const LSUP_Key hash = LSUP_sterm_to_key(&scratch);
+    LSUP_buffer_done(&scratch);
+
+    return hash;
+}
+
+/**
+ * Compare two terms.
+ */
+bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2);
+
+/*
+// TODO Implement when xxhash v0.8 is released with stable xxhash128 function.
+XXH128_hash_t
+LSUP_term_hash128(const LSUP_Term *term);
+*/
+
+void
+LSUP_term_done(LSUP_Term *term);
+
+void
+LSUP_term_free(LSUP_Term *term);
+
+#endif

+ 13 - 0
include/model/rdf/triple.h

@@ -0,0 +1,13 @@
+#ifndef LSUP_TRIPLE_H
+#define LSUP_TRIPLE_H
+
+#include "model/rdf/term.h"
+
+// The include guard above prevents the struct definitions below from being
+// redefined when this header is reached through more than one include path.
+
+// A triple of term pointers: subject, predicate, object.
+typedef struct LSUP_Triple {
+    LSUP_Term *s;
+    LSUP_Term *p;
+    LSUP_Term *o;
+} LSUP_Triple;
+
+// The same triple with each term in serialized (buffer) form.
+typedef struct LSUP_SerTriple {
+    LSUP_SerTerm *s;
+    LSUP_SerTerm *p;
+    LSUP_SerTerm *o;
+} LSUP_SerTriple;
+
+#endif

+ 28 - 0
include/structures/buffer.h

@@ -0,0 +1,28 @@
+#ifndef _LSUP_BUFFER_H
+#define _LSUP_BUFFER_H
+
+#include "core.h"
+
+typedef struct LSUP_Buffer {
+    void *addr;
+    size_t size;
+} LSUP_Buffer;
+
+
+LSUP_Buffer *LSUP_buffer_new(size_t size);
+
+int LSUP_buffer_init(LSUP_Buffer *buf, size_t size);
+
+void LSUP_buffer_print(const LSUP_Buffer *buf);
+
+void LSUP_buffer_done(LSUP_Buffer *buf);
+
+/** True if both buffers have the same size and identical contents. */
+inline bool LSUP_buffer_eq(
+        const LSUP_Buffer *buf1, const LSUP_Buffer *buf2)
+{
+    const size_t len = buf1->size;
+
+    // The size test short-circuits, so memcmp only runs on equal lengths.
+    return len == buf2->size && memcmp(buf1->addr, buf2->addr, len) == 0;
+}
+
+#endif

+ 39 - 0
include/structures/index.h

@@ -0,0 +1,39 @@
+/** Limited-scope, fast implementation of a bi-directional hash table.
+ *
+ * This data structure holds an array of key-value pairs. It is append-only.
+ *
+ * A key is the result of a hash function applied to the value, and keys are
+ * unique within an index. Hence, both keys and values are unique and lookups
+ * can be done both ways.
+ *
+ * A value is a pointer to a `LSUP_SerTerm` structure. Comparison for
+ * uniqueness is done by comparing the hashes/keys.
+ *
+ * To find a key by its value, it is sufficient to apply the hash function to
+ * the value without having to look up the table.
+ */
+
+#include "structures/buffer.h"
+#include "model/rdf/term.h"
+
+typedef struct Index LSUP_Index;
+
+LSUP_Index *LSUP_index_new(size_t capacity);
+
+/**
+ * Add a key/value pair. The key must be calculated in advance.
+ *
+ * This function takes ownership of the serialized term.
+ */
+int LSUP_index_add_pair(LSUP_Index *idx, LSUP_Key key, LSUP_SerTerm *sterm);
+
+/**
+ * Add a term and automatically calculate the key.
+ *
+ * This function takes ownership of the serialized term.
+ */
+int LSUP_index_add(LSUP_Index *idx, LSUP_SerTerm *sterm);
+
+LSUP_SerTerm *LSUP_index_lookup(LSUP_Index *idx, LSUP_Key key);
+
+void LSUP_index_free(LSUP_Index *idx);

+ 138 - 0
include/structures/keyset.h

@@ -0,0 +1,138 @@
+#ifndef LSUP_KEYSET_H
+#define LSUP_KEYSET_H
+
+#include "core.h"
+
+// Bit flags accepted by LSUP_keyset_add(); they may be OR-ed together.
+typedef enum LSUP_KSFlag {
+    LSUP_KS_CHECK_CAP = 1 << 0, // Grow the keyset first if it is full.
+    LSUP_KS_CHECK_DUP = 1 << 1 // Skip the insert if the key already exists.
+} LSUP_KSFlag;
+
+// A growable array of triple keys with a cursor.
+typedef struct keyset {
+    LSUP_TripleKey  *data;          // Storage for `capacity` triple keys.
+    size_t          capacity;       // Number of records `data` can hold.
+    size_t          cur;            // Cursor: index of the current record.
+    size_t          free_i;         // Index of the first unused slot.
+    float           expand_ratio;   // Growth fraction used when expanding.
+} LSUP_Keyset;
+
+
+int LSUP_keyset_init(LSUP_Keyset *ks, size_t capacity, float expand_ratio);
+
+LSUP_Keyset *LSUP_keyset_new(size_t capacity, float expand_ratio);
+
+
+/**
+ * Move the cursor to position `idx`.
+ *
+ * Returns false, leaving the cursor unchanged, if `idx` lies at or beyond the
+ * first unused slot.
+ */
+inline bool LSUP_keyset_seek(LSUP_Keyset* ks, size_t idx)
+{
+    const bool in_range = idx < ks->free_i;
+
+    if (in_range)
+        ks->cur = idx;
+
+    return in_range;
+}
+
+
+/** Number of occupied slots, i.e. the index of the first unused one. */
+inline size_t LSUP_keyset_size(LSUP_Keyset* ks)
+{
+    const size_t used = ks->free_i;
+
+    return used;
+}
+
+
+/** Current cursor position. */
+inline size_t LSUP_keyset_tell(LSUP_Keyset* ks)
+{
+    const size_t pos = ks->cur;
+
+    return pos;
+}
+
+
+/** Return a read-only pointer to the record under the cursor (no copy). */
+inline const LSUP_TripleKey *LSUP_keyset_peek(LSUP_Keyset *ks)
+{
+    LSUP_TripleKey *here = &ks->data[ks->cur];
+
+    return (const LSUP_TripleKey *)here;
+}
+
+
+/** Linear scan: true if any occupied slot holds exactly the triple `val`. */
+inline bool LSUP_keyset_contains(
+        const LSUP_Keyset *ks, const LSUP_TripleKey *val)
+{
+    for (size_t i = 0; i < ks->free_i; i++) {
+        const LSUP_Key *row = ks->data[i];
+
+        // Compare object, then subject, then predicate, bailing out at the
+        // first mismatch.
+        if (row[2] != (*val)[2]) continue;
+        if (row[0] != (*val)[0]) continue;
+        if (row[1] != (*val)[1]) continue;
+
+        return true;
+    }
+
+    return false;
+}
+
+
+/**
+ * Advance the cursor by one record.
+ *
+ * Returns false, without moving, when the keyset is empty or the cursor is
+ * already on the last occupied slot.
+ */
+inline bool LSUP_keyset_next(LSUP_Keyset *ks)
+{
+    if (ks->free_i == 0)
+        return false;
+
+    if (ks->cur >= ks->free_i - 1)
+        return false;
+
+    ks->cur ++;
+
+    return true;
+}
+
+/**
+ * Resize the keyset capacity.
+ *
+ * Size cannot be smaller than `free_i`. Therefore, specifying a size of 0 will
+ * always compact the keyset to the current occupied space.
+ */
+int LSUP_keyset_resize(LSUP_Keyset *ks, size_t new_size);
+
+/**
+ * Add a single key.
+ */
+int LSUP_keyset_add(
+        LSUP_Keyset *ks, const LSUP_TripleKey *val, LSUP_KSFlag flags);
+
+int LSUP_keyset_remove(LSUP_Keyset *ks, const LSUP_TripleKey *val);
+
+int LSUP_keyset_copy(const LSUP_Keyset *src, LSUP_Keyset *dest);
+
+int LSUP_keyset_sparse_copy(LSUP_Keyset *src, LSUP_Keyset *dest);
+
+int LSUP_keyset_lookup(
+        LSUP_Keyset *ks, LSUP_Keyset *res,
+        const LSUP_Key sk, const LSUP_Key pk, const LSUP_Key ok);
+
+/**
+ * Set-theoretical union (ks1 ∪ ks2).
+ *
+ * The resulting Keyset is initialized beforehand and is not compacted.
+ */
+int LSUP_keyset_join(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res);
+
+/**
+ * Set-theoretical complement (ks1 \ ks2).
+ *
+ * The resulting Keyset is initialized beforehand and is not compacted.
+ */
+int LSUP_keyset_subtract(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res);
+
+/**
+ * Set-theoretical intersection (ks1 ∩ ks2).
+ *
+ * The resulting Keyset is initialized beforehand and is not compacted.
+ */
+int LSUP_keyset_intersect(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res);
+
+/**
+ * Disjunctive union (XOR) (ks1 ⊕ ks2).
+ *
+ * The resulting Keyset is initialized beforehand and is not compacted.
+ */
+int LSUP_keyset_xor(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res);
+
+
+void LSUP_keyset_done(LSUP_Keyset *ks);
+
+void LSUP_keyset_free(LSUP_Keyset *ks);
+
+#endif

+ 5 - 0
src/core.c

@@ -0,0 +1,5 @@
+#include "core.h"
+
+// Extern inline prototypes.
+int min(int x, int y);
+int max(int x, int y);

+ 109 - 0
src/model/rdf/graph.c

@@ -0,0 +1,109 @@
+#include "model/rdf/graph.h"
+
+/**
+ * Extern inline functions.
+ */
+size_t LSUP_graph_size(LSUP_Graph *gr);
+
+size_t LSUP_graph_capacity(LSUP_Graph *gr);
+
+
+/**
+ * Initialize a pre-allocated graph structure.
+ *
+ * `uri` must be a term of type LSUP_TERM_URI; only the pointer is stored.
+ * Only LSUP_STORE_MEM is supported for now.
+ *
+ * Returns 0 on success, or -1 if the URI is missing or of the wrong type, the
+ * store type is unsupported, or the term index could not be created (e.g.
+ * `capacity` is 0). Nothing is left allocated on failure.
+ */
+int
+LSUP_graph_init(
+        LSUP_Graph *gr, size_t capacity, const LSUP_Term *uri,
+        LSUP_store_type store_type)
+{
+    if (uri == NULL || uri->type != LSUP_TERM_URI)
+        return -1;
+
+    // Validate the back end before allocating, so that rejecting it cannot
+    // leak the keyset.
+    switch (store_type) {
+        case LSUP_STORE_MEM:
+            break;
+
+        case LSUP_STORE_MDB:
+            // TODO Not implemented yet; treated like an unknown store.
+            /* fallthrough */
+        default:
+            return -1;
+    }
+
+    gr->store_type = store_type;
+    gr->uri = uri;
+
+    gr->keys = LSUP_keyset_new(capacity, .75);
+
+    gr->idx = LSUP_index_new(gr->keys->capacity);
+    if (gr->idx == NULL) {
+        LSUP_keyset_free(gr->keys);
+        gr->keys = NULL;
+
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * Allocate and initialize a graph.
+ *
+ * Returns the new graph, to be released with LSUP_graph_free(), or NULL if
+ * initialization fails (see LSUP_graph_init()). Aborts if the graph structure
+ * itself cannot be allocated.
+ */
+LSUP_Graph *
+LSUP_graph_new(
+        size_t capacity, const LSUP_Term *uri,
+        LSUP_store_type store_type)
+{
+    LSUP_Graph *gr;
+    CRITICAL(gr = malloc(sizeof(*gr)));
+
+    // Do not hand out a graph whose members were never set up.
+    if (LSUP_graph_init(gr, capacity, uri, store_type) != 0) {
+        free(gr);
+
+        return NULL;
+    }
+
+    return gr;
+}
+
+
+/**
+ * Add `data_size` triples from `data` to the graph.
+ *
+ * Each term is serialized and hashed into a key; the serialized form is
+ * registered in the term index and the key triple is appended to the keyset
+ * unless it is already present.
+ *
+ * Returns 0.
+ */
+int
+LSUP_graph_add(LSUP_Graph *gr, LSUP_Triple data[], size_t data_size)
+{
+    // TODO Decouple this and build interface for memory and MDB integration.
+
+    // Resize all at once if needed.
+    if (gr->keys->capacity < gr->keys->free_i + data_size)
+        LSUP_keyset_resize(gr->keys, gr->keys->free_i + data_size);
+
+    for (size_t i = 0; i < data_size; i++) {
+        const LSUP_Term *terms[3] = {data[i].s, data[i].p, data[i].o};
+        LSUP_TripleKey trp;
+
+        for (size_t j = 0; j < 3; j++) {
+            // Scratch buffer on the stack; LSUP_term_serialize() allocates
+            // its payload.
+            LSUP_SerTerm sterm;
+
+            LSUP_term_serialize(terms[j], &sterm);
+            trp[j] = LSUP_sterm_to_key(&sterm);
+
+            // Add term to index. The index stores its own copy of the bytes,
+            // so the scratch payload is released right after.
+            LSUP_index_add_pair(gr->idx, trp[j], &sterm);
+            LSUP_buffer_done(&sterm);
+        }
+
+        // Add triple.
+        LSUP_keyset_add(gr->keys, &trp, LSUP_KS_CHECK_DUP);
+    }
+
+    return 0;
+}
+
+
+/** True if the graph's keyset holds the key triple derived from `spo`. */
+bool
+LSUP_graph_contains(const LSUP_Graph *gr, const LSUP_Triple *spo)
+{
+    LSUP_TripleKey spok;
+
+    // Hash subject, predicate and object, in that order.
+    spok[0] = LSUP_term_to_key(spo->s);
+    spok[1] = LSUP_term_to_key(spo->p);
+    spok[2] = LSUP_term_to_key(spo->o);
+
+    return LSUP_keyset_contains(gr->keys, &spok);
+}
+
+
+/** Release a graph with its keyset and index. NULL is ignored. */
+void
+LSUP_graph_free(LSUP_Graph *gr)
+{
+    if (!LIKELY(gr != NULL))
+        return;
+
+    LSUP_keyset_free(gr->keys);
+    LSUP_index_free(gr->idx);
+    free(gr);
+}
+

+ 198 - 0
src/model/rdf/term.c

@@ -0,0 +1,198 @@
+//#define PY_SSIZE_T_CLEAN
+//#include <Python.h>
+
+#include "model/rdf/term.h"
+
+#define CHR         sizeof(char)
+#define NLEN(str)   (str) == NULL ? 0 : strlen((str))
+
+/**
+ * Initialize a pre-allocated term structure with copies of the given strings.
+ *
+ * `data` is mandatory; `datatype` and `lang` may be NULL. For URI terms,
+ * `data` is checked against URI_REGEX_STR. `lang` is copied up to its
+ * terminator or LANG_SIZE - 1 characters, whichever comes first, and the rest
+ * of the tag is zero-filled, so the stored tag is always NUL-terminated.
+ *
+ * Returns 0 on success, -1 if `data` is NULL or the URI check fails; in that
+ * case `data` and `datatype` are left NULL. Aborts on allocation failure.
+ */
+int
+LSUP_term_init(
+        LSUP_Term *term, LSUP_term_type type,
+        char *data, char *datatype, char *lang) {
+
+    term->type = type;
+    // Keep the term safe to pass to LSUP_term_done() if we bail out early.
+    term->data = NULL;
+    term->datatype = NULL;
+
+    if (data == NULL) return -1;
+
+    if (term->type == LSUP_TERM_URI) {
+        TRACE(STR, "Checking URI term.");
+        // TODO Move this to a code block that is only executed once.
+        regex_t ptn;
+        // Checked explicitly rather than with assert(), which disappears
+        // under NDEBUG.
+        if (regcomp(&ptn, URI_REGEX_STR, REG_EXTENDED|REG_NOSUB) != 0)
+            return -1;
+
+        int status = regexec(&ptn, data, 0, NULL, 0);
+        regfree(&ptn);
+        if (status != 0) {
+            printf("Error matching URI pattern.\n");
+
+            return(-1);
+        }
+        TRACE(STR, "URI checked.");
+    }
+
+    CRITICAL(term->data = malloc(strlen(data) + 1));
+    strcpy(term->data, data);
+
+    if (datatype != NULL) {
+        CRITICAL(term->datatype = malloc(strlen(datatype) + 1));
+        strcpy(term->datatype, datatype);
+    }
+
+    if (lang != NULL) {
+        // TODO validate language and country code
+        // Never read past the caller's terminator: `lang` may be shorter
+        // than LANG_SIZE bytes.
+        size_t len = 0;
+        while (len < LANG_SIZE - 1 && lang[len] != '\0') len++;
+
+        // memmove, in case the caller passed the term's own tag.
+        memmove(term->lang, lang, len);
+        memset(term->lang + len, 0, LANG_SIZE - len);
+    } else {
+        memset(term->lang, 0, LANG_SIZE);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Allocate and initialize a term.
+ *
+ * Returns the new term, to be released with LSUP_term_free(), or NULL if
+ * LSUP_term_init() rejects the arguments. Aborts if the term structure
+ * itself cannot be allocated.
+ */
+LSUP_Term
+*LSUP_term_new(
+        LSUP_term_type type, char *data, char *datatype, char *lang) {
+
+    LSUP_Term *term;
+
+    CRITICAL(term = malloc(sizeof(*term)));
+
+    // Initialization fails before it allocates anything, so only the
+    // structure itself needs releasing here.
+    if (LSUP_term_init(term, type, data, datatype, lang) != 0) {
+        free(term);
+
+        return NULL;
+    }
+
+    return term;
+}
+
+
+/**
+ * Serialize a term into a freshly allocated buffer.
+ *
+ * Layout: one type byte, the NUL-terminated data string, then (if present)
+ * the NUL-terminated datatype string, then (if present) LANG_SIZE bytes of
+ * zero-padded language tag. A literal that has a language tag but no datatype
+ * gets an empty datatype string, so that the tag keeps its position after the
+ * datatype and is not silently dropped.
+ *
+ * `sterm` is initialized here; release it with LSUP_buffer_done().
+ * Returns 0.
+ */
+int
+LSUP_term_serialize(const LSUP_Term *term, LSUP_Buffer *sterm)
+{
+    // Bounded scan: do not rely on the tag being NUL-terminated.
+    size_t lang_len = 0;
+    while (lang_len < LANG_SIZE && term->lang[lang_len] != '\0') lang_len++;
+    const bool has_lang = lang_len > 0;
+
+    const char *datatype = term->datatype;
+    if (datatype == NULL && has_lang && term->type == LSUP_TERM_LITERAL)
+        datatype = "";
+
+    const size_t data_idx = CHR;
+    const size_t data_len = strlen(term->data) + CHR;
+    size_t datatype_idx = 0, datatype_len = 0, lang_idx = 0;
+
+    size_t size = data_idx + data_len;
+
+    if (datatype != NULL) {
+        datatype_idx = size;
+        datatype_len = strlen(datatype) + CHR;
+        size += datatype_len;
+
+        if (has_lang) {
+            lang_idx = size;
+            size += LANG_SIZE;
+        }
+    }
+
+    TRACE("Serialized term size: %zu", size);
+    LSUP_buffer_init(sterm, size);
+
+    // Byte pointer: arithmetic on `void *` is a compiler extension.
+    unsigned char *out = sterm->addr;
+
+    // Copy type.
+    out[0] = (unsigned char)term->type;
+    // Copy data.
+    memcpy(out + data_idx, term->data, data_len);
+
+    if (datatype != NULL) {
+        // Copy data type.
+        memcpy(out + datatype_idx, datatype, datatype_len);
+
+        if (has_lang) {
+            // Copy lang tag, zero-padded so that equal tags always produce
+            // equal bytes (and therefore equal hashes).
+            memset(out + lang_idx, 0, LANG_SIZE);
+            memcpy(out + lang_idx, term->lang, lang_len);
+        }
+    }
+
+    return 0;
+}
+
+
+/**
+ * Rebuild a term from its serialized form.
+ *
+ * `term` is (re)initialized through LSUP_term_init() with copies of the
+ * strings found in `sterm`. Every read is bounded by `sterm->size`.
+ *
+ * Returns the result of LSUP_term_init(), or -1 if the buffer is too short or
+ * a string in it is not NUL-terminated.
+ */
+int
+LSUP_term_deserialize(const LSUP_Buffer *sterm, LSUP_Term *term)
+{
+    char *buf = sterm->addr;
+    const size_t size = sterm->size;
+    char *datatype = NULL;
+    langtag lang = {0};
+
+    // Smallest valid buffer: the type byte plus an empty data string.
+    if (buf == NULL || size < 2 * CHR)
+        return -1;
+
+    char type = buf[0];
+
+    size_t cur = CHR;
+    char *data = buf + cur;
+    if (memchr(data, '\0', size - cur) == NULL)
+        return -1;
+    cur += strlen(data) + CHR;
+
+    // A literal without datatype ends right after its data, so only look
+    // for a datatype if there are bytes left.
+    if (type == LSUP_TERM_LITERAL && cur < size) {
+        datatype = buf + cur;
+        if (memchr(datatype, '\0', size - cur) == NULL)
+            return -1;
+        cur += strlen(datatype) + CHR;
+        if (datatype[0] == '\0')
+            datatype = NULL;
+
+        if (cur < size) {
+            // Bounded copy; `lang` is zero-filled, so it stays terminated.
+            size_t lang_len = size - cur;
+            if (lang_len > LANG_SIZE - 1)
+                lang_len = LANG_SIZE - 1;
+            memcpy(lang, buf + cur, lang_len);
+        }
+    }
+
+    return LSUP_term_init(term, (LSUP_term_type)type, data, datatype, lang);
+}
+
+
+/**
+ * Compare two terms.
+ *
+ * Terms are equal if they have the same type and data; literals must also
+ * agree on datatype (both absent, or equal strings) and language tag.
+ */
+bool LSUP_term_equals(const LSUP_Term *term1, const LSUP_Term *term2)
+{
+    if (term1->type != term2->type)
+        return false;
+
+    if (strcmp(term1->data, term2->data) != 0)
+        return false;
+
+    if (term1->type != LSUP_TERM_LITERAL)
+        return true;
+
+    if ((term1->datatype == NULL) != (term2->datatype == NULL)) // XOR
+        return false;
+
+    if (
+            term1->datatype != NULL &&
+            strcmp(term1->datatype, term2->datatype) != 0)
+        return false;
+
+    // `lang` is an array embedded in the struct, so it can never be NULL; an
+    // absent tag is all zeros. The comparison is bounded in case a tag fills
+    // all LANG_SIZE bytes without a terminator.
+    return strncmp(term1->lang, term2->lang, LANG_SIZE) == 0;
+}
+
+
+/**
+ * Release the strings owned by a term, but not the term structure itself.
+ *
+ * The pointers are reset to NULL, so calling this twice on the same term is
+ * harmless.
+ */
+void LSUP_term_done(LSUP_Term *term)
+{
+    if (term->data == NULL)
+        TRACE(STR, "Term data is NULL!");
+
+    // free(NULL) is a no-op, so no guards are needed.
+    free(term->data);
+    term->data = NULL;
+
+    free(term->datatype);
+    term->datatype = NULL;
+}
+
+
+/**
+ * Release a heap-allocated term together with the strings it owns.
+ *
+ * NULL is ignored, like in the graph and keyset free functions.
+ */
+void LSUP_term_free(LSUP_Term *term)
+{
+    if (term == NULL)
+        return;
+
+    TRACE(STR, "Freeing term.");
+    LSUP_term_done(term);
+    free(term);
+}
+
+
+// Extern inline functions.
+
+LSUP_Key LSUP_sterm_to_key(const LSUP_SerTerm *sterm);
+
+LSUP_Key LSUP_term_to_key(const LSUP_Term *term);
+

+ 53 - 0
src/structures/buffer.c

@@ -0,0 +1,53 @@
+#include "structures/buffer.h"
+
+// Inline extern prototypes
+
+bool LSUP_buffer_eq(const LSUP_Buffer *buf1, const LSUP_Buffer *buf2);
+
+
+/**
+ * Allocate a buffer structure and `size` bytes of payload.
+ *
+ * Aborts if the structure cannot be allocated.
+ */
+LSUP_Buffer *LSUP_buffer_new(size_t size)
+{
+    LSUP_Buffer *fresh = malloc(sizeof(LSUP_Buffer));
+
+    CRITICAL(fresh);
+    LSUP_buffer_init(fresh, size);
+
+    return fresh;
+}
+
+
+/**
+ * Allocate `size` bytes of payload for a pre-allocated buffer structure.
+ *
+ * The payload is not zeroed. Returns 0; aborts on allocation failure.
+ */
+int LSUP_buffer_init(LSUP_Buffer *buf, size_t size)
+{
+    TRACE("Buffer Size: %zu\n", size);
+    // malloc(0) may legitimately return NULL; request at least one byte so
+    // that an empty buffer is not mistaken for an allocation failure.
+    CRITICAL(buf->addr = malloc(size > 0 ? size : 1));
+    buf->size = size;
+
+    return 0;
+}
+
+/**
+ * Print a byte string of a given length in a human-readable format.
+ *
+ * The string is printed in Python style: printable characters are output
+ * literally, and non-printable ones as two-digit hex sequences, followed by a
+ * newline.
+ */
+void LSUP_buffer_print(const LSUP_Buffer *buf) {
+    // Read as unsigned bytes: isprint() is undefined for negative values, and
+    // a negative char would print as a sign-extended hex number.
+    const unsigned char *bytes = buf->addr;
+
+    for (size_t i = 0; i < buf->size; i++) {
+        if (isprint(bytes[i])) {
+            fputc(bytes[i], stdout);
+        } else {
+            printf("\\x%02x", (unsigned)bytes[i]);
+        }
+    }
+    printf("\n");
+}
+
+
+/** Release a buffer's payload; the structure itself is left alone. */
+void LSUP_buffer_done(LSUP_Buffer *buf){
+    if (buf->addr == NULL)
+        return;
+
+    TRACE(STR, "Freeing buffer.");
+    free(buf->addr);
+}
+

+ 94 - 0
src/structures/index.c

@@ -0,0 +1,94 @@
+#include "structures/index.h"
+
+// One key/value pair of the index.
+struct IndexEntry {
+    LSUP_Key key;       // Hash of the serialized term.
+    LSUP_SerTerm *val;  // The index's own copy of the serialized term.
+};
+
+// Append-only array of entries; lookups scan it linearly.
+struct Index {
+    size_t free_i;              // Number of entries in use.
+    size_t capacity;            // Number of entries allocated.
+    struct IndexEntry *entries;
+};
+
+
+/**
+ * Create an empty index with room for `capacity` entries.
+ *
+ * Returns NULL if `capacity` is 0. Aborts on allocation failure.
+ */
+LSUP_Index *LSUP_index_new(size_t capacity)
+{
+    // Check before allocating, so that the early return cannot leak.
+    if (capacity == 0) return NULL;
+
+    LSUP_Index *idx;
+
+    CRITICAL (idx = malloc(sizeof(*idx)));
+    CRITICAL (idx->entries = malloc(sizeof(struct IndexEntry) * capacity));
+
+    idx->free_i = 0;
+    idx->capacity = capacity;
+
+    return idx;
+}
+
+
+/** Reallocate the entry array to hold `capacity` entries; abort on failure. */
+void LSUP_index_resize(LSUP_Index *idx, size_t capacity)
+{
+    const size_t bytes = sizeof(struct IndexEntry) * capacity;
+
+    idx->entries = realloc(idx->entries, bytes);
+    if (UNLIKELY(idx->entries == NULL)) { abort(); }
+
+    idx->capacity = capacity;
+}
+
+
+/** Hash `sterm` and add the resulting key/value pair to the index. */
+int LSUP_index_add(LSUP_Index *idx, LSUP_SerTerm *sterm)
+{
+    return LSUP_index_add_pair(idx, LSUP_sterm_to_key(sterm), sterm);
+}
+
+
+/**
+ * Add a key/value pair unless the key is already present.
+ *
+ * The index stores its own copy of the serialized bytes. The entry array is
+ * grown by about half when it is full.
+ *
+ * Returns 0, also when the key already existed.
+ */
+int LSUP_index_add_pair(LSUP_Index *idx, LSUP_Key key, LSUP_SerTerm *sterm)
+{
+    // Fail quietly if key exists already.
+    if (LSUP_index_lookup(idx, key) != NULL)
+        return 0;
+
+    if (idx->free_i >= idx->capacity) {
+        // Integer arithmetic with +1 guarantees progress: `capacity * 1.5`
+        // truncates back to 1 for a capacity of 1, which would leave the
+        // write below out of bounds.
+        LSUP_index_resize(idx, idx->capacity + idx->capacity / 2 + 1);
+        TRACE("Capacity now at %zu\n", idx->capacity);
+    }
+
+    struct IndexEntry *entry = idx->entries + idx->free_i;
+
+    entry->key = key;
+    entry->val = LSUP_buffer_new(sterm->size);
+    memcpy(entry->val->addr, sterm->addr, sterm->size);
+
+    idx->free_i ++;
+    TRACE("Size now at %zu\n", idx->free_i);
+
+    return 0;
+}
+
+
+/** Return the serialized term stored under `key`, or NULL if there is none. */
+LSUP_SerTerm *LSUP_index_lookup(LSUP_Index *idx, LSUP_Key key)
+{
+    for (size_t i = 0; i < idx->free_i; i++) {
+        const struct IndexEntry *entry = &idx->entries[i];
+
+        if (entry->key == key)
+            return entry->val;
+    }
+
+    return NULL;
+}
+
+/**
+ * Release an index and every serialized term it holds. NULL is ignored.
+ */
+void LSUP_index_free(LSUP_Index *idx)
+{
+    if (idx == NULL)
+        return;
+
+    for (size_t i = 0; i < idx->free_i; i++) {
+        // Each value was created with LSUP_buffer_new(): release the payload
+        // and then the buffer structure itself.
+        LSUP_buffer_done(idx->entries[i].val);
+        free(idx->entries[i].val);
+    }
+
+    free(idx->entries);
+    free(idx);
+}

+ 364 - 0
src/structures/keyset.c

@@ -0,0 +1,364 @@
+#include <math.h>
+
+#include "structures/keyset.h"
+
+#define KLEN sizeof(LSUP_Key)
+#define DBL_KLEN sizeof(LSUP_DoubleKey)
+#define TRP_KLEN sizeof(LSUP_TripleKey)
+#define QUAD_KLEN sizeof(LSUP_QuadKey)
+
+
+LSUP_TripleKey NULL_TRP = {NULL_KEY, NULL_KEY, NULL_KEY};
+
+/**
+ * CALLBACKS
+ */
+
+// Signature shared by all lookup predicates: test one triple key against up
+// to two bound keys. Predicates that need fewer keys ignore the rest.
+typedef bool (*LSUP_key_cmp_fn_t)(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2);
+
+// Match on subject (k1).
+static inline bool lookup_sk_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return (*spok)[0] == k1;
+}
+
+// Match on predicate (k1).
+static inline bool lookup_pk_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return (*spok)[1] == k1;
+}
+
+// Match on object (k1).
+static inline bool lookup_ok_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return (*spok)[2] == k1;
+}
+
+// Match on subject (k1) and predicate (k2).
+static inline bool lookup_skpk_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return (*spok)[0] == k1 && (*spok)[1] == k2;
+}
+
+// Match on subject (k1) and object (k2).
+static inline bool lookup_skok_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return (*spok)[0] == k1 && (*spok)[2] == k2;
+}
+
+// Match on predicate (k1) and object (k2).
+static inline bool lookup_pkok_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return (*spok)[1] == k1 && (*spok)[2] == k2;
+}
+
+// Placeholder for queries without bound keys: accepts every triple.
+static inline bool lookup_none_cmp_fn(
+        const LSUP_TripleKey* spok, const LSUP_Key k1, const LSUP_Key k2)
+{
+    return true;
+}
+
+
+// Inline extern prototypes.
+
+bool LSUP_keyset_seek(LSUP_Keyset* ks, size_t idx);
+size_t LSUP_keyset_size(LSUP_Keyset* ks);
+size_t LSUP_keyset_tell(LSUP_Keyset* ks);
+const LSUP_TripleKey *LSUP_keyset_peek(LSUP_Keyset *ks);
+bool LSUP_keyset_contains(
+        const LSUP_Keyset *ks, const LSUP_TripleKey *val);
+bool LSUP_keyset_next(LSUP_Keyset *ks);
+
+
+// Inline utils.
+
+// True if all three keys of `trp` equal those of NULL_TRP.
+static inline bool is_null_trp(const LSUP_TripleKey *trp)
+{
+    for (size_t i = 0; i < 3; i++) {
+        if ((*trp)[i] != NULL_TRP[i])
+            return false;
+    }
+
+    return true;
+}
+
+
+/**
+ * Set up a pre-allocated keyset with room for `capacity` triples.
+ *
+ * The keyset starts empty with the cursor at 0. Returns 0; aborts if the
+ * storage cannot be allocated.
+ */
+int LSUP_keyset_init(LSUP_Keyset *ks, size_t capacity, float expand_ratio)
+{
+    ks->data = malloc(capacity * TRP_KLEN);
+    if (UNLIKELY(ks->data == NULL)) { abort(); }
+
+    ks->expand_ratio = expand_ratio;
+    ks->capacity = capacity;
+    ks->free_i = 0;
+    ks->cur = 0;
+
+    return 0;
+}
+
+
+/**
+ * Allocate and initialize a keyset; release it with LSUP_keyset_free().
+ *
+ * Aborts on allocation failure.
+ */
+LSUP_Keyset *LSUP_keyset_new(size_t capacity, float expand_ratio) {
+    LSUP_Keyset *ks;
+
+    // Checked like every other allocation in this file, so that
+    // LSUP_keyset_init() never receives NULL.
+    CRITICAL (ks = malloc(sizeof(*ks)));
+
+    LSUP_keyset_init(ks, capacity, expand_ratio);
+
+    return(ks);
+}
+
+
+/**
+ * Advance the cursor and copy the record found there into `val`.
+ *
+ * Returns true if the cursor could be advanced and `val` was populated. If
+ * the cursor is already at the last record, returns false and leaves `val`
+ * untouched.
+ *
+ * NOTE: This function copies data. For no-copy peek, use `LSUP_keyset_peek()`.
+ */
+bool LSUP_keyset_get_next(LSUP_Keyset *ks, LSUP_TripleKey *val) {
+
+    if (!LSUP_keyset_next(ks))
+        return false;
+
+    memcpy(val, ks->data + ks->cur, TRP_KLEN);
+
+    return true;
+}
+
+
+/**
+ * Resize the keyset storage to `new_size` records.
+ *
+ * The size is clamped to the number of occupied slots, so passing 0 compacts
+ * the keyset. The recorded capacity is updated to match. Returns 0; aborts on
+ * allocation failure.
+ */
+int LSUP_keyset_resize(LSUP_Keyset *ks, size_t new_size) {
+    // Compare as size_t: max() takes ints and would truncate large sizes.
+    if (new_size < ks->free_i)
+        new_size = ks->free_i;
+
+    // realloc() with a size of 0 may free the block and return NULL, which
+    // would be mistaken for a failure; always keep room for one record.
+    const size_t alloc_n = new_size > 0 ? new_size : 1;
+
+    CRITICAL (ks->data = realloc(ks->data, alloc_n * TRP_KLEN))
+
+    ks->capacity = new_size;
+
+    return(0);
+}
+
+
+/**
+ * Append a triple key and leave the cursor on it.
+ *
+ * With LSUP_KS_CHECK_DUP, an existing key is not added again (returns 1).
+ * With LSUP_KS_CHECK_CAP, a full keyset is expanded first, or -2 is returned
+ * if its expand ratio is not positive. Without LSUP_KS_CHECK_CAP no capacity
+ * check is made. Returns 0 when the key was added.
+ */
+int LSUP_keyset_add(
+        LSUP_Keyset *ks, const LSUP_TripleKey *val, LSUP_KSFlag flags)
+{
+    const bool check_dup = (flags & LSUP_KS_CHECK_DUP) != 0;
+    const bool check_cap = (flags & LSUP_KS_CHECK_CAP) != 0;
+
+    if (check_dup && LSUP_keyset_contains(ks, val))
+        return 1;
+
+    if (check_cap && ks->free_i >= ks->capacity) {
+        if (!(ks->expand_ratio > 0))
+            return -2; // TODO: ENOMEM
+
+        LSUP_keyset_resize(
+                ks, 1 + (size_t)(ks->capacity * (1 + ks->expand_ratio)));
+    }
+
+    const size_t slot = ks->free_i;
+
+    memcpy(ks->data + slot, val, TRP_KLEN);
+    ks->cur = slot;
+    ks->free_i = slot + 1;
+
+    return 0;
+}
+
+
+/**
+ * Remove the first occurrence of `val` by overwriting it with NULL_TRP.
+ *
+ * The slot is kept, so the keyset size does not change. The cursor is left on
+ * the removed record, or on the last record if there was no match.
+ * Returns 0 whether or not a match was found.
+ */
+int LSUP_keyset_remove(LSUP_Keyset *ks, const LSUP_TripleKey *val) {
+
+    // Nothing to scan in an empty keyset.
+    if (!LSUP_keyset_seek(ks, 0))
+        return(0);
+
+    // Test before advancing, so that the record at index 0 is examined too.
+    do {
+        if (memcmp(val, ks->data + ks->cur, TRP_KLEN) == 0) {
+            memcpy(ks->data + ks->cur, &NULL_TRP, TRP_KLEN);
+
+            break;
+        }
+    } while (LSUP_keyset_next(ks));
+
+    return(0);
+}
+
+
+/**
+ * Initialize `dest` as a slot-for-slot copy of `src`, deleted records
+ * included. The cursor of `dest` is at 0. Returns 0.
+ */
+int LSUP_keyset_copy(const LSUP_Keyset *src, LSUP_Keyset *dest) {
+
+    const size_t n_slots = src->capacity;
+
+    LSUP_keyset_init(dest, n_slots, src->expand_ratio);
+
+    memcpy(dest->data, src->data, n_slots * TRP_KLEN);
+
+    // `dest` is still empty at this point, so this leaves the cursor at the
+    // 0 that initialization set.
+    LSUP_keyset_seek(dest, 0);
+    dest->free_i = src->free_i;
+
+    return 0;
+}
+
+
+/**
+ * Initialize `dest` with the records of `src` that are not NULL_TRP.
+ *
+ * `dest` gets the capacity of `src`. The cursor of `src` is moved by the
+ * scan; the cursor of `dest` is rewound to 0. Returns 0.
+ */
+int LSUP_keyset_sparse_copy(LSUP_Keyset *src, LSUP_Keyset *dest) {
+
+    LSUP_keyset_init(dest, src->capacity, src->expand_ratio);
+
+    if (!LSUP_keyset_seek(src, 0))
+        return 0;
+
+    do {
+        // Peek twice, as separate calls, for the test and for the insert.
+        const bool live = memcmp(
+                LSUP_keyset_peek(src), &NULL_TRP, TRP_KLEN) != 0;
+
+        if (LIKELY(live))
+            LSUP_keyset_add(dest, LSUP_keyset_peek(src), 0);
+    } while (LSUP_keyset_next(src));
+
+    LSUP_keyset_seek(dest, 0);
+
+    return 0;
+}
+
+
+/**
+ * Collect into `res` the triples of `ks` that match the bound keys.
+ *
+ * A key of NULL_KEY (0) is unbound and matches anything. `res` is initialized
+ * here. With all three keys bound, `res` holds the triple if it exists
+ * (returns 0) or stays empty (returns 1). With no key bound, a compact copy
+ * of `ks` is made. Otherwise the matching records are copied, `res` is
+ * compacted and its cursor rewound; the cursor of `ks` is moved by the scan.
+ */
+int LSUP_keyset_lookup(
+        LSUP_Keyset *ks, LSUP_Keyset *res,
+        const LSUP_Key sk, const LSUP_Key pk, const LSUP_Key ok) {
+
+    // k2 stays NULL_KEY for single-key lookups, whose callbacks ignore it;
+    // initializing it avoids passing an indeterminate value.
+    LSUP_Key k1 = NULL_KEY, k2 = NULL_KEY;
+    LSUP_key_cmp_fn_t cmp_fn;
+
+    if (sk && pk && ok) {
+        LSUP_keyset_init(res, 1, 1);
+        LSUP_TripleKey spok = {sk, pk, ok};
+        if(LSUP_keyset_contains(ks, &spok)) {
+            LSUP_keyset_add(res, &spok, 0);
+            return 0;
+        } else {
+            return 1;
+        }
+
+    } else if (sk) {
+        k1 = sk;
+
+        if (pk) { // s p ?
+            k2 = pk;
+            cmp_fn = lookup_skpk_cmp_fn;
+
+        } else if (ok) { // s ? o
+            k2 = ok;
+            cmp_fn = lookup_skok_cmp_fn;
+
+        } else { // s ? ?
+            cmp_fn = lookup_sk_cmp_fn;
+
+        }
+
+    } else if (pk) {
+        k1 = pk;
+
+        if (ok) { // ? p o
+            k2 = ok;
+            cmp_fn = lookup_pkok_cmp_fn;
+
+        } else { // ? p ?
+            cmp_fn = lookup_pk_cmp_fn;
+        }
+
+    } else if (ok) { // ? ? o
+        k1 = ok;
+        cmp_fn = lookup_ok_cmp_fn;
+
+    } else {
+        printf("WARNING: no bound terms, making a compact copy.\n");
+        return LSUP_keyset_sparse_copy(ks, res);
+    }
+
+    LSUP_keyset_init(res, ks->capacity, ks->expand_ratio);
+
+    // Only scan if there is a first record: peeking into an empty keyset
+    // would read a slot that was never written.
+    if (LSUP_keyset_seek(ks, 0)) {
+        do {
+            if (cmp_fn(LSUP_keyset_peek(ks), k1, k2)) {
+                LSUP_keyset_add(res, LSUP_keyset_peek(ks), 0);
+            }
+        } while (LSUP_keyset_next(ks));
+    }
+
+    // Compact result keyset. Skipped for an empty result, where compacting
+    // would request a zero-sized reallocation.
+    if (res->free_i > 0) {
+        LSUP_keyset_resize(res, 0);
+        // Rewind, so that the caller can iterate from the first match.
+        LSUP_keyset_seek(res, 0);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Union: `res` is initialized with the live records of `ks1`, then every
+ * live record of `ks2` not yet present is appended, growing `res` on demand.
+ * Returns 0.
+ */
+int LSUP_keyset_join(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res)
+{
+    LSUP_keyset_sparse_copy(ks1, res);
+
+    if (!LSUP_keyset_seek(ks2, 0))
+        return 0;
+
+    do {
+        const LSUP_TripleKey *row = LSUP_keyset_peek(ks2);
+
+        if (is_null_trp(row))
+            continue;
+
+        LSUP_keyset_add(res, row, LSUP_KS_CHECK_DUP | LSUP_KS_CHECK_CAP);
+    } while (LSUP_keyset_next(ks2));
+
+    return 0;
+}
+
+
+/**
+ * Difference: `res` is initialized with the capacity of `ks1` and receives
+ * the live records of `ks1` that are not in `ks2`. Returns 0.
+ */
+int LSUP_keyset_subtract(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res)
+{
+    LSUP_keyset_init(res, ks1->capacity, ks1->expand_ratio);
+
+    if (!LSUP_keyset_seek(ks1, 0))
+        return 0;
+
+    do {
+        const LSUP_TripleKey *row = LSUP_keyset_peek(ks1);
+
+        if (is_null_trp(row) || LSUP_keyset_contains(ks2, row))
+            continue;
+
+        LSUP_keyset_add(res, row, 0);
+    } while (LSUP_keyset_next(ks1));
+
+    return 0;
+}
+
+
+/**
+ * Intersection: `res` is initialized with the capacity of `ks1` and receives
+ * the live records of `ks1` that are also in `ks2`. Returns 0.
+ */
+int LSUP_keyset_intersect(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res)
+{
+    LSUP_keyset_init(res, ks1->capacity, ks1->expand_ratio);
+
+    if (!LSUP_keyset_seek(ks1, 0))
+        return 0;
+
+    do {
+        const LSUP_TripleKey *row = LSUP_keyset_peek(ks1);
+
+        if (is_null_trp(row) || !LSUP_keyset_contains(ks2, row))
+            continue;
+
+        LSUP_keyset_add(res, row, 0);
+    } while (LSUP_keyset_next(ks1));
+
+    return 0;
+}
+
+
+/**
+ * Symmetric difference: `res` is initialized with the combined capacity of
+ * both inputs and receives the live records found in exactly one of them;
+ * those of `ks1` come first. Returns 0.
+ */
+int LSUP_keyset_xor(LSUP_Keyset *ks1, LSUP_Keyset *ks2, LSUP_Keyset *res)
+{
+    const size_t total_cap = ks1->capacity + ks2->capacity;
+
+    LSUP_keyset_init(res, total_cap, ks1->expand_ratio);
+
+    // First pass: records only in ks1.
+    if (LSUP_keyset_seek(ks1, 0)) {
+        do {
+            const LSUP_TripleKey *row = LSUP_keyset_peek(ks1);
+
+            if (is_null_trp(row) || LSUP_keyset_contains(ks2, row))
+                continue;
+
+            LSUP_keyset_add(res, row, 0);
+        } while (LSUP_keyset_next(ks1));
+    }
+
+    // Second pass: records only in ks2.
+    if (LSUP_keyset_seek(ks2, 0)) {
+        do {
+            const LSUP_TripleKey *row = LSUP_keyset_peek(ks2);
+
+            if (is_null_trp(row) || LSUP_keyset_contains(ks1, row))
+                continue;
+
+            LSUP_keyset_add(res, row, 0);
+        } while (LSUP_keyset_next(ks2));
+    }
+
+    return 0;
+}
+
+
+/** Release a keyset's storage; the structure itself is left alone. */
+void LSUP_keyset_done(LSUP_Keyset *ks)
+{
+    LSUP_TripleKey *rows = ks->data;
+
+    if (!LIKELY(rows != NULL))
+        return;
+
+    free(rows);
+}
+
+
+/** Release a heap-allocated keyset and its storage. NULL is ignored. */
+void LSUP_keyset_free(LSUP_Keyset *ks)
+{
+    if (!LIKELY(ks != NULL))
+        return;
+
+    LSUP_keyset_done(ks);
+    free(ks);
+}
+
+

+ 25 - 0
test.c

@@ -0,0 +1,25 @@
+#include "test_term.c"
+#include "test_index.c"
+#include "test_graph.c"
+#include "test_keyset.c"
+
+/**
+ * Test runner entry point: run every test suite and print a summary.
+ *
+ * @return 0 if all suites passed, 1 otherwise.
+ */
+int main(int argc, char **argv) {
+
+    // Bitwise OR rather than ||, so that every suite is run even when an
+    // earlier one has already failed.
+    int result = (
+            term_tests() | index_tests()
+            | keyset_tests() | graph_tests());
+
+    // result is an int (0 or -1): print it as such.
+    printf("Test result: %d\n", result);
+
+    if (result != 0) {
+        printf("Test failed.\n");
+    }
+    else {
+        printf("ALL TESTS PASSED\n");
+    }
+
+    printf("Tests run: %d\n", tests_run);
+
+    // Normalize to a conventional process exit status.
+    return result != 0;
+}
+

+ 0 - 0
test/keyset.c


+ 45 - 0
test/test.h

@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lsup_rdf.h"
+
+#ifndef _LSUP_TEST_H
+#define _LSUP_TEST_H
+
+/**
+ * Minimal unit testing framework.
+ * Inspired by http://www.jera.com/techinfo/jtns/jtn002.html
+ */
+
+/**
+ * Fail the enclosing test function unless `test` evaluates to true.
+ *
+ * On failure, the source location and `msg` are printed to stderr and the
+ * function that used the macro returns -1; it must therefore return int.
+ */
+#define ASSERT(test, msg) do { \
+    if (test) break; \
+    fprintf( \
+            stderr, "!!! Assertion failed at %s:%d. Message: %s\n", \
+            __FILE__, __LINE__, msg); \
+    return -1; \
+} while (0)
+
+/**
+ * Fail the enclosing test function unless `got` equals `exp`.
+ *
+ * On failure, the source location and both values are printed to stderr and
+ * the function that used the macro returns -1. The values are printed as
+ * size_t, so a negative value shows up as a large unsigned number. Both
+ * arguments are evaluated a second time when the failure is reported: avoid
+ * arguments with side effects.
+ */
+#define EXPECT_INT_EQ(got, exp) do { \
+    if ((exp) != (got)) {\
+        fprintf(\
+                stderr, "!!! Test failed at %s:%d. Expected: %zu; got: %zu\n",\
+                __FILE__, __LINE__, (size_t)(exp), (size_t)(got)); \
+        return -1; \
+    }\
+} while (0)
+
+/**
+ * Fail the enclosing test function unless the strings `got` and `exp` compare
+ * equal with strcmp().
+ *
+ * On failure, the source location and both strings are printed to stderr and
+ * the function that used the macro returns -1. Neither argument may be NULL.
+ */
+#define EXPECT_STR_EQ(got, exp) do { \
+    if (strcmp((exp), (got)) == 0) break; \
+    fprintf( \
+            stderr, "!!! Test failed at %s:%d. Expected: %s; got: %s\n", \
+            __FILE__, __LINE__, (exp), (got)); \
+    return -1; \
+} while (0)
+/**
+ * Call the test function `test` (no arguments, returns int) and count it in
+ * tests_run. If it returns non-zero, the function that used the macro returns
+ * -1; the failing test is still counted.
+ */
+#define RUN(test) do { \
+    int test_rc_ = test(); \
+    tests_run++; \
+    if (test_rc_ != 0) return -1; \
+} while (0)
+
+
+/** Number of test functions invoked through RUN so far. */
+int tests_run;
+
+#endif

+ 7 - 0
test/test_graph.c

@@ -0,0 +1,7 @@
+#include "test.h"
+
+/**
+ * Graph test suite. Placeholder: no tests are defined yet.
+ *
+ * @return Always 0.
+ */
+int graph_tests()
+{
+    return 0;
+}
+

+ 126 - 0
test/test_index.c

@@ -0,0 +1,126 @@
+#include "test.h"
+
+/**
+ * Serialize five terms (a URI, a plain literal, a typed literal and two
+ * language-tagged literals), add each one to an index and check that it can
+ * be looked up by its key right after being added.
+ *
+ * @return 0 on success, -1 at the first failed assertion.
+ */
+static int test_index_add()
+{
+    enum { N_TERMS = 5 };
+
+    LSUP_Term *terms[N_TERMS];
+    terms[0] = LSUP_term_new(LSUP_TERM_URI, "http://hello.org", NULL, NULL);
+    terms[1] = LSUP_term_new(LSUP_TERM_LITERAL, "hello", NULL, NULL);
+    terms[2] = LSUP_term_new(LSUP_TERM_LITERAL, "hello", "xsd:string", NULL);
+    terms[3] = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-US");
+    terms[4] = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-GB");
+
+    // Failure message for the term at the same position in terms[].
+    const char *not_found_msg[N_TERMS] = {
+        "URI not found!",
+        "Literal not found!",
+        "Typed literal not found!",
+        "US lang literal not found!",
+        "GB lang literal not found!",
+    };
+
+    // Make capacity lower intentionally, to test expansion.
+    LSUP_Index *idx = LSUP_index_new(3);
+
+    LSUP_SerTerm *serialized[N_TERMS];
+
+    for (int i = 0; i < N_TERMS; i++) {
+        serialized[i] = malloc(sizeof *serialized[i]);
+        LSUP_term_serialize(terms[i], serialized[i]);
+        LSUP_index_add(idx, serialized[i]);
+
+        LSUP_Key key = LSUP_term_to_key(terms[i]);
+        ASSERT(
+                LSUP_buffer_eq(LSUP_index_lookup(idx, key), serialized[i]),
+                not_found_msg[i]);
+    }
+
+    for (int i = 0; i < N_TERMS; i++) {
+        LSUP_term_free(terms[i]);
+    }
+
+    LSUP_index_free(idx);
+
+    for (int i = 0; i < N_TERMS; i++) {
+        LSUP_buffer_done(serialized[i]);
+        free(serialized[i]);
+    }
+
+    return 0;
+}
+
+
+/**
+ * Add a serialized URI term to an index of capacity 1, add it again as an
+ * explicit key/value pair, and check that looking up the key returns it.
+ *
+ * @return 0 on success, -1 if the lookup assertion fails.
+ */
+int test_index_add_pair()
+{
+    LSUP_Term *term = LSUP_term_new(
+            LSUP_TERM_URI, "http://hello.org", NULL, NULL);
+    LSUP_Index *index = LSUP_index_new(1);
+
+    LSUP_SerTerm *ser = malloc(sizeof *ser);
+    LSUP_term_serialize(term, ser);
+    LSUP_index_add(index, ser);
+
+    // Second insertion of the same data, this time with an explicit key.
+    LSUP_Key key = LSUP_term_to_key(term);
+    LSUP_index_add_pair(index, key, ser);
+
+    ASSERT(
+            LSUP_buffer_eq(LSUP_index_lookup(index, key), ser),
+            "URI not found!");
+
+    LSUP_index_free(index);
+    LSUP_term_free(term);
+    LSUP_buffer_done(ser);
+    free(ser);
+
+    return 0;
+}
+
+
+/**
+ * Index test suite: run the index tests in order. RUN makes this function
+ * return -1 as soon as one test fails.
+ *
+ * @return 0 if every test passed, -1 otherwise.
+ */
+int index_tests()
+{
+    RUN(test_index_add);
+    RUN(test_index_add_pair);
+
+    return 0;
+}
+
+

+ 217 - 0
test/test_keyset.c

@@ -0,0 +1,217 @@
+#include "test.h"
+
+/**
+ * Initialize a stack-allocated keyset with capacity 10 and check that the
+ * capacity is recorded.
+ *
+ * @return 0 on success, -1 if the check fails.
+ */
+int test_keyset_stack()
+{
+    LSUP_Keyset ks;
+
+    LSUP_keyset_init(&ks, 10, .5);
+
+    EXPECT_INT_EQ(ks.capacity, 10);
+
+    LSUP_keyset_done(&ks);
+
+    return 0;
+}
+
+
+/**
+ * Create a keyset with LSUP_keyset_new (capacity 10) and check that the
+ * capacity is recorded.
+ *
+ * @return 0 on success, -1 if the check fails.
+ */
+int test_keyset_heap()
+{
+    LSUP_Keyset *keyset = LSUP_keyset_new(10, .5);
+
+    EXPECT_INT_EQ(keyset->capacity, 10);
+
+    LSUP_keyset_free(keyset);
+
+    return 0;
+}
+
+
+/**
+ * Add keys with duplicate checking, then remove one and take a sparse copy.
+ *
+ * Checks the size and cursor position after the additions, that seeking to
+ * index 1 yields the second key, that a removed key is no longer contained,
+ * and the free_i values of the original set and of its sparse copy.
+ *
+ * @return 0 on success, -1 at the first failed check.
+ */
+int test_keyset_add_remove()
+{
+    LSUP_Keyset *ks = LSUP_keyset_new(10, .5);
+
+    // The last key duplicates the one before it.
+    LSUP_TripleKey keys[] = {
+        {1,1,1}, {1,1,2}, {1,1,3}, {1,2,1}, {1,2,1},
+    };
+    const size_t n_keys = sizeof(keys) / sizeof(keys[0]);
+
+    for (size_t i = 0; i < n_keys; i++) {
+        LSUP_keyset_add(ks, &keys[i], LSUP_KS_CHECK_DUP);
+    }
+
+    // Five additions, four distinct keys.
+    EXPECT_INT_EQ(LSUP_keyset_size(ks), 4);
+    EXPECT_INT_EQ(LSUP_keyset_tell(ks), 3);
+
+    EXPECT_INT_EQ(LSUP_keyset_seek(ks, 1), true);
+    ASSERT(
+            memcmp(LSUP_keyset_peek(ks), &keys[1], sizeof(LSUP_TripleKey)) == 0,
+            "Key not corresponding to index!");
+
+    // Same value as keys[1], held in a separate object.
+    LSUP_TripleKey removed = {1,1,2};
+    LSUP_keyset_remove(ks, &removed);
+
+    ASSERT(!LSUP_keyset_contains(ks, &removed), "Triple not removed!");
+    ASSERT(!LSUP_keyset_contains(ks, &keys[1]), "Triple not removed!");
+
+    EXPECT_INT_EQ(ks->free_i, 4);
+
+    LSUP_Keyset *copy = malloc(sizeof *copy);
+    LSUP_keyset_sparse_copy(ks, copy);
+
+    EXPECT_INT_EQ(copy->free_i, 3);
+
+    LSUP_keyset_free(ks);
+    LSUP_keyset_free(copy);
+
+    return 0;
+}
+
+
+/**
+ * Fill a keyset with four keys and check how many results
+ * LSUP_keyset_lookup returns for a series of (s, p, o) patterns.
+ *
+ * @return 0 on success, -1 at the first unexpected result size.
+ */
+int test_keyset_lookup()
+{
+    LSUP_Keyset *ks = LSUP_keyset_new(10, .5);
+    LSUP_Keyset result;
+
+    LSUP_TripleKey keys[] = {{1,1,1}, {1,1,2}, {1,2,1}, {2,1,3}};
+    const size_t n_keys = sizeof(keys) / sizeof(keys[0]);
+
+    for (size_t i = 0; i < n_keys; i++) {
+        LSUP_keyset_add(ks, &keys[i], 0);
+    }
+
+    // Lookup patterns and the number of matches expected for each.
+    // NOTE(review): 0 appears to act as a wildcard; confirm against
+    // LSUP_keyset_lookup.
+    const struct {
+        int s, p, o;
+        int n_matches;
+    } cases[] = {
+        {1, 1, 1, 1},
+        {1, 1, 0, 2},
+        {1, 2, 0, 1},
+        {1, 0, 0, 3},
+        {0, 1, 0, 3},
+        {0, 0, 0, 4},
+    };
+    const size_t n_cases = sizeof(cases) / sizeof(cases[0]);
+
+    for (size_t i = 0; i < n_cases; i++) {
+        LSUP_keyset_lookup(ks, &result, cases[i].s, cases[i].p, cases[i].o);
+        EXPECT_INT_EQ(LSUP_keyset_size(&result), cases[i].n_matches);
+        LSUP_keyset_done(&result);
+    }
+
+    LSUP_keyset_free(ks);
+
+    return 0;
+}
+
+
+int test_keyset_bool_ops()
+{
+    LSUP_Keyset res_s;
+    LSUP_Keyset *res = &res_s;
+
+    LSUP_TripleKey k1 = {1,1,1};
+    LSUP_TripleKey k2 = {1,1,2};
+    LSUP_TripleKey k3 = {1,2,1};
+    LSUP_TripleKey k4 = {2,1,3};
+    LSUP_TripleKey k5 = {3,1,1};
+
+    LSUP_TripleKey k6 = {1,1,1};
+    LSUP_TripleKey k7 = {1,1,2};
+    LSUP_TripleKey k8 = {3,2,1};
+    LSUP_TripleKey k9 = {4,1,3};
+    LSUP_TripleKey k10 = {5,1,3};
+
+    LSUP_Keyset *ks1 = LSUP_keyset_new(5, .5);
+    LSUP_Keyset *ks2 = LSUP_keyset_new(5, .5);
+
+    // Both sets empty.
+    LSUP_keyset_join(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 0);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_subtract(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 0);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_intersect(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 0);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_xor(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 0);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_add(ks1, &k1, 0);
+    LSUP_keyset_add(ks1, &k2, 0);
+    LSUP_keyset_add(ks1, &k3, 0);
+    LSUP_keyset_add(ks1, &k4, 0);
+    LSUP_keyset_add(ks1, &k5, 0);
+
+    // Set 2 empty.
+    LSUP_keyset_join(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 5);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_subtract(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 5);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_intersect(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 0);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_xor(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 5);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_add(ks2, &k6, 0);
+    LSUP_keyset_add(ks2, &k7, 0);
+    LSUP_keyset_add(ks2, &k8, 0);
+    LSUP_keyset_add(ks2, &k9, 0);
+    LSUP_keyset_add(ks2, &k10, 0);
+
+    // Both sets populated.
+    LSUP_keyset_join(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 8);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_subtract(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 3);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_intersect(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 2);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_xor(ks1, ks2, res);
+    EXPECT_INT_EQ(LSUP_keyset_size(res), 6);
+    LSUP_keyset_done(res);
+
+    LSUP_keyset_free(ks1);
+    LSUP_keyset_free(ks2);
+
+    return 0;
+}
+
+/**
+ * Keyset test suite: run the keyset tests in order. RUN makes this function
+ * return -1 as soon as one test fails.
+ *
+ * @return 0 if every test passed, -1 otherwise.
+ */
+int keyset_tests()
+{
+    RUN(test_keyset_stack);
+    RUN(test_keyset_heap);
+    RUN(test_keyset_add_remove);
+    RUN(test_keyset_lookup);
+    RUN(test_keyset_bool_ops);
+
+    return 0;
+}

+ 148 - 0
test/test_term.c

@@ -0,0 +1,148 @@
+#include "test.h"
+
+
+/**
+ * Initialize a stack-allocated URI term and check its data, that its
+ * datatype is NULL and that its language tag is empty.
+ *
+ * @return 0 on success, -1 at the first failed check.
+ */
+static int test_term_stack()
+{
+    TRACE(STR, "Test term, stack-allocated.");
+
+    char *uri = "http://hello.org";
+    LSUP_Term term;
+
+    LSUP_term_init(&term, LSUP_TERM_URI, uri, NULL, NULL);
+
+    EXPECT_STR_EQ(term.data, uri);
+    ASSERT(term.datatype == NULL, "Term datatype is not NULL!");
+    ASSERT(strlen(term.lang) == 0, "Term lang incorrect!");
+
+    TRACE(STR, "freeing term.");
+    LSUP_term_done(&term);
+
+    return 0;
+}
+
+/**
+ * Create a typed, language-tagged literal with LSUP_term_new and check that
+ * data, datatype and language tag are stored as given.
+ *
+ * @return 0 on success, -1 at the first failed check.
+ */
+static int test_term_heap()
+{
+    char *value = "hello";
+    char *type = "xsd:string";
+    char *tag = "en-US";
+
+    TRACE(STR, "Test term, heap-allocated.");
+    LSUP_Term *literal = LSUP_term_new(LSUP_TERM_LITERAL, value, type, tag);
+
+    TRACE("Term data: %s", literal->data);
+    EXPECT_STR_EQ(literal->data, value);
+    EXPECT_STR_EQ(literal->datatype, type);
+    EXPECT_STR_EQ(literal->lang, tag);
+
+    LSUP_term_free(literal);
+
+    return 0;
+}
+
+/**
+ * Serialize `term`, deserialize the result and check that it equals `term`.
+ *
+ * The serialized buffer and the deserialized term are released before the
+ * outcome is reported, so nothing is leaked on failure. `term` itself is not
+ * freed.
+ *
+ * @param term Term to round-trip.
+ * @param trace_label Text traced before the serialized buffer is printed.
+ * @param fail_msg Message reported if the round-tripped term differs.
+ *
+ * @return 0 if the round-tripped term equals `term`, -1 otherwise.
+ */
+static int check_term_roundtrip(
+        LSUP_Term *term, const char *trace_label, const char *fail_msg)
+{
+    LSUP_Buffer sterm;
+    LSUP_Term dsterm;
+
+    LSUP_term_serialize(term, &sterm);
+    TRACE("%s", trace_label);
+    LSUP_buffer_print(&sterm);
+    TRACE("%s", "\n");
+    LSUP_term_deserialize(&sterm, &dsterm);
+
+    // Record the outcome first, so that cleanup also runs on failure.
+    int same = LSUP_term_equals(&dsterm, term) ? 1 : 0;
+
+    LSUP_term_done(&dsterm);
+    LSUP_buffer_done(&sterm);
+
+    ASSERT(same, fail_msg);
+
+    return 0;
+}
+
+
+/**
+ * Round-trip a URI, a plain literal, a typed literal and a typed,
+ * language-tagged literal through serialization and deserialization.
+ *
+ * Stops at the first term that does not survive the round trip; all terms
+ * are freed in either case.
+ *
+ * @return 0 on success, -1 if any round trip fails.
+ */
+static int test_term_serialize_deserialize()
+{
+    LSUP_Term *uri = LSUP_term_new(
+            LSUP_TERM_URI, "http://hello.org", NULL, NULL);
+    LSUP_Term *lit = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", NULL, NULL);
+    LSUP_Term *tlit = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", NULL);
+    LSUP_Term *tllit = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-US");
+
+    int rc = check_term_roundtrip(
+            uri, "Serialized URI: ", "URI serialization error!");
+    if (rc == 0) {
+        rc = check_term_roundtrip(
+                lit, "Serialized literal: ", "lit serialization error!");
+    }
+    if (rc == 0) {
+        rc = check_term_roundtrip(
+                tlit, "Serialized typed literal: ",
+                "tlit serialization error!");
+    }
+    if (rc == 0) {
+        rc = check_term_roundtrip(
+                tllit, "Serialized typed and language-tagged literal: ",
+                "tllit serialization error!");
+    }
+
+    LSUP_term_free(uri);
+    LSUP_term_free(lit);
+    LSUP_term_free(tlit);
+    LSUP_term_free(tllit);
+
+    return rc;
+}
+
+
+/**
+ * Check that terms differing in type, datatype or language tag are mapped to
+ * different keys by LSUP_term_to_key.
+ *
+ * @return 0 on success, -1 at the first pair of terms sharing a key.
+ */
+static int test_term_to_key()
+{
+    LSUP_Term *uri = LSUP_term_new(
+            LSUP_TERM_URI, "http://hello.org", NULL, NULL);
+    LSUP_Term *lit = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", NULL, NULL);
+    LSUP_Term *tlit = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", NULL);
+    LSUP_Term *tllit1 = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-US");
+    LSUP_Term *tllit2 = LSUP_term_new(
+            LSUP_TERM_LITERAL, "hello", "xsd:string", "en-GB");
+
+    LSUP_Key uri_key = LSUP_term_to_key(uri);
+    LSUP_Key lit_key = LSUP_term_to_key(lit);
+    LSUP_Key tlit_key = LSUP_term_to_key(tlit);
+    LSUP_Key tllit1_key = LSUP_term_to_key(tllit1);
+    LSUP_Key tllit2_key = LSUP_term_to_key(tllit2);
+
+    // The keys are plain values: release the terms before asserting, so that
+    // a failed assertion does not leak them.
+    LSUP_term_free(uri);
+    LSUP_term_free(lit);
+    LSUP_term_free(tlit);
+    LSUP_term_free(tllit1);
+    LSUP_term_free(tllit2);
+
+    ASSERT(uri_key != lit_key, "URI / literal key conflict!");
+    ASSERT(lit_key != tlit_key, "Plain / typed literal key conflict!");
+    ASSERT(
+            lit_key != tllit1_key,
+            "Plain / language-tagged literal key conflict!");
+    ASSERT(
+            tlit_key != tllit1_key,
+            "Typed / language-tagged literal key conflict!");
+    ASSERT(
+            tllit1_key != tllit2_key,
+            "en-US / en-GB literal key conflict!");
+
+    return 0;
+}
+
+
+/**
+ * Term test suite: run the term tests in order. RUN makes this function
+ * return -1 as soon as one test fails.
+ *
+ * @return 0 if every test passed, -1 otherwise.
+ */
+int term_tests()
+{
+    RUN(test_term_stack);
+    RUN(test_term_heap);
+    RUN(test_term_serialize_deserialize);
+    RUN(test_term_to_key);
+
+    return 0;
+}
+