Forráskód Böngészése

Cython part compiles.

Stefano Cossu 5 éve
szülő
commit
f36821eeee

+ 0 - 40
lakesuperior/model/graph/callbacks.pxd

@@ -1,40 +0,0 @@
-from libc.stdint cimport uint32_t, uint64_t
-
-from lakesuperior.model.base cimport Buffer, TripleKey
-from lakesuperior.model.graph.graph cimport Graph
-from lakesuperior.model.graph.triple cimport BufferTriple
-
-cdef extern from 'spookyhash_api.h':
-    uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
-
-cdef:
-    bint graph_eq_fn(Graph g1, Graph g2)
-    int term_cmp_fn(const void* key1, const void* key2)
-    int trp_cmp_fn(const void* key1, const void* key2)
-    size_t term_hash_fn(const void* key, int l, uint32_t seed)
-    size_t trp_hash_fn(const void* key, int l, uint32_t seed)
-
-    bint lookup_none_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_s_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_p_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_o_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_sp_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_so_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_po_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    void add_trp_callback(Graph gr, const TripleKey* spok_p, void* ctx)
-    void del_trp_callback(Graph gr, const TripleKey* spok_p, void* ctx)
-

+ 0 - 244
lakesuperior/model/graph/callbacks.pyx

@@ -1,244 +0,0 @@
-import logging
-
-from libc.stdint cimport uint32_t, uint64_t
-from libc.string cimport memcmp
-
-cimport lakesuperior.cy_include.collections as cc
-cimport lakesuperior.cy_include.spookyhash as sph
-
-from lakesuperior.model.base cimport Buffer, buffer_dump
-from lakesuperior.model.graph.graph cimport Graph
-from lakesuperior.model.graph.triple cimport BufferTriple
-
-logger = logging.getLogger(__name__)
-
-
-cdef int term_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two Buffer objects.
-
-    :rtype: int
-    :return: 0 if the byte streams are the same, another integer otherwise.
-    """
-    b1 = <Buffer *>key1
-    b2 = <Buffer *>key2
-
-    if b1.sz != b2.sz:
-        return 1
-
-    return memcmp(b1.addr, b2.addr, b1.sz)
-
-
-cdef int trp_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two triples in a set.
-
-    s, p, o byte data are compared literally.
-
-    :rtype: int
-    :return: 0 if all three terms point to byte-wise identical data in both
-        triples.
-    """
-    t1 = <BufferTriple *>key1
-    t2 = <BufferTriple *>key2
-
-    return (
-        term_cmp_fn(t1.o, t2.o) or
-        term_cmp_fn(t1.s, t2.s) or
-        term_cmp_fn(t1.p, t2.p)
-    )
-
-
-#cdef int trp_cmp_fn(const void* key1, const void* key2):
-#    """
-#    Compare function for two triples in a set.
-#
-#    Here, pointers to terms are compared for s, p, o. The pointers should be
-#    guaranteed to point to unique values (i.e. no two pointers have the same
-#    term value within a graph).
-#
-#    :rtype: int
-#    :return: 0 if the addresses of all terms are the same, 1 otherwise.
-#    """
-#    t1 = <BufferTriple *>key1
-#    t2 = <BufferTriple *>key2
-#
-#    cdef int is_not_equal = (
-#        t1.s.addr != t2.s.addr or
-#        t1.p.addr != t2.p.addr or
-#        t1.o.addr != t2.o.addr
-#    )
-#
-#    logger.info(f'Triples match: {not(is_not_equal)}')
-#    return is_not_equal
-
-
-cdef bint graph_eq_fn(Graph g1, Graph g2):
-    """
-    Compare 2 graphs for equality.
-
-    Note that this returns the opposite value than the triple and term
-    compare functions: 1 (True) if equal, 0 (False) if not.
-    """
-    cdef:
-        void* el
-        cc.HashSetIter it
-
-    cc.hashset_iter_init(&it, g1._triples)
-    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
-        if cc.hashset_contains(g2._triples, el):
-            return False
-
-    return True
-
-
-cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for serialized terms (:py:class:`Buffer` objects)
-    """
-    return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
-
-
-cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for sets of (serialized) triples.
-
-    This function concatenates the literal terms of the triple as bytes
-    and computes their hash.
-    """
-    trp = <BufferTriple*>key
-    seed64 = <uint64_t>seed
-    seed_dummy = seed64
-
-    cdef sph.spookyhash_context ctx
-
-    sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.p.sz)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.o.sz)
-    sph.spookyhash_final(&ctx, &seed64, &seed_dummy)
-
-    return <size_t>seed64
-
-
-#cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
-#    """
-#    Hash function for sets of (serialized) triples.
-#
-#    This function computes the hash of the concatenated pointer values in the
-#    s, p, o members of the triple. The triple structure is treated as a byte
-#    string. This is safe in spite of byte-wise struct evaluation being a
-#    frowned-upon practice (due to padding issues), because it is assumed that
-#    the input value is always the same type of structure.
-#    """
-#    return <size_t>spookyhash_64(key, l, seed)
-
-
-#cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
-#    """
-#    No-op function that takes a pointer and does *not* hash it.
-#
-#    The pointer value is used as the "hash".
-#    """
-#    return <size_t>key
-
-
-cdef inline bint lookup_none_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Dummy callback for queries with all parameters unbound.
-
-    This function always returns ``True`` 
-    """
-    return True
-
-
-cdef inline bint lookup_s_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``s`` in a triple.
-
-    The function returns ``True`` if ``t1`` matches the first term.
-
-    ``t2`` is not used and is declared only for compatibility with the
-    other interchangeable functions.
-    """
-    return not term_cmp_fn(t1, trp[0].s)
-
-
-cdef inline bint lookup_p_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``p`` in a triple.
-    """
-    return not term_cmp_fn(t1, trp[0].p)
-
-
-cdef inline bint lookup_o_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``o`` in a triple.
-    """
-    return not term_cmp_fn(t1, trp[0].o)
-
-
-cdef inline bint lookup_sp_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``s`` and ``p`` pair.
-    """
-    return (
-            not term_cmp_fn(t1, trp[0].s)
-            and not term_cmp_fn(t2, trp[0].p))
-
-
-cdef inline bint lookup_so_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``s`` and ``o`` pair.
-    """
-    return (
-            not term_cmp_fn(t1, trp[0].s)
-            and not term_cmp_fn(t2, trp[0].o))
-
-
-cdef inline bint lookup_po_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``p`` and ``o`` pair.
-    """
-    return (
-            not term_cmp_fn(t1, trp[0].p)
-            and not term_cmp_fn(t2, trp[0].o))
-
-
-## LOOKUP CALLBACK FUNCTIONS
-
-cdef inline void add_trp_callback(
-    Graph gr, const TripleKey* spok_p, void* ctx
-):
-    """
-    Add a triple to a graph as a result of a lookup callback.
-    """
-    gr.keys.add(spok_p)
-
-
-cdef inline void del_trp_callback(
-    Graph gr, const TripleKey* spok_p, void* ctx
-):
-    """
-    Remove a triple from a graph as a result of a lookup callback.
-    """
-    #logger.info('removing triple: {} {} {}'.format(
-    #    buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
-    #))
-    gr.keys.remove(spok_p)
-
-

+ 11 - 26
lakesuperior/model/graph/graph.pxd

@@ -4,49 +4,34 @@ from cymem.cymem cimport Pool
 
 
 cimport lakesuperior.cy_include.collections as cc
 cimport lakesuperior.cy_include.collections as cc
 
 
-from lakesuperior.model.base cimport Buffer, TripleKey
+from lakesuperior.model.base cimport Key, TripleKey
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.model.structures.keyset cimport Keyset
-
-# Lookup function that returns whether a triple contains a match pattern.
-# Return True if the triple exists, False otherwise.
-ctypedef bint (*lookup_fn_t)(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2)
+from lakesuperior.store.ldp_rs cimport lmdb_triplestore
 
 
 # Callback for an iterator.
 # Callback for an iterator.
 ctypedef void (*lookup_callback_fn_t)(
 ctypedef void (*lookup_callback_fn_t)(
-    Graph gr, const BufferTriple* trp, void* ctx
+    Graph gr, const TripleKey* spok_p, void* ctx
 )
 )
 
 
-ctypedef Buffer SPOBuffer[3]
-ctypedef Buffer *BufferPtr
-
 cdef class Graph:
 cdef class Graph:
     cdef:
     cdef:
-        cc.HashSet *_terms # Set of unique serialized terms.
-        cc.HashSet *_triples # Set of unique triples.
-        # Temp data pool. It gets managed with the object lifecycle via cymem.
-        Pool pool
+        lmdb_triplestore.LmdbTriplestore store
         Keyset keys
         Keyset keys
 
 
         cc.key_compare_ft term_cmp_fn
         cc.key_compare_ft term_cmp_fn
         cc.key_compare_ft trp_cmp_fn
         cc.key_compare_ft trp_cmp_fn
 
 
-        bint trp_contains(self, const BufferTriple* btrp)
-
-        # Basic graph operations.
-        void ip_union(self, Graph other) except *
-        void ip_subtraction(self, Graph other) except *
-        void ip_intersection(self, Graph other) except *
-        void ip_xor(self, Graph other) except *
-        Graph empty_copy(self)
+        Graph copy(self, str uri=*)
+        Graph empty_copy(self, str uri=*)
         void _match_ptn_callback(
         void _match_ptn_callback(
             self, pattern, Graph gr,
             self, pattern, Graph gr,
             lookup_callback_fn_t callback_fn, void* ctx=*
             lookup_callback_fn_t callback_fn, void* ctx=*
         ) except *
         ) except *
 
 
-    cpdef union_(self, Graph other)
-    cpdef subtraction(self, Graph other)
-    cpdef intersection(self, Graph other)
-    cpdef xor(self, Graph other)
     cpdef void set(self, tuple trp) except *
     cpdef void set(self, tuple trp) except *
+
+
+cdef:
+    void add_trp_callback(Graph gr, const TripleKey* spok_p, void* ctx)
+    void del_trp_callback(Graph gr, const TripleKey* spok_p, void* ctx)

+ 139 - 284
lakesuperior/model/graph/graph.pyx

@@ -10,13 +10,14 @@ from libc.stdlib cimport free
 from cymem.cymem cimport Pool
 from cymem.cymem cimport Pool
 
 
 cimport lakesuperior.cy_include.collections as cc
 cimport lakesuperior.cy_include.collections as cc
-cimport lakesuperior.model.graph.callbacks as cb
+cimport lakesuperior.model.structures.callbacks as cb
+cimport lakesuperior.model.structures.keyset as kset
 
 
-from lakesuperior.model.base cimport Buffer, buffer_dump
-from lakesuperior.model.structures.keyset cimport Keyset
+from lakesuperior.model.base cimport Key, TripleKey
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.structures.hash cimport term_hash_seed32
 from lakesuperior.model.structures.hash cimport term_hash_seed32
+from lakesuperior.model.structures.keyset cimport Keyset
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
@@ -28,27 +29,52 @@ cdef class Graph:
     Most functions should mimic RDFLib's graph with less overhead. It uses
     Most functions should mimic RDFLib's graph with less overhead. It uses
     the same funny but functional slicing notation.
     the same funny but functional slicing notation.
 
 
-    A Graph can be instantiated from a store lookup. This makes it
-    possible to use a Keyset to perform initial filtering via identity by key,
-    then the filtered Keyset can be converted into a set of meaningful terms.
-
-    An instance of this class can also be converted to and from a
-    ``rdflib.Graph`` instance.
+    A Graph contains a :py:class:`lakesuperior.model.structures.keyset.Keyset`
+    at its core and is bound to a
+    :py:class:`~lakesuperior.store.ldp_rs.lmdb_triplestore.LmdbTriplestore`.
+    This makes lookups and boolean operations very efficient because all these
+    operations are performed on an array of integers.
+
+    In order to retrieve RDF values from a ``Graph``, the underlying store
+    must be looked up. This can be done in a different transaction than the
+    one used to create or otherwise manipulate the graph.
+
+    Every time a term is looked up or added to even a temporary graph, that
+    term is added to the store and creates a key. This is because in the
+    majority of cases that term is bound to be stored permanently anyway, and
+    it's more efficient to hash it and allocate it immediately. A cleanup
+    function to remove all orphaned terms (not in any triple or context index)
+    can be later devised to compact the database.
+
+    An instance of this class can also be converted to a ``rdflib.Graph``
+    instance.
     """
     """
 
 
     def __cinit__(
     def __cinit__(
         self, store, size_t ct=0, str uri=None, set data=set()
         self, store, size_t ct=0, str uri=None, set data=set()
     ):
     ):
         """
         """
-        Initialize the graph, optionally with Python data.
+        Initialize the graph, optionally from Python/RDFlib data.
+
+        :type store: lakesuperior.store.ldp_rs.lmdb_triplestore.LmdbTriplestore
+        :param store: Triplestore where keys are mapped to terms. By default
+            this is the default application store
+            (``env.app_globals.rdf_store``).
 
 
-        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
+        :param size_t ct: Initial number of allocated triples.
+
+        :param str uri: If specified, the graph becomes a named graph and can
+            utilize the :py:meth:`value()` method and special slicing notation.
+
+        :param set data: If specified, ``ct`` is ignored and an initial key
+            set is created from a set of 3-tuples of :py:class:``rdflib.Term``
+            instances.
         """
         """
 
 
         self.pool = Pool()
         self.pool = Pool()
 
 
         if not store:
         if not store:
-            store = env.app_globals.ldprs_store
+            store = env.app_globals.rdf_store
         # Initialize empty data set.
         # Initialize empty data set.
         if data:
         if data:
             # Populate with provided Python set.
             # Populate with provided Python set.
@@ -73,7 +99,7 @@ cdef class Graph:
     @property
     @property
     def data(self):
     def data(self):
         """
         """
-        Triple data as a Python set.
+        Triple data as a Python/RDFlib set.
 
 
         :rtype: set
         :rtype: set
         """
         """
@@ -82,11 +108,11 @@ cdef class Graph:
         ret = set()
         ret = set()
 
 
         self.seek()
         self.seek()
-        while self.get_next(&spok):
-            ret.add((
-                self.store.from_key(trp[0]),
-                self.store.from_key(trp[1]),
-                self.store.from_key(trp[2])
+        while self.keys.get_next(&spok):
+            ret.keys.add((
+                self.store.from_key(spok[0]),
+                self.store.from_key(spok[1]),
+                self.store.from_key(spok[2])
             ))
             ))
 
 
         return ret
         return ret
@@ -101,7 +127,7 @@ cdef class Graph:
 
 
     def __eq__(self, other):
     def __eq__(self, other):
         """ Equality operator between ``Graph`` instances. """
         """ Equality operator between ``Graph`` instances. """
-        return len(self ^ other) == 0
+        return len(self & other) == 0
 
 
 
 
     def __repr__(self):
     def __repr__(self):
@@ -125,55 +151,74 @@ cdef class Graph:
 
 
     def __add__(self, other):
     def __add__(self, other):
         """ Alias for set-theoretical union. """
         """ Alias for set-theoretical union. """
-        return self.union_(other)
+        return self.__or__(other)
 
 
 
 
     def __iadd__(self, other):
     def __iadd__(self, other):
         """ Alias for in-place set-theoretical union. """
         """ Alias for in-place set-theoretical union. """
-        self.ip_union(other)
-        return self
+        return self.__ior__(other)
 
 
 
 
     def __sub__(self, other):
     def __sub__(self, other):
         """ Set-theoretical subtraction. """
         """ Set-theoretical subtraction. """
-        return self.subtraction(other)
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.subtract(self.keys, other.keys)
+
+        return gr3
 
 
 
 
     def __isub__(self, other):
     def __isub__(self, other):
         """ In-place set-theoretical subtraction. """
         """ In-place set-theoretical subtraction. """
-        self.ip_subtraction(other)
+        self.keys = kset.subtract(self.keys, other.keys)
+
         return self
         return self
 
 
     def __and__(self, other):
     def __and__(self, other):
         """ Set-theoretical intersection. """
         """ Set-theoretical intersection. """
-        return self.intersection(other)
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.intersect(self.keys, other.keys)
+
+        return gr3
 
 
 
 
     def __iand__(self, other):
     def __iand__(self, other):
         """ In-place set-theoretical intersection. """
         """ In-place set-theoretical intersection. """
-        self.ip_intersection(other)
+        self.keys = kset.intersect(self.keys, other.keys)
+
         return self
         return self
 
 
 
 
     def __or__(self, other):
     def __or__(self, other):
         """ Set-theoretical union. """
         """ Set-theoretical union. """
-        return self.union_(other)
+        cdef Graph gr3 = self.copy()
+
+        gr3.keys = kset.merge(self.keys, other.keys)
+
+        return gr3
 
 
 
 
     def __ior__(self, other):
     def __ior__(self, other):
         """ In-place set-theoretical union. """
         """ In-place set-theoretical union. """
-        self.ip_union(other)
+        self.keys = kset.merge(self.keys, other.keys)
+
         return self
         return self
 
 
 
 
     def __xor__(self, other):
     def __xor__(self, other):
         """ Set-theoretical exclusive disjunction (XOR). """
         """ Set-theoretical exclusive disjunction (XOR). """
-        return self.xor(other)
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.xor(self.keys, other.keys)
+
+        return gr3
 
 
 
 
     def __ixor__(self, other):
     def __ixor__(self, other):
         """ In-place set-theoretical exclusive disjunction (XOR). """
         """ In-place set-theoretical exclusive disjunction (XOR). """
-        self.ip_xor(other)
+        self.keys = kset.xor(self.keys, other.keys)
+
         return self
         return self
 
 
 
 
@@ -191,7 +236,7 @@ cdef class Graph:
             self.store.to_key(trp[2]),
             self.store.to_key(trp[2]),
         ]
         ]
 
 
-        return self.data.contains(&spok)
+        return self.keys.contains(&spok)
 
 
 
 
     def __iter__(self):
     def __iter__(self):
@@ -261,7 +306,7 @@ cdef class Graph:
         return {r[i] for r in self.data}
         return {r[i] for r in self.data}
 
 
 
 
-    def add_triples(self, trp):
+    def add_triples(self, triples):
         """
         """
         Add triples to the graph.
         Add triples to the graph.
 
 
@@ -269,12 +314,15 @@ cdef class Graph:
 
 
         :param iterable triples: iterable of 3-tuple triples.
         :param iterable triples: iterable of 3-tuple triples.
         """
         """
+        cdef TripleKey spok
+
         for s, p, o in triples:
         for s, p, o in triples:
-            self.keys.add([
+            spok = [
                 self.store.to_key(s),
                 self.store.to_key(s),
                 self.store.to_key(p),
                 self.store.to_key(p),
                 self.store.to_key(o),
                 self.store.to_key(o),
-            ], True)
+            ]
+            self.keys.add(&spok, True)
 
 
 
 
     def remove(self, pattern):
     def remove(self, pattern):
@@ -284,240 +332,32 @@ cdef class Graph:
         The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
         The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
         """
         """
         self._match_ptn_callback(
         self._match_ptn_callback(
-            pattern, self, cb.del_trp_callback, NULL
+            pattern, self, del_trp_callback, NULL
         )
         )
 
 
 
 
     ## CYTHON-ACCESSIBLE BASIC METHODS ##
     ## CYTHON-ACCESSIBLE BASIC METHODS ##
 
 
-    cdef Graph empty_copy(self):
-        """
-        Create an empty copy carrying over some key properties.
-
-        Override in subclasses to accommodate for different init properties.
+    cdef Graph copy(self, str uri=None):
         """
         """
-        return self.__class__(self.ct, self.store, uri=self.id)
-
+        Create copy of the graph with a different (or no) URI.
 
 
-    cpdef union_(self, Graph other):
+        :param str uri: URI of the new graph. This should be different from
+            the original.
         """
         """
-        Perform set union resulting in a new Graph instance.
+        cdef Graph new_gr = Graph(self.store, self.ct, uri=uri)
 
 
-        TODO Allow union of multiple graphs at a time.
+        new_gr.keys = self.keys.copy()
 
 
-        :param Graph other: The other graph to merge.
 
 
-        :rtype: Graph
-        :return: A new Graph instance.
+    cdef Graph empty_copy(self, str uri=None):
         """
         """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-            BufferTriple *trp
-
-        new_gr = self.empty_copy()
+        Create an empty copy with same capacity and store binding.
 
 
-        for gr in (self, other):
-            cc.hashset_iter_init(&it, gr._triples)
-            while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-                bt = <BufferTriple*>cur
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_union(self, Graph other) except *:
-        """
-        Perform an in-place set union that adds triples to this instance
-
-        TODO Allow union of multiple graphs at a time.
-
-        :param Graph other: The other graph to merge.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        cc.hashset_iter_init(&it, other._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            self.add_triple(bt, True)
-
-
-    cpdef intersection(self, Graph other):
-        """
-        Graph intersection.
-
-        :param Graph other: The other graph to intersect.
-
-        :rtype: Graph
-        :return: A new Graph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        new_gr = self.empty_copy()
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if other.trp_contains(bt):
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_intersection(self, Graph other) except *:
+        :param str uri: URI of the new graph. This should be different from
+            the original.
         """
         """
-        In-place graph intersection.
-
-        Triples not in common with another graph are removed from the current
-        one.
-
-        :param Graph other: The other graph to intersect.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not other.trp_contains(bt):
-                self.remove_triple(bt)
-
-
-    cpdef subtraction(self, Graph other):
-        """
-        Graph set-theoretical subtraction.
-
-        Create a new graph with the triples of this graph minus the ones in
-        common with the other graph.
-
-        :param Graph other: The other graph to subtract to this.
-
-        :rtype: Graph
-        :return: A new Graph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        new_gr = self.empty_copy()
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not other.trp_contains(bt):
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_subtraction(self, Graph other) except *:
-        """
-        In-place graph subtraction.
-
-        Triples in common with another graph are removed from the current one.
-
-        :param Graph other: The other graph to intersect.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if other.trp_contains(bt):
-                self.remove_triple(bt)
-
-
-    cpdef xor(self, Graph other):
-        """
-        Graph Exclusive disjunction (XOR).
-
-        :param Graph other: The other graph to perform XOR with.
-
-        :rtype: Graph
-        :return: A new Graph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-            BufferTriple* bt
-
-        new_gr = self.empty_copy()
-
-        # Add triples in this and not in other.
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not other.trp_contains(bt):
-                new_gr.add_triple(bt, True)
-
-        # Other way around.
-        cc.hashset_iter_init(&it, other._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not self.trp_contains(bt):
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_xor(self, Graph other) except *:
-        """
-        In-place graph XOR.
-
-        Triples in common with another graph are removed from the current one,
-        and triples not in common will be added from the other one.
-
-        :param Graph other: The other graph to perform XOR with.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-            # TODO This could be more efficient to stash values in a simple
-            # array, but how urgent is it to improve an in-place XOR?
-            Graph tmp = Graph()
-
-        # Add *to the tmp graph* triples in other graph and not in this graph.
-        cc.hashset_iter_init(&it, other._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not self.trp_contains(bt):
-                tmp.add_triple(bt)
-
-        # Remove triples in common.
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if other.trp_contains(bt):
-                self.remove_triple(bt)
-
-        self |= tmp
-
-
-    cdef bint trp_contains(self, const BufferTriple* btrp):
-        cdef:
-            cc.HashSetIter it
-            void* cur
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            if self.trp_cmp_fn(cur, btrp) == 0:
-                return True
-        return False
+        return Graph(self.store, self.ct, uri=uri)
 
 
 
 
     cpdef void set(self, tuple trp) except *:
     cpdef void set(self, tuple trp) except *:
@@ -594,11 +434,10 @@ cdef class Graph:
         "return: New Graph instance with matching triples.
         "return: New Graph instance with matching triples.
         """
         """
         cdef:
         cdef:
-            void* cur
-            BufferTriple trp
-            Graph res_gr = Graph()
+            Graph res_gr = self.empty_copy()
 
 
-        self._match_ptn_callback(pattern, res_gr, cb.add_trp_callback, NULL)
+        self._match_ptn_callback(pattern, res_gr, add_trp_callback, NULL)
+        res_gr.data.resize()
 
 
         return res_gr
         return res_gr
 
 
@@ -615,14 +454,8 @@ cdef class Graph:
         or a different one.
         or a different one.
         """
         """
         cdef:
         cdef:
-            void* cur
-            Buffer t1, t2
-            Buffer ss, sp, so
-            BufferTriple trp
-            BufferTriple* trp_p
-            lookup_fn_t cmp_fn
-            cc.HashSetIter it
-
+            kset.key_cmp_fn_t cmp_fn
+            Key k1, k2, sk, pk, ok
             TripleKey spok
             TripleKey spok
 
 
         s, p, o = pattern
         s, p, o = pattern
@@ -636,37 +469,59 @@ cdef class Graph:
                 self.store.to_key(o),
                 self.store.to_key(o),
             ]
             ]
 
 
-            if self.keys.contains(spok):
+            if self.keys.contains(&spok):
                 callback_fn(gr, &spok, ctx)
                 callback_fn(gr, &spok, ctx)
                 return
                 return
 
 
         if s is not None:
         if s is not None:
-            term.serialize_from_rdflib(s, &t1)
+            k1 = self.store.to_key(s)
             if p is not None:
             if p is not None:
-                cmp_fn = cb.lookup_sp_cmp_fn
-                term.serialize_from_rdflib(p, &t2)
+                cmp_fn = cb.lookup_skpk_cmp_fn
+                k2 = self.store.to_key(p)
             elif o is not None:
             elif o is not None:
-                cmp_fn = cb.lookup_so_cmp_fn
-                term.serialize_from_rdflib(o, &t2)
+                cmp_fn = cb.lookup_skok_cmp_fn
+                k2 = self.store.to_key(o)
             else:
             else:
-                cmp_fn = cb.lookup_s_cmp_fn
+                cmp_fn = cb.lookup_sk_cmp_fn
         elif p is not None:
         elif p is not None:
-            term.serialize_from_rdflib(p, &t1)
+            k1 = self.store.to_key(p)
             if o is not None:
             if o is not None:
-                cmp_fn = cb.lookup_po_cmp_fn
-                term.serialize_from_rdflib(o, &t2)
+                cmp_fn = cb.lookup_pkok_cmp_fn
+                k2 = self.store.to_key(o)
             else:
             else:
-                cmp_fn = cb.lookup_p_cmp_fn
+                cmp_fn = cb.lookup_pk_cmp_fn
         elif o is not None:
         elif o is not None:
-            cmp_fn = cb.lookup_o_cmp_fn
-            term.serialize_from_rdflib(o, &t1)
+            cmp_fn = cb.lookup_ok_cmp_fn
+            k1 = self.store.to_key(o)
         else:
         else:
             cmp_fn = cb.lookup_none_cmp_fn
             cmp_fn = cb.lookup_none_cmp_fn
 
 
         # Iterate over serialized triples.
         # Iterate over serialized triples.
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            trp_p = <BufferTriple*>cur
-            if cmp_fn(trp_p, &t1, &t2):
-                callback_fn(gr, trp_p, ctx)
+        while self.keys.get_next(&spok):
+            if cmp_fn(&spok, k1, k2):
+                callback_fn(gr, &spok, ctx)
+
+
+
+## LOOKUP CALLBACK FUNCTIONS
+
+cdef inline void add_trp_callback(
+    Graph gr, const TripleKey* spok_p, void* ctx
+):
+    """
+    Add a triple to a graph as a result of a lookup callback.
+    """
+    gr.keys.add(spok_p)
+
+
+cdef inline void del_trp_callback(
+    Graph gr, const TripleKey* spok_p, void* ctx
+):
+    """
+    Remove a triple from a graph as a result of a lookup callback.
+    """
+    #logger.info('removing triple: {} {} {}'.format(
+    #    buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
+    #))
+    gr.keys.remove(spok_p)
 
 

+ 15 - 6
lakesuperior/model/structures/callbacks.pxd

@@ -2,20 +2,29 @@ from lakesuperior.model.base cimport Key, TripleKey
 
 
 cdef:
 cdef:
     bint lookup_sk_cmp_fn(
     bint lookup_sk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
+        const TripleKey* spok, const Key k1, const Key k2
     )
     )
+
     bint lookup_pk_cmp_fn(
     bint lookup_pk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
+        const TripleKey* spok, const Key k1, const Key k2
     )
     )
+
     bint lookup_ok_cmp_fn(
     bint lookup_ok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
+        const TripleKey* spok, const Key k1, const Key k2
     )
     )
+
     bint lookup_skpk_cmp_fn(
     bint lookup_skpk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
+        const TripleKey* spok, const Key k1, const Key k2
     )
     )
+
     bint lookup_skok_cmp_fn(
     bint lookup_skok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
+        const TripleKey* spok, const Key k1, const Key k2
     )
     )
+
     bint lookup_pkok_cmp_fn(
     bint lookup_pkok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_none_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
     )
     )

+ 39 - 24
lakesuperior/model/structures/callbacks.pyx

@@ -1,39 +1,54 @@
 from lakesuperior.model.base cimport Key, TripleKey
 from lakesuperior.model.base cimport Key, TripleKey
 
 
-cdef bint lookup_sk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
+cdef inline bint lookup_sk_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
     """ Keyset lookup for S key. """
     """ Keyset lookup for S key. """
-    return spok[0] == k1
+    return spok[0][0] == k1
 
 
-cdef bint lookup_pk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
+
+cdef inline bint lookup_pk_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
     """ Keyset lookup for P key. """
     """ Keyset lookup for P key. """
-    return spok[1] == k1
+    return spok[0][1] == k1
+
 
 
-cdef bint lookup_ok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
+cdef inline bint lookup_ok_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
     """ Keyset lookup for O key. """
     """ Keyset lookup for O key. """
-    return spok[2] == k1
+    return spok[0][2] == k1
 
 
-cdef bint lookup_skpk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
+
+cdef inline bint lookup_skpk_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
     """ Keyset lookup for S and P keys. """
     """ Keyset lookup for S and P keys. """
-    return spok[0] == k1 and spok[1] == k2
+    return spok[0][0] == k1 and spok[0][1] == k2
+
 
 
-cdef bint lookup_skok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
+cdef inline bint lookup_skok_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
     """ Keyset lookup for S and O keys. """
     """ Keyset lookup for S and O keys. """
-    return spok[0] == k1 and spok[2] == k2
+    return spok[0][0] == k1 and spok[0][2] == k2
 
 
-cdef bint lookup_pkok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
+
+cdef inline bint lookup_pkok_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
     """ Keyset lookup for P and O keys. """
     """ Keyset lookup for P and O keys. """
-    return spok[1] == k1 and spok[2] == k2
+    return spok[0][1] == k1 and spok[0][2] == k2
+
+
+cdef inline bint lookup_none_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """
+    Dummy callback for queries with all parameters unbound.
 
 
+    This function always returns ``True``.
+    """
+    return True
 
 

+ 10 - 5
lakesuperior/model/structures/keyset.pxd

@@ -3,7 +3,7 @@ from lakesuperior.model.base cimport (
 )
 )
 
 
 ctypedef bint (*key_cmp_fn_t)(
 ctypedef bint (*key_cmp_fn_t)(
-    const TripleKey* spok, const Key* k1, const Key* k2
+    const TripleKey* spok, const Key k1, const Key k2
 )
 )
 
 
 cdef class Keyset:
 cdef class Keyset:
@@ -22,11 +22,16 @@ cdef class Keyset:
         size_t size(self)
         size_t size(self)
         size_t tell(self)
         size_t tell(self)
         bint get_next(self, TripleKey* item)
         bint get_next(self, TripleKey* item)
-        void add(self, const TripleKey* val) except *
+        void add(self, const TripleKey* val, bint check_dup=*) except *
         void remove(self, const TripleKey* val) except *
         void remove(self, const TripleKey* val) except *
         bint contains(self, const TripleKey* val)
         bint contains(self, const TripleKey* val)
         Keyset copy(self)
         Keyset copy(self)
+        Keyset sparse_copy(self)
         void resize(self, size_t size=*) except *
         void resize(self, size_t size=*) except *
-        Keyset lookup(
-            self, const Key* sk, const Key* pk, const Key* ok
-        )
+        Keyset lookup(self, const Key sk, const Key pk, const Key ok)
+
+cdef:
+    Keyset merge(Keyset ks1, Keyset ks2)
+    Keyset subtract(Keyset ks1, Keyset ks2)
+    Keyset intersect(Keyset ks1, Keyset ks2)
+    Keyset xor(Keyset ks1, Keyset ks2)

+ 123 - 33
lakesuperior/model/structures/keyset.pyx

@@ -15,7 +15,7 @@ cdef class Keyset:
     """
     """
     Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
     Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
     """
     """
-    def __cinit__(self, size_t ct=0, expand_ratio=.5, *args, **kwargs):
+    def __cinit__(self, size_t ct=0, expand_ratio=.5):
         """
         """
         Initialize and allocate memory for the data set.
         Initialize and allocate memory for the data set.
 
 
@@ -67,7 +67,7 @@ cdef class Keyset:
         return self._cur
         return self._cur
 
 
 
 
-    cdef bint get_next(self, TripleKey* item):
+    cdef bint get_next(self, TripleKey* val):
         """
         """
         Populate the current value and advance the cursor by 1.
         Populate the current value and advance the cursor by 1.
 
 
@@ -81,25 +81,25 @@ cdef class Keyset:
         if self._cur >= self._free_i:
         if self._cur >= self._free_i:
             return False
             return False
 
 
-        item[0] = self.data[self._cur]
+        val[0] = self.data[self._cur]
         self._cur += 1
         self._cur += 1
 
 
         return True
         return True
 
 
 
 
-    cdef void add(self, const TripleKey* val, check_dup=False) except *:
+    cdef void add(self, const TripleKey* val, bint check_dup=False) except *:
         """
         """
         Add a triple key to the array.
         Add a triple key to the array.
         """
         """
-        # Optionally check for duplicates.
-        if check_dup and self.contains(val):
+        # Check for deleted triples and optionally duplicates.
+        if val[0] == NULL_TRP or (check_dup and self.contains(val)):
             return
             return
 
 
         if self._free_i >= self.threshod:
         if self._free_i >= self.threshod:
             if self.expand_ratio > 0:
             if self.expand_ratio > 0:
                 # In some edge cases, a very small ratio may round down to a
                 # In some edge cases, a very small ratio may round down to a
                 # zero increase, so the baseline increase is 1 element.
                 # zero increase, so the baseline increase is 1 element.
-                self.resize(1 + self.ct * (1 + self.expand_ratio))
+                self.resize(1 + <size_t>(self.ct * (1 + self.expand_ratio)))
             else:
             else:
                 raise MemoryError('No space left in key set.')
                 raise MemoryError('No space left in key set.')
 
 
@@ -117,32 +117,15 @@ cdef class Keyset:
         foreseen, using :py:meth:`subtract` is advised.
         foreseen, using :py:meth:`subtract` is advised.
         """
         """
 
 
-        cdef TripleKey stored_val
+        cdef TripleKey* stored_val
 
 
         self.seek()
         self.seek()
-        while self.get_next(&stored_val):
+        while self.get_next(stored_val):
             if memcmp(val, stored_val, TRP_KLEN) == 0:
             if memcmp(val, stored_val, TRP_KLEN) == 0:
                 stored_val[0] = NULL_TRP
                 stored_val[0] = NULL_TRP
                 return
                 return
 
 
 
 
-    cdef Keyset subtract(self, const Keyset* other):
-        """
-        Create a Keyset by subtracting an``other`` Keyset from the current one.
-
-        :rtype: Keyset
-        """
-        cdef Keyset res = Keyset(self.ct)
-
-        self.seek()
-        while self.get_next(&val):
-            if not other.contains(val):
-                res.add(val)
-        res.resize()
-
-        return res
-
-
     cdef bint contains(self, const TripleKey* val):
     cdef bint contains(self, const TripleKey* val):
         """
         """
         Whether a value exists in the set.
         Whether a value exists in the set.
@@ -160,13 +143,34 @@ cdef class Keyset:
         """
         """
         Copy a Keyset.
         Copy a Keyset.
         """
         """
-        cdef Keyset new_ks = Keyset(self.ct)
+        cdef Keyset new_ks = Keyset(self.ct, expand_ratio=self.expand_ratio)
         memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
         memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
         new_ks.seek()
         new_ks.seek()
 
 
         return new_ks
         return new_ks
 
 
 
 
+    cdef Keyset sparse_copy(self):
+        """
+        Copy a Keyset and plug holes.
+
+        ``NULL_TRP`` values left from removing triple keys are skipped in the
+        copy and the set is shrunk to its used size.
+        """
+        cdef:
+            TripleKey val
+            Keyset new_ks = Keyset(self.ct, self.expand_ratio)
+
+        self.seek()
+        while self.get_next(&val):
+            if val != NULL_TRP:
+                new_ks.add(&val)
+
+        new_ks.resize()
+
+        return new_ks
+
+
     cdef void resize(self, size_t size=0) except *:
     cdef void resize(self, size_t size=0) except *:
         """
         """
         Change the array capacity.
         Change the array capacity.
@@ -191,9 +195,7 @@ cdef class Keyset:
         self.seek()
         self.seek()
 
 
 
 
-    cdef Keyset lookup(
-            self, const Key* sk, const Key* pk, const Key* ok
-    ):
+    cdef Keyset lookup(self, const Key sk, const Key pk, const Key ok):
         """
         """
         Look up triple keys.
         Look up triple keys.
 
 
@@ -209,8 +211,7 @@ cdef class Keyset:
         cdef:
         cdef:
             TripleKey spok
             TripleKey spok
             Keyset ret = Keyset(self.ct)
             Keyset ret = Keyset(self.ct)
-            Key* k1 = NULL
-            Key* k2 = NULL
+            Key k1, k2
             key_cmp_fn_t cmp_fn
             key_cmp_fn_t cmp_fn
 
 
         if sk and pk and ok: # s p o
         if sk and pk and ok: # s p o
@@ -247,9 +248,98 @@ cdef class Keyset:
 
 
         self.seek()
         self.seek()
         while self.get_next(&spok):
         while self.get_next(&spok):
-            if cmp_fn(<TripleKey*>spok, k1, k2):
+            if cmp_fn(&spok, k1, k2):
                 ret.add(&spok)
                 ret.add(&spok)
 
 
         ret.resize()
         ret.resize()
 
 
         return ret
         return ret
+
+
+
+## Boolean operations.
+
+cdef Keyset merge(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by merging ``ks2`` into a copy of ``ks1``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = ks1.copy()
+
+    ks2.seek()
+    while ks2.get_next(&val):
+        ks3.add(&val, True)
+
+    ks3.resize()
+
+    return ks3
+
+
+cdef Keyset subtract(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by subtracting the ``ks2`` Keyset from ``ks1``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = Keyset(ks1.ct)
+
+    ks1.seek()
+    while ks1.get_next(&val):
+        if val != NULL_TRP and not ks2.contains(&val):
+            ks3.add(&val)
+
+    ks3.resize()
+
+    return ks3
+
+
+cdef Keyset intersect(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset from the intersection of ``ks1`` and ``ks2``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = Keyset(ks1.ct)
+
+    ks1.seek()
+    while ks1.get_next(&val):
+        if val != NULL_TRP and ks2.contains(&val):
+            ks3.add(&val)
+
+    ks3.resize()
+
+    return ks3
+
+
+cdef Keyset xor(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by exclusive disjunction (XOR) of ``ks1`` and ``ks2``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = Keyset(ks1.ct + ks2.ct)
+
+    ks1.seek()
+    while ks1.get_next(&val):
+        if val != NULL_TRP and not ks2.contains(&val):
+            ks3.add(&val)
+
+    ks2.seek()
+    while ks2.get_next(&val):
+        if val != NULL_TRP and not ks1.contains(&val):
+            ks3.add(&val)
+
+    ks3.resize()
+
+    return ks3
+
+

+ 4 - 8
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -28,19 +28,15 @@ cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef tuple all_namespaces(self)
     cpdef tuple all_namespaces(self)
-    cpdef Graph graph_lookup(
-        self, triple_pattern, context=*, uri=*, copy=*
-    )
 
 
     cdef:
     cdef:
-        void _add_graph(self, Buffer* pk_gr) except *
         void _index_triple(self, int op, TripleKey spok) except *
         void _index_triple(self, int op, TripleKey spok) except *
-        Keyset triple_keys(self, tuple triple_pattern, context=*)
+        Graph triple_keys(self, tuple triple_pattern, str context=*, str uri=*)
         void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *
         void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *
         void lookup_term(self, const Key* tk, Buffer* data) except *
         void lookup_term(self, const Key* tk, Buffer* data) except *
-        Keyset _lookup(self, tuple triple_pattern)
-        Keyset _lookup_1bound(self, unsigned char idx, Key luk)
-        Keyset _lookup_2bound(
+        Graph _lookup(self, tuple triple_pattern)
+        Graph _lookup_1bound(self, unsigned char idx, Key luk)
+        Graph _lookup_2bound(
             self, unsigned char idx1, unsigned char idx2, DoubleKey tks
             self, unsigned char idx1, unsigned char idx2, DoubleKey tks
         )
         )
         object from_key(self, const Key tk)
         object from_key(self, const Key tk)

+ 23 - 19
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -280,7 +280,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
     cpdef add_graph(self, c):
     cpdef add_graph(self, c):
         """
         """
-        Add a graph to the database.
+        Add a graph (context) to the database.
 
 
         This creates an empty graph by associating the graph URI with the
         This creates an empty graph by associating the graph URI with the
         pickled `None` value. This prevents from removing the graph when all
         pickled `None` value. This prevents from removing the graph when all
@@ -297,7 +297,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             c = c.identifier
             c = c.identifier
 
 
         ck = self.to_key(c)
         ck = self.to_key(c)
-        if not self._key_exists(chash, HLEN, b'th:t'):
+        if not self._key_exists(<unsigned char*>ck, KLEN, b'c:'):
             # Insert context term if not existing.
             # Insert context term if not existing.
             if self.is_txn_rw:
             if self.is_txn_rw:
                 _txn = self.txn
                 _txn = self.txn
@@ -351,7 +351,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             if context is not None:
             if context is not None:
                 ck_v.mv_data = &ck
                 ck_v.mv_data = &ck
                 ck_v.mv_size = KLEN
                 ck_v.mv_size = KLEN
-                while match_set.get_next(&spok_cur):
+                while match_set.keys.get_next(&spok_cur):
                     spok_v.mv_data = spok_cur
                     spok_v.mv_data = spok_cur
                     # Delete spo:c entry.
                     # Delete spo:c entry.
                     try:
                     try:
@@ -383,13 +383,13 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                             _check(lmdb.mdb_cursor_get(
                             _check(lmdb.mdb_cursor_get(
                                 dcur, &spok_v, NULL, lmdb.MDB_SET))
                                 dcur, &spok_v, NULL, lmdb.MDB_SET))
                         except KeyNotFoundError:
                         except KeyNotFoundError:
-                            self._index_triple(IDX_OP_REMOVE, <TripleKey>spok_cur)
+                            self._index_triple(IDX_OP_REMOVE, spok_cur)
 
 
             # If no context is specified, remove all associations.
             # If no context is specified, remove all associations.
             else:
             else:
                 logger.debug('Removing triples in all contexts.')
                 logger.debug('Removing triples in all contexts.')
                 # Loop over all SPO matching the triple pattern.
                 # Loop over all SPO matching the triple pattern.
-                while match_set.get_next(&spok_cur):
+                while match_set.keys.get_next(&spok_cur):
                     spok_v.mv_data = spok_cur
                     spok_v.mv_data = spok_cur
                     # Loop over all context associations for this SPO.
                     # Loop over all context associations for this SPO.
                     try:
                     try:
@@ -425,7 +425,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                             pass
                             pass
                         else:
                         else:
                             lmdb.mdb_cursor_del(dcur, lmdb.MDB_NODUPDATA)
                             lmdb.mdb_cursor_del(dcur, lmdb.MDB_NODUPDATA)
-                            self._index_triple(IDX_OP_REMOVE, <TripleKey>spok_cur)
+                            self._index_triple(IDX_OP_REMOVE, spok_cur)
 
 
         finally:
         finally:
             self._cur_close(dcur)
             self._cur_close(dcur)
@@ -617,8 +617,8 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         cur = self._cur_open('spo:c')
         cur = self._cur_open('spo:c')
         try:
         try:
             key_v.mv_size = TRP_KLEN
             key_v.mv_size = TRP_KLEN
-            rset.seek()
-            while rset.get_next(&it_cur):
+            rset.keys.seek()
+            while rset.keys.get_next(&it_cur):
                 key_v.mv_data = it_cur
                 key_v.mv_data = it_cur
                 # Get contexts associated with each triple.
                 # Get contexts associated with each triple.
                 contexts = []
                 contexts = []
@@ -645,7 +645,9 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             self._cur_close(cur)
             self._cur_close(cur)
 
 
 
 
-    cdef Graph triple_keys(self, tuple triple_pattern, context=None, uri):
+    cdef Graph triple_keys(
+        self, tuple triple_pattern, str context=None, str uri=None
+    ):
         """
         """
         Top-level lookup method.
         Top-level lookup method.
 
 
@@ -697,7 +699,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                         #logger.debug('spok / ck pair not found.')
                         #logger.debug('spok / ck pair not found.')
                         return Graph(self)
                         return Graph(self)
                     ret = Graph(self, 1)
                     ret = Graph(self, 1)
-                    ret.add(&spok)
+                    ret.keys.add(&spok)
 
 
                     return ret
                     return ret
 
 
@@ -720,7 +722,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                         # Loop over page data.
                         # Loop over page data.
                         spok_page = <TripleKey*>data_v.mv_data
                         spok_page = <TripleKey*>data_v.mv_data
                         for i in range(data_v.mv_size // TRP_KLEN):
                         for i in range(data_v.mv_size // TRP_KLEN):
-                            ret.add(spok_page + i)
+                            ret.keys.add(spok_page + i)
 
 
                         try:
                         try:
                             # Get next page.
                             # Get next page.
@@ -742,7 +744,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
                     flt_res = Graph(self, res.ct)
                     flt_res = Graph(self, res.ct)
                     res.seek()
                     res.seek()
-                    while res.get_next(&spok):
+                    while res.keys.get_next(&spok):
                         data_v.mv_data = spok
                         data_v.mv_data = spok
                         try:
                         try:
                             # Verify that the triple is associated with the
                             # Verify that the triple is associated with the
@@ -810,7 +812,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                         return Graph(self)
                         return Graph(self)
 
 
                     matches = Graph(self, 1)
                     matches = Graph(self, 1)
-                    matches.data.add(&spok)
+                    matches.keys.add(&spok)
                     return matches
                     return matches
 
 
                 # s p ?
                 # s p ?
@@ -856,7 +858,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     dcur, &key_v, &data_v, lmdb.MDB_FIRST))
                     dcur, &key_v, &data_v, lmdb.MDB_FIRST))
             while True:
             while True:
                 spok = <TripleKey>key_v.mv_data
                 spok = <TripleKey>key_v.mv_data
-                ret.add(&spok)
+                ret.keys.add(&spok)
 
 
                 try:
                 try:
                     _check(lmdb.mdb_cursor_get(
                     _check(lmdb.mdb_cursor_get(
@@ -915,7 +917,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     spok[term_order[1]] = lu_dset[i][0]
                     spok[term_order[1]] = lu_dset[i][0]
                     spok[term_order[2]] = lu_dset[i][1]
                     spok[term_order[2]] = lu_dset[i][1]
 
 
-                    ret.data.add(&spok)
+                    ret.keys.add(&spok)
 
 
                 try:
                 try:
                     # Get results by the page.
                     # Get results by the page.
@@ -995,7 +997,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     spok[term_order[1]] = luk[1]
                     spok[term_order[1]] = luk[1]
                     spok[term_order[2]] = lu_dset[i]
                     spok[term_order[2]] = lu_dset[i]
 
 
-                    ret.data.add(&spok)
+                    ret.keys.add(&spok)
 
 
                 try:
                 try:
                     # Get results by the page.
                     # Get results by the page.
@@ -1232,6 +1234,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             lmdb.MDB_txn *_txn
             lmdb.MDB_txn *_txn
             Hash128 thash
             Hash128 thash
             Buffer pk_t
             Buffer pk_t
+            Key tk
 
 
         #logger.debug(f'Serializing term: {term}')
         #logger.debug(f'Serializing term: {term}')
         serialize_from_rdflib(term, &pk_t)
         serialize_from_rdflib(term, &pk_t)
@@ -1245,6 +1248,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             )
             )
 
 
             return (<Key*>data_v.mv_data)[0]
             return (<Key*>data_v.mv_data)[0]
+
         except KeyNotFoundError:
         except KeyNotFoundError:
             # If key is not in the store, add it.
             # If key is not in the store, add it.
             if self.is_txn_rw:
             if self.is_txn_rw:
@@ -1258,10 +1262,10 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
             try:
             try:
                 # Main entry.
                 # Main entry.
-                ck = self._append(pk_gr, b't:st', txn=_txn)
+                tk = self._append(&pk_t, b't:st', txn=_txn)
 
 
                 # Index.
                 # Index.
-                data_v.mv_data = &ck
+                data_v.mv_data = &tk
                 data_v.mv_size = KLEN
                 data_v.mv_size = KLEN
                 _check(lmdb.mdb_put(
                 _check(lmdb.mdb_put(
                     _txn, self.get_dbi(b'th:t'), &key_v, &data_v, 0
                     _txn, self.get_dbi(b'th:t'), &key_v, &data_v, 0
@@ -1269,7 +1273,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                 if not self.is_txn_rw:
                 if not self.is_txn_rw:
                     _check(lmdb.mdb_txn_commit(_txn))
                     _check(lmdb.mdb_txn_commit(_txn))
 
 
-                return ck
+                return tk
             except:
             except:
                 if not self.is_txn_rw:
                 if not self.is_txn_rw:
                     lmdb.mdb_txn_abort(_txn)
                     lmdb.mdb_txn_abort(_txn)

+ 13 - 13
setup.py

@@ -100,35 +100,35 @@ extensions = [
         #extra_link_args=['-fopenmp']
         #extra_link_args=['-fopenmp']
     ),
     ),
     Extension(
     Extension(
-        'lakesuperior.model.graph.*',
+        'lakesuperior.store.base_lmdb_store',
         [
         [
-            path.join(tpl_src_dir, 'tpl.c'),
-            path.join(spookyhash_src_dir, 'context.c'),
-            path.join(spookyhash_src_dir, 'globals.c'),
-            path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
             path.join(coll_src_dir, 'array.c'),
             path.join(coll_src_dir, 'hashtable.c'),
             path.join(coll_src_dir, 'hashtable.c'),
             path.join(coll_src_dir, 'hashset.c'),
             path.join(coll_src_dir, 'hashset.c'),
-            path.join('lakesuperior', 'model', 'graph', f'*.{ext}'),
+            path.join(tpl_src_dir, 'tpl.c'),
+            path.join(lmdb_src_dir, 'mdb.c'),
+            path.join(lmdb_src_dir, 'midl.c'),
+            path.join('lakesuperior', 'store', f'base_lmdb_store.{ext}'),
         ],
         ],
         include_dirs=include_dirs,
         include_dirs=include_dirs,
-        #extra_compile_args=['-fopenmp'],
-        #extra_link_args=['-fopenmp']
     ),
     ),
     Extension(
     Extension(
-        'lakesuperior.store.base_lmdb_store',
+        'lakesuperior.model.graph.*',
         [
         [
+            path.join(tpl_src_dir, 'tpl.c'),
+            path.join(spookyhash_src_dir, 'context.c'),
+            path.join(spookyhash_src_dir, 'globals.c'),
+            path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
             path.join(coll_src_dir, 'array.c'),
             path.join(coll_src_dir, 'hashtable.c'),
             path.join(coll_src_dir, 'hashtable.c'),
             path.join(coll_src_dir, 'hashset.c'),
             path.join(coll_src_dir, 'hashset.c'),
-            path.join(tpl_src_dir, 'tpl.c'),
-            path.join(lmdb_src_dir, 'mdb.c'),
-            path.join(lmdb_src_dir, 'midl.c'),
-            path.join('lakesuperior', 'store', f'base_lmdb_store.{ext}'),
+            path.join('lakesuperior', 'model', 'graph', f'*.{ext}'),
         ],
         ],
         include_dirs=include_dirs,
         include_dirs=include_dirs,
+        #extra_compile_args=['-fopenmp'],
+        #extra_link_args=['-fopenmp']
     ),
     ),
     Extension(
     Extension(
         'lakesuperior.store.ldp_rs.lmdb_triplestore',
         'lakesuperior.store.ldp_rs.lmdb_triplestore',