Browse Source

[WIP] Begin butchering away at Keyset, Graph and LmdbTripleStore.

Stefano Cossu 6 years ago
parent
commit
78554942ef

+ 7 - 13
lakesuperior/model/graph/callbacks.pyx

@@ -24,12 +24,9 @@ cdef int term_cmp_fn(const void* key1, const void* key2):
     b2 = <Buffer *>key2
     b2 = <Buffer *>key2
 
 
     if b1.sz != b2.sz:
     if b1.sz != b2.sz:
-        #logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
         return 1
         return 1
 
 
-    cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
-    #logger.info(f'term memcmp: {cmp}')
-    return cmp
+    return memcmp(b1.addr, b2.addr, b1.sz)
 
 
 
 
 cdef int trp_cmp_fn(const void* key1, const void* key2):
 cdef int trp_cmp_fn(const void* key1, const void* key2):
@@ -45,15 +42,12 @@ cdef int trp_cmp_fn(const void* key1, const void* key2):
     t1 = <BufferTriple *>key1
     t1 = <BufferTriple *>key1
     t2 = <BufferTriple *>key2
     t2 = <BufferTriple *>key2
 
 
-    diff = (
+    return (
         term_cmp_fn(t1.o, t2.o) or
         term_cmp_fn(t1.o, t2.o) or
         term_cmp_fn(t1.s, t2.s) or
         term_cmp_fn(t1.s, t2.s) or
         term_cmp_fn(t1.p, t2.p)
         term_cmp_fn(t1.p, t2.p)
     )
     )
 
 
-    #logger.info(f'Triples match: {not(diff)}')
-    return diff
-
 
 
 #cdef int trp_cmp_fn(const void* key1, const void* key2):
 #cdef int trp_cmp_fn(const void* key1, const void* key2):
 #    """
 #    """
@@ -79,7 +73,7 @@ cdef int trp_cmp_fn(const void* key1, const void* key2):
 #    return is_not_equal
 #    return is_not_equal
 
 
 
 
-cdef bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2):
+cdef bint graph_eq_fn(graph.Graph g1, graph.Graph g2):
     """
     """
     Compare 2 graphs for equality.
     Compare 2 graphs for equality.
 
 
@@ -228,16 +222,16 @@ cdef inline bint lookup_po_cmp_fn(
 ## LOOKUP CALLBACK FUNCTIONS
 ## LOOKUP CALLBACK FUNCTIONS
 
 
 cdef inline void add_trp_callback(
 cdef inline void add_trp_callback(
-    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
+    graph.Graph gr, const TripleKey spok, void* ctx
 ):
 ):
     """
     """
     Add a triple to a graph as a result of a lookup callback.
     Add a triple to a graph as a result of a lookup callback.
     """
     """
-    gr.add_triple(trp, True)
+    gr.add(trp)
 
 
 
 
 cdef inline void del_trp_callback(
 cdef inline void del_trp_callback(
-    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
+    graph.Graph gr, const TripleKey spok, void* ctx
 ):
 ):
     """
     """
     Remove a triple from a graph as a result of a lookup callback.
     Remove a triple from a graph as a result of a lookup callback.
@@ -245,6 +239,6 @@ cdef inline void del_trp_callback(
     #logger.info('removing triple: {} {} {}'.format(
     #logger.info('removing triple: {} {} {}'.format(
         #buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
         #buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
     #))
     #))
-    gr.remove_triple(trp)
+    gr.remove(spok)
 
 
 
 

+ 13 - 13
lakesuperior/model/graph/graph.pxd

@@ -14,13 +14,13 @@ ctypedef bint (*lookup_fn_t)(
 
 
 # Callback for an iterator.
 # Callback for an iterator.
 ctypedef void (*lookup_callback_fn_t)(
 ctypedef void (*lookup_callback_fn_t)(
-    SimpleGraph gr, const BufferTriple* trp, void* ctx
+    Graph gr, const BufferTriple* trp, void* ctx
 )
 )
 
 
 ctypedef Buffer SPOBuffer[3]
 ctypedef Buffer SPOBuffer[3]
 ctypedef Buffer *BufferPtr
 ctypedef Buffer *BufferPtr
 
 
-cdef class SimpleGraph:
+cdef class Graph:
     cdef:
     cdef:
         cc.HashSet *_terms # Set of unique serialized terms.
         cc.HashSet *_terms # Set of unique serialized terms.
         cc.HashSet *_triples # Set of unique triples.
         cc.HashSet *_triples # Set of unique triples.
@@ -38,24 +38,24 @@ cdef class SimpleGraph:
         bint trp_contains(self, const BufferTriple* btrp)
         bint trp_contains(self, const BufferTriple* btrp)
 
 
         # Basic graph operations.
         # Basic graph operations.
-        void ip_union(self, SimpleGraph other) except *
-        void ip_subtraction(self, SimpleGraph other) except *
-        void ip_intersection(self, SimpleGraph other) except *
-        void ip_xor(self, SimpleGraph other) except *
-        SimpleGraph empty_copy(self)
+        void ip_union(self, Graph other) except *
+        void ip_subtraction(self, Graph other) except *
+        void ip_intersection(self, Graph other) except *
+        void ip_xor(self, Graph other) except *
+        Graph empty_copy(self)
         void _match_ptn_callback(
         void _match_ptn_callback(
-            self, pattern, SimpleGraph gr,
+            self, pattern, Graph gr,
             lookup_callback_fn_t callback_fn, void* ctx=*
             lookup_callback_fn_t callback_fn, void* ctx=*
         ) except *
         ) except *
 
 
-    cpdef union_(self, SimpleGraph other)
-    cpdef subtraction(self, SimpleGraph other)
-    cpdef intersection(self, SimpleGraph other)
-    cpdef xor(self, SimpleGraph other)
+    cpdef union_(self, Graph other)
+    cpdef subtraction(self, Graph other)
+    cpdef intersection(self, Graph other)
+    cpdef xor(self, Graph other)
     cpdef void set(self, tuple trp) except *
     cpdef void set(self, tuple trp) except *
 
 
 
 
-cdef class Imr(SimpleGraph):
+cdef class Imr(Graph):
     cdef:
     cdef:
         readonly str id
         readonly str id
         Imr empty_copy(self)
         Imr empty_copy(self)

+ 60 - 155
lakesuperior/model/graph/graph.pyx

@@ -16,6 +16,7 @@ cimport lakesuperior.cy_include.collections as cc
 cimport lakesuperior.model.graph.callbacks as cb
 cimport lakesuperior.model.graph.callbacks as cb
 
 
 from lakesuperior.model.base cimport Buffer, buffer_dump
 from lakesuperior.model.base cimport Buffer, buffer_dump
+from lakesuperior.model.structures.keyset import Keyset
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.structures.hash cimport term_hash_seed32
 from lakesuperior.model.structures.hash cimport term_hash_seed32
@@ -23,14 +24,14 @@ from lakesuperior.model.structures.hash cimport term_hash_seed32
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
 
 
-cdef class SimpleGraph:
+cdef class Graph(Keyset):
     """
     """
     Fast and simple implementation of a graph.
     Fast and simple implementation of a graph.
 
 
     Most functions should mimic RDFLib's graph with less overhead. It uses
     Most functions should mimic RDFLib's graph with less overhead. It uses
     the same funny but functional slicing notation.
     the same funny but functional slicing notation.
 
 
-    A SimpleGraph can be instantiated from a store lookup. This makes it
+    A Graph can be instantiated from a store lookup. This makes it
     possible to use a Keyset to perform initial filtering via identity by key,
     possible to use a Keyset to perform initial filtering via identity by key,
     then the filtered Keyset can be converted into a set of meaningful terms.
     then the filtered Keyset can be converted into a set of meaningful terms.
 
 
@@ -38,34 +39,12 @@ cdef class SimpleGraph:
     ``rdflib.Graph`` instance.
     ``rdflib.Graph`` instance.
     """
     """
 
 
-    def __cinit__(self, set data=set(), *args, **kwargs):
+    def __cinit__(self, *args, str uri=None, set data=set(), **kwargs):
         """
         """
         Initialize the graph, optionally with Python data.
         Initialize the graph, optionally with Python data.
 
 
         :param set data: Initial data as a set of 3-tuples of RDFLib terms.
         :param set data: Initial data as a set of 3-tuples of RDFLib terms.
         """
         """
-        cdef:
-            cc.HashSetConf terms_conf, trp_conf
-
-        self.term_cmp_fn = cb.term_cmp_fn
-        self.trp_cmp_fn = cb.trp_cmp_fn
-
-        cc.hashset_conf_init(&terms_conf)
-        terms_conf.load_factor = 0.85
-        terms_conf.hash = cb.term_hash_fn
-        terms_conf.hash_seed = term_hash_seed32
-        terms_conf.key_compare = self.term_cmp_fn
-        terms_conf.key_length = sizeof(Buffer*)
-
-        cc.hashset_conf_init(&trp_conf)
-        trp_conf.load_factor = 0.75
-        trp_conf.hash = cb.trp_hash_fn
-        trp_conf.hash_seed = term_hash_seed32
-        trp_conf.key_compare = self.trp_cmp_fn
-        trp_conf.key_length = sizeof(BufferTriple)
-
-        cc.hashset_new_conf(&terms_conf, &self._terms)
-        cc.hashset_new_conf(&trp_conf, &self._triples)
 
 
         self.pool = Pool()
         self.pool = Pool()
 
 
@@ -75,69 +54,39 @@ cdef class SimpleGraph:
             self.add(data)
             self.add(data)
 
 
 
 
-    def __dealloc__(self):
-        """
-        Free the triple pointers.
-        """
-        free(self._triples)
-        free(self._terms)
-
-
     ## PROPERTIES ##
     ## PROPERTIES ##
 
 
     @property
     @property
     def data(self):
     def data(self):
         """
         """
-        Triple data as a Python generator.
+        Triple data as a Python set.
 
 
-        :rtype: generator
+        :rtype: set
         """
         """
-        cdef:
-            void *void_p
-            cc.HashSetIter ti
-            Buffer* ss
-            Buffer* sp
-            Buffer* so
-
-        cc.hashset_iter_init(&ti, self._triples)
-        while cc.hashset_iter_next(&ti, &void_p) != cc.CC_ITER_END:
-            trp = <BufferTriple *>void_p
-            yield (
-                term.deserialize_to_rdflib(trp.s),
-                term.deserialize_to_rdflib(trp.p),
-                term.deserialize_to_rdflib(trp.o),
-            )
-
-    @property
-    def stored_terms(self):
-        """
-        All terms in the graph with their memory address.
-
-        For debugging purposes.
-        """
-        cdef:
-            cc.HashSetIter it
-            void *cur
+        cdef TripleKey spok
 
 
-        terms = set()
+        ret = set()
 
 
-        cc.hashset_iter_init(&it, self._terms)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            s_term = <Buffer*>cur
-            terms.add((f'0x{<size_t>cur:02x}', term.deserialize_to_rdflib(s_term)))
+        self.seek()
+        while self.get_next(&spok):
+            ret.add((
+                self.store.from_key(trp[0]),
+                self.store.from_key(trp[1]),
+                self.store.from_key(trp[2])
+            ))
 
 
-        return terms
+        return ret
 
 
 
 
     ## MAGIC METHODS ##
     ## MAGIC METHODS ##
 
 
     def __len__(self):
     def __len__(self):
         """ Number of triples in the graph. """
         """ Number of triples in the graph. """
-        return cc.hashset_size(self._triples)
+        return self._free_i
 
 
 
 
     def __eq__(self, other):
     def __eq__(self, other):
-        """ Equality operator between ``SimpleGraph`` instances. """
+        """ Equality operator between ``Graph`` instances. """
         return len(self ^ other) == 0
         return len(self ^ other) == 0
 
 
 
 
@@ -272,31 +221,18 @@ cdef class SimpleGraph:
         return {r[i] for r in self.data}
         return {r[i] for r in self.data}
 
 
 
 
-    def add(self, trp):
+    def add_triples(self, trp):
         """
         """
         Add triples to the graph.
         Add triples to the graph.
 
 
         :param iterable triples: iterable of 3-tuple triples.
         :param iterable triples: iterable of 3-tuple triples.
         """
         """
-        cdef size_t cur = 0, trp_cur = 0
-
-        trp_ct = len(trp)
-        term_buf = <Buffer*>self.pool.alloc(3 * trp_ct, sizeof(Buffer))
-        trp_buf = <BufferTriple*>self.pool.alloc(trp_ct, sizeof(BufferTriple))
-
-        for s, p, o in trp:
-            term.serialize_from_rdflib(s, term_buf + cur, self.pool)
-            term.serialize_from_rdflib(p, term_buf + cur + 1, self.pool)
-            term.serialize_from_rdflib(o, term_buf + cur + 2, self.pool)
-
-            (trp_buf + trp_cur).s = term_buf + cur
-            (trp_buf + trp_cur).p = term_buf + cur + 1
-            (trp_buf + trp_cur).o = term_buf + cur + 2
-
-            self.add_triple(trp_buf + trp_cur)
-
-            trp_cur += 1
-            cur += 3
+        for s, p, o in triples:
+            self.add([
+                self.store.to_key(s),
+                self.store.to_key(p),
+                self.store.to_key(o),
+            ])
 
 
 
 
     def len_terms(self):
     def len_terms(self):
@@ -317,7 +253,7 @@ cdef class SimpleGraph:
 
 
     ## CYTHON-ACCESSIBLE BASIC METHODS ##
     ## CYTHON-ACCESSIBLE BASIC METHODS ##
 
 
-    cdef SimpleGraph empty_copy(self):
+    cdef Graph empty_copy(self):
         """
         """
         Create an empty copy carrying over some key properties.
         Create an empty copy carrying over some key properties.
 
 
@@ -326,16 +262,16 @@ cdef class SimpleGraph:
         return self.__class__()
         return self.__class__()
 
 
 
 
-    cpdef union_(self, SimpleGraph other):
+    cpdef union_(self, Graph other):
         """
         """
-        Perform set union resulting in a new SimpleGraph instance.
+        Perform set union resulting in a new Graph instance.
 
 
         TODO Allow union of multiple graphs at a time.
         TODO Allow union of multiple graphs at a time.
 
 
-        :param SimpleGraph other: The other graph to merge.
+        :param Graph other: The other graph to merge.
 
 
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
+        :rtype: Graph
+        :return: A new Graph instance.
         """
         """
         cdef:
         cdef:
             void *cur
             void *cur
@@ -353,13 +289,13 @@ cdef class SimpleGraph:
         return new_gr
         return new_gr
 
 
 
 
-    cdef void ip_union(self, SimpleGraph other) except *:
+    cdef void ip_union(self, Graph other) except *:
         """
         """
         Perform an in-place set union that adds triples to this instance
         Perform an in-place set union that adds triples to this instance
 
 
         TODO Allow union of multiple graphs at a time.
         TODO Allow union of multiple graphs at a time.
 
 
-        :param SimpleGraph other: The other graph to merge.
+        :param Graph other: The other graph to merge.
 
 
         :rtype: void
         :rtype: void
         """
         """
@@ -373,14 +309,14 @@ cdef class SimpleGraph:
             self.add_triple(bt, True)
             self.add_triple(bt, True)
 
 
 
 
-    cpdef intersection(self, SimpleGraph other):
+    cpdef intersection(self, Graph other):
         """
         """
         Graph intersection.
         Graph intersection.
 
 
-        :param SimpleGraph other: The other graph to intersect.
+        :param Graph other: The other graph to intersect.
 
 
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
+        :rtype: Graph
+        :return: A new Graph instance.
         """
         """
         cdef:
         cdef:
             void *cur
             void *cur
@@ -397,14 +333,14 @@ cdef class SimpleGraph:
         return new_gr
         return new_gr
 
 
 
 
-    cdef void ip_intersection(self, SimpleGraph other) except *:
+    cdef void ip_intersection(self, Graph other) except *:
         """
         """
         In-place graph intersection.
         In-place graph intersection.
 
 
         Triples not in common with another graph are removed from the current
         Triples not in common with another graph are removed from the current
         one.
         one.
 
 
-        :param SimpleGraph other: The other graph to intersect.
+        :param Graph other: The other graph to intersect.
 
 
         :rtype: void
         :rtype: void
         """
         """
@@ -419,17 +355,17 @@ cdef class SimpleGraph:
                 self.remove_triple(bt)
                 self.remove_triple(bt)
 
 
 
 
-    cpdef subtraction(self, SimpleGraph other):
+    cpdef subtraction(self, Graph other):
         """
         """
         Graph set-theoretical subtraction.
         Graph set-theoretical subtraction.
 
 
         Create a new graph with the triples of this graph minus the ones in
         Create a new graph with the triples of this graph minus the ones in
         common with the other graph.
         common with the other graph.
 
 
-        :param SimpleGraph other: The other graph to subtract to this.
+        :param Graph other: The other graph to subtract from this.
 
 
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
+        :rtype: Graph
+        :return: A new Graph instance.
         """
         """
         cdef:
         cdef:
             void *cur
             void *cur
@@ -446,13 +382,13 @@ cdef class SimpleGraph:
         return new_gr
         return new_gr
 
 
 
 
-    cdef void ip_subtraction(self, SimpleGraph other) except *:
+    cdef void ip_subtraction(self, Graph other) except *:
         """
         """
         In-place graph subtraction.
         In-place graph subtraction.
 
 
         Triples in common with another graph are removed from the current one.
         Triples in common with another graph are removed from the current one.
 
 
-        :param SimpleGraph other: The other graph to intersect.
+        :param Graph other: The other graph to subtract.
 
 
         :rtype: void
         :rtype: void
         """
         """
@@ -467,14 +403,14 @@ cdef class SimpleGraph:
                 self.remove_triple(bt)
                 self.remove_triple(bt)
 
 
 
 
-    cpdef xor(self, SimpleGraph other):
+    cpdef xor(self, Graph other):
         """
         """
         Graph Exclusive disjunction (XOR).
         Graph Exclusive disjunction (XOR).
 
 
-        :param SimpleGraph other: The other graph to perform XOR with.
+        :param Graph other: The other graph to perform XOR with.
 
 
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
+        :rtype: Graph
+        :return: A new Graph instance.
         """
         """
         cdef:
         cdef:
             void *cur
             void *cur
@@ -500,14 +436,14 @@ cdef class SimpleGraph:
         return new_gr
         return new_gr
 
 
 
 
-    cdef void ip_xor(self, SimpleGraph other) except *:
+    cdef void ip_xor(self, Graph other) except *:
         """
         """
         In-place graph XOR.
         In-place graph XOR.
 
 
         Triples in common with another graph are removed from the current one,
         Triples in common with another graph are removed from the current one,
         and triples not in common will be added from the other one.
         and triples not in common will be added from the other one.
 
 
-        :param SimpleGraph other: The other graph to perform XOR with.
+        :param Graph other: The other graph to perform XOR with.
 
 
         :rtype: void
         :rtype: void
         """
         """
@@ -516,7 +452,7 @@ cdef class SimpleGraph:
             cc.HashSetIter it
             cc.HashSetIter it
             # TODO This could be more efficient to stash values in a simple
             # TODO This could be more efficient to stash values in a simple
             # array, but how urgent is it to improve an in-place XOR?
             # array, but how urgent is it to improve an in-place XOR?
-            SimpleGraph tmp = SimpleGraph()
+            Graph tmp = Graph()
 
 
         # Add *to the tmp graph* triples in other graph and not in this graph.
         # Add *to the tmp graph* triples in other graph and not in this graph.
         cc.hashset_iter_init(&it, other._triples)
         cc.hashset_iter_init(&it, other._triples)
@@ -581,37 +517,6 @@ cdef class SimpleGraph:
         return dtrp
         return dtrp
 
 
 
 
-    cdef inline void add_triple(
-        self, const BufferTriple* trp, bint copy=False
-    ) except *:
-        """
-        Add a triple from 3 (TPL) serialized terms.
-
-        Each of the terms is added to the term set if not existing. The triple
-        also is only added if not existing.
-
-        :param BufferTriple* trp: The triple to add.
-        :param bint copy: if ``True``, the triple and term data will be
-            allocated and copied into the graph memory pool.
-        """
-        if copy:
-            trp = self.store_triple(trp)
-
-        cc.hashset_add(self._terms, trp.s)
-        cc.hashset_add(self._terms, trp.p)
-        cc.hashset_add(self._terms, trp.o)
-
-        if cc.hashset_add(self._triples, trp) != cc.CC_OK:
-            raise RuntimeError('Error inserting triple in graph.')
-
-
-    cdef int remove_triple(self, const BufferTriple* btrp) except -1:
-        """
-        Remove one triple from the graph.
-        """
-        return cc.hashset_remove(self._triples, btrp, NULL)
-
-
     cdef bint trp_contains(self, const BufferTriple* btrp):
     cdef bint trp_contains(self, const BufferTriple* btrp):
         cdef:
         cdef:
             cc.HashSetIter it
             cc.HashSetIter it
@@ -694,13 +599,13 @@ cdef class SimpleGraph:
 
 
         Any and all of the lookup terms may be ``None``.
         Any and all of the lookup terms may be ``None``.
 
 
-        :rtype: SimpleGraph
-        "return: New SimpleGraph instance with matching triples.
+        :rtype: Graph
+        :return: New Graph instance with matching triples.
         """
         """
         cdef:
         cdef:
             void* cur
             void* cur
             BufferTriple trp
             BufferTriple trp
-            SimpleGraph res_gr = SimpleGraph()
+            Graph res_gr = Graph()
 
 
         self._match_ptn_callback(pattern, res_gr, cb.add_trp_callback, NULL)
         self._match_ptn_callback(pattern, res_gr, cb.add_trp_callback, NULL)
 
 
@@ -708,7 +613,7 @@ cdef class SimpleGraph:
 
 
 
 
     cdef void _match_ptn_callback(
     cdef void _match_ptn_callback(
-        self, pattern, SimpleGraph gr,
+        self, pattern, Graph gr,
         lookup_callback_fn_t callback_fn, void* ctx=NULL
         lookup_callback_fn_t callback_fn, void* ctx=NULL
     ) except *:
     ) except *:
         """
         """
@@ -775,11 +680,11 @@ cdef class SimpleGraph:
 
 
 
 
 
 
-cdef class Imr(SimpleGraph):
+cdef class Imr(Graph):
     """
     """
     In-memory resource data container.
     In-memory resource data container.
 
 
-    This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
+    This is an extension of :py:class:`~Graph` that adds a subject URI to
     the data set and some convenience methods.
     the data set and some convenience methods.
 
 
     An instance of this class can be converted to a ``rdflib.Resource``
     An instance of this class can be converted to a ``rdflib.Resource``
@@ -799,9 +704,9 @@ cdef class Imr(SimpleGraph):
         :param rdflib.URIRef uri: The graph URI.
         :param rdflib.URIRef uri: The graph URI.
             This will serve as the subject for some queries.
             This will serve as the subject for some queries.
         :param args: Positional arguments inherited from
         :param args: Positional arguments inherited from
-            ``SimpleGraph.__init__``.
+            ``Graph.__init__``.
         :param kwargs: Keyword arguments inherited from
         :param kwargs: Keyword arguments inherited from
-            ``SimpleGraph.__init__``.
+            ``Graph.__init__``.
         """
         """
         self.id = str(uri)
         self.id = str(uri)
         #super().__init(*args, **kwargs)
         #super().__init(*args, **kwargs)

+ 5 - 0
lakesuperior/model/structures/keyset.pxd

@@ -12,6 +12,11 @@ cdef class Keyset:
         size_t ct
         size_t ct
         size_t _cur # Index cursor used to look up values.
         size_t _cur # Index cursor used to look up values.
         size_t _free_i # Index of next free slot.
         size_t _free_i # Index of next free slot.
+        float expand_ratio # By how much storage is automatically expanded when
+                           # full. 1 means the size doubles, 0.5 a 50%
+                           # increase. 0 means that storage won't be
+                           # automatically expanded and adding above capacity
+                           # will raise an error.
 
 
         void seek(self, size_t idx=*)
         void seek(self, size_t idx=*)
         size_t tell(self)
         size_t tell(self)

+ 14 - 9
lakesuperior/model/structures/keyset.pyx

@@ -15,13 +15,14 @@ cdef class Keyset:
     """
     """
     Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
     Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
     """
     """
-    def __cinit__(self, size_t ct=0):
+    def __cinit__(self, size_t ct=0, expand_ratio=.5, *args, **kwargs):
         """
         """
         Initialize and allocate memory for the data set.
         Initialize and allocate memory for the data set.
 
 
         :param size_t ct: Number of elements to be accounted for.
         :param size_t ct: Number of elements to be accounted for.
         """
         """
         self.ct = ct
         self.ct = ct
+        self.expand_ratio = expand_ratio
         self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
         self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
         if ct and not self.data:
         if ct and not self.data:
             raise MemoryError('Error allocating Keyset data.')
             raise MemoryError('Error allocating Keyset data.')
@@ -37,12 +38,7 @@ cdef class Keyset:
         This is called when the Python instance is garbage collected, which
         This is called when the Python instance is garbage collected, which
         makes it handy to safely pass a Keyset instance across functions.
         makes it handy to safely pass a Keyset instance across functions.
         """
         """
-        #logger.debug(
-        #    'Releasing {0} ({1}x{2}) bytes of Keyset @ {3:x}...'.format(
-        #        self.size, self.conf.capacity, self.itemsize,
-        #        <unsigned long>self.data))
         PyMem_Free(self.data)
         PyMem_Free(self.data)
-        #logger.debug('...done releasing.')
 
 
 
 
     # Access methods.
     # Access methods.
@@ -96,12 +92,21 @@ cdef class Keyset:
         return True
         return True
 
 
 
 
-    cdef void add(self, const TripleKey* val) except *:
+    cdef void add(self, const TripleKey* val, check_dup=False) except *:
         """
         """
         Add a triple key to the array.
         Add a triple key to the array.
         """
         """
-        if self._free_i >= self.ct:
-            raise MemoryError('No slots left in key set.')
+        # Optionally check for duplicates.
+        if check_dup and self.contains(val):
+            return
+
+        if self._free_i >= self.threshod:
+            if self.expand_ratio > 0:
+                # In some edge cases, a very small ratio may round down to a
+                # zero increase, so the baseline increase is 1 element.
+                self.resize(1 + self.ct * (1 + self.expand_ratio))
+            else:
+                raise MemoryError('No space left in key set.')
 
 
         self.data[self._free_i] = val[0]
         self.data[self._free_i] = val[0]
 
 

+ 3 - 3
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -5,7 +5,7 @@ cimport lakesuperior.cy_include.cytpl as tpl
 from lakesuperior.model.base cimport (
 from lakesuperior.model.base cimport (
     Key, DoubleKey, TripleKey, Buffer
     Key, DoubleKey, TripleKey, Buffer
 )
 )
-from lakesuperior.model.graph.graph cimport SimpleGraph
+from lakesuperior.model.graph.graph cimport Graph
 from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
 
 
@@ -28,7 +28,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef tuple all_namespaces(self)
     cpdef tuple all_namespaces(self)
-    cpdef SimpleGraph graph_lookup(
+    cpdef Graph graph_lookup(
         self, triple_pattern, context=*, uri=*, copy=*
         self, triple_pattern, context=*, uri=*, copy=*
     )
     )
 
 
@@ -44,7 +44,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             self, unsigned char idx1, unsigned char idx2, DoubleKey tks
             self, unsigned char idx1, unsigned char idx2, DoubleKey tks
         )
         )
         object from_key(self, const Key tk)
         object from_key(self, const Key tk)
-        Key _to_key(self, term) except -1
+        Key to_key(self, term) except -1
         void all_contexts(self, Key** ctx, size_t* sz, triple=*) except *
         void all_contexts(self, Key** ctx, size_t* sz, triple=*) except *
         Key _append(
         Key _append(
                 self, Buffer *value,
                 self, Buffer *value,

+ 17 - 68
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -21,7 +21,7 @@ from lakesuperior.model.base cimport (
     Key, DoubleKey, TripleKey, QuadKey,
     Key, DoubleKey, TripleKey, QuadKey,
     Buffer, buffer_dump
     Buffer, buffer_dump
 )
 )
-from lakesuperior.model.graph.graph cimport SimpleGraph, Imr
+from lakesuperior.model.graph.graph cimport Graph, Imr
 from lakesuperior.model.graph.term cimport Term
 from lakesuperior.model.graph.term cimport Term
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.graph.triple cimport BufferTriple
 
 
@@ -172,7 +172,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             Key ck
             Key ck
 
 
         if context is not None:
         if context is not None:
-            ck = self._to_key(context)
+            ck = self.to_key(context)
             key_v.mv_data = &ck
             key_v.mv_data = &ck
             key_v.mv_size = KLEN
             key_v.mv_size = KLEN
 
 
@@ -364,7 +364,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
         if context is not None:
         if context is not None:
             try:
             try:
-                ck = self._to_key(context)
+                ck = self.to_key(context)
             except KeyNotFoundError:
             except KeyNotFoundError:
                 # If context is specified but not found, return to avoid
                 # If context is specified but not found, return to avoid
                 # deleting the wrong triples.
                 # deleting the wrong triples.
@@ -563,7 +563,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
         # Gather information on the graph prior to deletion.
         # Gather information on the graph prior to deletion.
         try:
         try:
-            ck = self._to_key(gr_uri)
+            ck = self.to_key(gr_uri)
         except KeyNotFoundError:
         except KeyNotFoundError:
             return
             return
 
 
@@ -677,55 +677,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             self._cur_close(cur)
             self._cur_close(cur)
 
 
 
 
-    cpdef SimpleGraph graph_lookup(
-            self, triple_pattern, context=None, uri=None, copy=False
-    ):
-        """
-        Create a SimpleGraph or Imr instance from buffers from the store.
-
-        The instance is only valid within the LMDB transaction that created it.
-
-        :param tuple triple_pattern: 3 RDFLib terms
-        :param context: Context graph, if available.
-        :type context: rdflib.Graph or None
-        :param str uri: URI for the resource. If provided, the resource
-            returned will be an Imr, otherwise a SimpleGraph.
-
-        :rtype: Iterator
-        :return: Generator over triples and contexts in which each result has
-            the following format::
-
-                (s, p, o), generator(contexts)
-
-        Where the contexts generator lists all context that the triple appears
-        in.
-        """
-        cdef:
-            Buffer buffers[3]
-            BufferTriple btrp
-            SimpleGraph gr
-            TripleKey spok
-
-        btrp.s = buffers
-        btrp.p = buffers + 1
-        btrp.o = buffers + 2
-
-        gr = Imr(uri=uri) if uri else SimpleGraph()
-
-        match = self.triple_keys(triple_pattern, context)
-
-        match.seek()
-        while match.get_next(&spok):
-            self.lookup_term(spok, buffers)
-            self.lookup_term(spok + 1, buffers + 1)
-            self.lookup_term(spok + 2, buffers + 2)
-
-            gr.add_triple(&btrp, True)
-
-        return gr
-
-
-    cdef Keyset triple_keys(self, tuple triple_pattern, context=None):
+    cdef Keyset triple_keys(self, tuple triple_pattern, context=None, uri=None):
         """
         """
         Top-level lookup method.
         Top-level lookup method.
 
 
@@ -748,7 +700,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
         if context is not None:
         if context is not None:
             try:
             try:
-                ck = self._to_key(context)
+                ck = self.to_key(context)
             except KeyNotFoundError:
             except KeyNotFoundError:
                 # Context not found.
                 # Context not found.
                 return Keyset()
                 return Keyset()
@@ -763,7 +715,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                 if all(triple_pattern):
                 if all(triple_pattern):
                     for i, term in enumerate(triple_pattern):
                     for i, term in enumerate(triple_pattern):
                         try:
                         try:
-                            tk = self._to_key(term)
+                            tk = self.to_key(term)
                         except KeyNotFoundError:
                         except KeyNotFoundError:
                             # A term key was not found.
                             # A term key was not found.
                             return Keyset()
                             return Keyset()
@@ -866,11 +818,11 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
         try:
         try:
             if s is not None:
             if s is not None:
-                sk = self._to_key(s)
+                sk = self.to_key(s)
             if p is not None:
             if p is not None:
-                pk = self._to_key(p)
+                pk = self.to_key(p)
             if o is not None:
             if o is not None:
-                ok = self._to_key(o)
+                ok = self.to_key(o)
         except KeyNotFoundError:
         except KeyNotFoundError:
             return Keyset()
             return Keyset()
 
 
@@ -1209,9 +1161,9 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     self.txn, lmdb.mdb_cursor_dbi(cur), &stat))
                     self.txn, lmdb.mdb_cursor_dbi(cur), &stat))
 
 
                 spok = [
                 spok = [
-                    self._to_key(triple[0]),
-                    self._to_key(triple[1]),
-                    self._to_key(triple[2]),
+                    self.to_key(triple[0]),
+                    self.to_key(triple[1]),
+                    self.to_key(triple[2]),
                 ]
                 ]
                 key_v.mv_data = spok
                 key_v.mv_data = spok
                 key_v.mv_size = TRP_KLEN
                 key_v.mv_size = TRP_KLEN
@@ -1300,15 +1252,12 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         return deserialize_to_rdflib(&pk_t)
         return deserialize_to_rdflib(&pk_t)
 
 
 
 
-    cdef inline Key _to_key(self, term) except -1:
+    cdef inline Key to_key(self, term) except -1:
         """
         """
-        Convert a triple, quad or term into a key index (bare number).
-
-        The key is the checksum of the serialized object, therefore unique for
-        that object.
+        Convert a term into a key index (bare number).
 
 
         :param rdflib.Term term: An RDFLib term (URIRef, BNode, Literal).
         :param rdflib.Term term: An RDFLib term (URIRef, BNode, Literal).
-        :param Key key: Pointer to the key that will be produced.
+        :return: Key that will be produced.
 
 
         :rtype: Key
         :rtype: Key
         """
         """
@@ -1340,7 +1289,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         :param list(bytes) values: Value(s) to append.
         :param list(bytes) values: Value(s) to append.
 
 
         :rtype: Key
         :rtype: Key
-        :return: Index of key inserted.
+        :return: Key inserted.
         """
         """
         cdef:
         cdef:
             lmdb.MDB_cursor *cur
             lmdb.MDB_cursor *cur

+ 13 - 16
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -252,7 +252,7 @@ class RsrcCentricLayout:
 
 
         :rtype: SimpleGraph
         :rtype: SimpleGraph
         """
         """
-        return self.store.graph_lookup((subject, None, None), ctx, copy=True)
+        return self.store.triple_keys((subject, None, None), ctx)
 
 
 
 
     def count_rsrc(self):
     def count_rsrc(self):
@@ -294,7 +294,7 @@ class RsrcCentricLayout:
         imr = Imr(uri=nsc['fcres'][uid])
         imr = Imr(uri=nsc['fcres'][uid])
 
 
         for ctx in contexts:
         for ctx in contexts:
-            gr = self.store.graph_lookup((None, None, None), ctx, copy=True)
+            gr = self.store.triple_keys((None, None, None), ctx)
             imr |= gr
             imr |= gr
 
 
         # Include inbound relationships.
         # Include inbound relationships.
@@ -332,11 +332,10 @@ class RsrcCentricLayout:
         logger.debug('Getting metadata for: {}'.format(uid))
         logger.debug('Getting metadata for: {}'.format(uid))
         if ver_uid:
         if ver_uid:
             uid = self.snapshot_uid(uid, ver_uid)
             uid = self.snapshot_uid(uid, ver_uid)
-        imr = self.store.graph_lookup(
+        imr = self.store.triple_keys(
             (None, None, None),
             (None, None, None),
             context=nsc['fcadmin'][uid],
             context=nsc['fcadmin'][uid],
-            uri=nsc['fcres'][uid],
-            copy=True
+            uri=nsc['fcres'][uid]
         )
         )
 
 
         if strict:
         if strict:
@@ -356,11 +355,10 @@ class RsrcCentricLayout:
         # graph. If multiple user-provided graphs will be supported, this
         # graph. If multiple user-provided graphs will be supported, this
         # should use another query to get all of them.
         # should use another query to get all of them.
         uri = nsc['fcres'][uid]
         uri = nsc['fcres'][uid]
-        userdata = self.store.graph_lookup(
+        userdata = self.store.triple_keys(
             (None, None, None),
             (None, None, None),
             context=nsc['fcmain'][uid],
             context=nsc['fcmain'][uid],
-            uri=uri,
-            copy=True
+            uri=uri
         )
         )
 
 
         return userdata
         return userdata
@@ -380,18 +378,18 @@ class RsrcCentricLayout:
         vmeta = Imr(uri=nsc['fcres'][uid])
         vmeta = Imr(uri=nsc['fcres'][uid])
 
 
         #Get version graphs proper.
         #Get version graphs proper.
-        for vtrp in self.store.graph_lookup(
+        for vtrp in self.store.triple_keys(
             (nsc['fcres'][uid], nsc['fcrepo'].hasVersion, None),
             (nsc['fcres'][uid], nsc['fcrepo'].hasVersion, None),
             nsc['fcadmin'][uid]
             nsc['fcadmin'][uid]
         ):
         ):
             # Add the hasVersion triple to the result graph.
             # Add the hasVersion triple to the result graph.
             vmeta.add((vtrp,))
             vmeta.add((vtrp,))
-            vmeta_gr = self.store.graph_lookup(
+            vmeta_gr = self.store.triple_keys(
                 (None, nsc['foaf'].primaryTopic, vtrp[2]), HIST_GR_URI
                 (None, nsc['foaf'].primaryTopic, vtrp[2]), HIST_GR_URI
             )
             )
             # Get triples in the meta graph filtering out undesired triples.
             # Get triples in the meta graph filtering out undesired triples.
             for vmtrp in vmeta_gr:
             for vmtrp in vmeta_gr:
-                for trp in self.store.graph_lookup(
+                for trp in self.store.triple_keys(
                     (vmtrp[0], None, None), HIST_GR_URI
                     (vmtrp[0], None, None), HIST_GR_URI
                 ):
                 ):
                     if (
                     if (
@@ -418,7 +416,7 @@ class RsrcCentricLayout:
         :return: Inbound triples or subjects.
         :return: Inbound triples or subjects.
         """
         """
         # Only return non-historic graphs.
         # Only return non-historic graphs.
-        # TODO self.store.graph_lookup?
+        # TODO self.store.triple_keys?
         meta_gr = self.ds.graph(META_GR_URI)
         meta_gr = self.ds.graph(META_GR_URI)
         ptopic_uri = nsc['foaf'].primaryTopic
         ptopic_uri = nsc['foaf'].primaryTopic
 
 
@@ -444,7 +442,7 @@ class RsrcCentricLayout:
         ctx_uri = nsc['fcstruct'][uid]
         ctx_uri = nsc['fcstruct'][uid]
         cont_p = nsc['ldp'].contains
         cont_p = nsc['ldp'].contains
         def _recurse(dset, s, c):
         def _recurse(dset, s, c):
-            new_dset = self.store.graph_lookup(
+            new_dset = self.store.triple_keys(
                 (s, cont_p, None), c
                 (s, cont_p, None), c
             )[s : cont_p]
             )[s : cont_p]
             #new_dset = set(ds.graph(c)[s : cont_p])
             #new_dset = set(ds.graph(c)[s : cont_p])
@@ -465,9 +463,8 @@ class RsrcCentricLayout:
             return _recurse(set(), subj_uri, ctx_uri)
             return _recurse(set(), subj_uri, ctx_uri)
         else:
         else:
             #return ds.graph(ctx_uri)[subj_uri : cont_p : ])
             #return ds.graph(ctx_uri)[subj_uri : cont_p : ])
-            return self.store.graph_lookup(
-                (subj_uri, cont_p, None), ctx_uri,
-                copy=True
+            return self.store.triple_keys(
+                (subj_uri, cont_p, None), ctx_uri
             )[subj_uri : cont_p]
             )[subj_uri : cont_p]