Merge pull request #84 from scossu/size_t_keys

Size t keys
Stefano Cossu · 6 years ago
commit b59a2224d6

+ 1 - 1
.travis.yml

@@ -10,7 +10,7 @@ matrix:
       sudo: true
 
 install:
-  - pip install Cython==0.29 cymem
+  - pip install Cython==0.29.6 cymem
   - pip install -e .
 script:
   - python setup.py test

+ 1 - 1
ext/lmdb

@@ -1 +1 @@
-Subproject commit 2a5eaad6919ce6941dec4f0d5cce370707a00ba7
+Subproject commit 5033a08c86fb6ef0adddabad327422a1c0c0069a

+ 0 - 88
lakesuperior/cy_include/calg.pxd

@@ -1,88 +0,0 @@
-cdef extern from 'set.h':
-    #ctypedef _Set Set
-    #ctypedef _SetEntry SetEntry
-    ctypedef void *SetValue
-
-    ctypedef unsigned int (*SetHashFunc)(SetValue value)
-    ctypedef bint (*SetEqualFunc)(SetValue value1, SetValue value2)
-    ctypedef void (*SetFreeFunc)(SetValue value)
-
-    ctypedef struct SetEntry:
-        SetValue data
-        SetEntry *next
-
-    ctypedef struct _Set:
-        SetEntry **table
-        unsigned int entries
-        unsigned int table_size
-        unsigned int prime_index
-        SetHashFunc hash_func
-        SetEqualFunc equal_func
-        SetFreeFunc free_func
-
-    ctypedef _Set Set
-
-    ctypedef struct SetIterator:
-        pass
-
-    Set *set_new(SetHashFunc hash_func, SetEqualFunc equal_func)
-    void set_free(Set *set)
-    # TODO This should return an int, ideally. See
-    # https://github.com/fragglet/c-algorithms/issues/20
-    bint set_insert(Set *set, SetValue data)
-    bint set_insert_or_assign(Set *set, SetValue *data)
-    bint set_query(Set *set, SetValue data)
-    bint set_enlarge(Set *set)
-    unsigned int set_num_entries(Set *set)
-    SetValue *set_to_array(Set *set)
-    Set *set_union(Set *set1, Set *set2)
-    Set *set_intersection(Set *set1, Set *set2)
-    void set_iterate(Set *set, SetIterator *iter)
-    bint set_iter_has_more(SetIterator *iterator)
-    SetValue set_iter_next(SetIterator *iterator)
-
-
-cdef extern from 'hash-table.h':
-    ctypedef void *HashTableKey
-    ctypedef void *HashTableValue
-
-    ctypedef struct HashTablePair:
-        HashTableKey key
-        HashTableKey value
-
-    ctypedef struct HashTableEntry:
-        HashTablePair pair
-        HashTableEntry *next
-
-    ctypedef struct HashTable:
-        HashTableEntry **table
-        unsigned int table_size
-        unsigned int entries
-        unsigned int prime_index
-
-    ctypedef struct HashTableIterator:
-        pass
-
-    ctypedef unsigned int (*HashTableHashFunc)(HashTableKey value)
-    ctypedef bint (*HashTableEqualFunc)(
-            HashTableKey value1, HashTableKey value2)
-    ctypedef void (*HashTableKeyFreeFunc)(HashTableKey value)
-    ctypedef void (*HashTableValueFreeFunc)(HashTableValue value)
-
-
-    HashTable *hash_table_new(
-            HashTableHashFunc hash_func, HashTableEqualFunc equal_func)
-    void hash_table_free(HashTable *hash_table)
-    void hash_table_register_free_functions(
-            HashTable *hash_table, HashTableKeyFreeFunc key_free_func,
-            HashTableValueFreeFunc value_free_func)
-    int hash_table_insert(
-            HashTable *hash_table, HashTableKey key, HashTableValue value)
-    HashTableValue hash_table_lookup(
-            HashTable *hash_table, HashTableKey key)
-    bint hash_table_remove(HashTable *hash_table, HashTableKey key)
-    unsigned int hash_table_num_entries(HashTable *hash_table)
-    void hash_table_iterate(HashTable *hash_table, HashTableIterator *iter)
-    bint hash_table_iter_has_more(HashTableIterator *iterator)
-    HashTablePair hash_table_iter_next(HashTableIterator *iterator)
-

+ 40 - 33
lakesuperior/cy_include/collections.pxd

@@ -9,7 +9,7 @@ ctypedef int (*key_compare_ft)(const void* key1, const void* key2)
 
 cdef extern from "common.h":
 
-    cdef enum cc_stat:
+    enum cc_stat:
         CC_OK
         CC_ERR_ALLOC
         CC_ERR_INVALID_CAPACITY
@@ -19,47 +19,50 @@ cdef extern from "common.h":
         CC_ERR_VALUE_NOT_FOUND
         CC_ERR_OUT_OF_RANGE
         CC_ITER_END
+
+    key_compare_ft CC_CMP_STRING
+    key_compare_ft CC_CMP_POINTER
 #
 #    int cc_common_cmp_str(const void* key1, const void* key2)
 #
 #    int cc_common_cmp_ptr(const void* key1, const void* key2)
 
-#cdef extern from "array.h":
+cdef extern from "array.h":
+
+    ctypedef struct Array:
+        pass
+
+    ctypedef struct ArrayConf:
+        size_t          capacity
+        float           exp_factor
+        mem_alloc_ft  mem_alloc
+        mem_calloc_ft mem_calloc
+        mem_free_ft   mem_free
+
+    ctypedef struct ArrayIter:
+        Array* ar
+        size_t index
+        bint last_removed
 
-#    ctypedef struct Array:
-#        pass
-#
-#    ctypedef struct ArrayConf:
-#        size_t          capacity
-#        float           exp_factor
-#        mem_alloc_ft  mem_alloc
-#        mem_calloc_ft mem_calloc
-#        mem_free_ft   mem_free
-#
-#    ctypedef struct ArrayIter:
-#        Array* ar
-#        size_t index
-#        bint last_removed
-#
 #    ctypedef struct ArrayZipIter:
 #        Array* ar1
 #        Array* ar2
 #        size_t index
 #        bint last_removed
 #
-#    cc_stat array_new(Array** out)
-#
-#    cc_stat array_new_conf(ArrayConf* conf, Array** out)
-#
-#    void array_conf_init(ArrayConf* conf)
-#
-#    void array_destroy(Array* ar)
-#
+    cc_stat array_new(Array** out)
+
+    cc_stat array_new_conf(ArrayConf* conf, Array** out)
+
+    void array_conf_init(ArrayConf* conf)
+
+    void array_destroy(Array* ar)
+
 #    ctypedef void (*_array_destroy_cb_cb_ft)(void*)
 #
 #    void array_destroy_cb(Array* ar, _array_destroy_cb_cb_ft cb)
 #
-#    #cc_stat array_add(Array* ar, void* element)
+    cc_stat array_add(Array* ar, void* element)
 #
 #    #cc_stat array_add_at(Array* ar, void* element, size_t index)
 #
@@ -125,9 +128,9 @@ cdef extern from "common.h":
 #
 #    cc_stat array_filter(Array* ar, _array_filter_predicate_ft predicate, Array** out)
 #
-#    void array_iter_init(ArrayIter* iter, Array* ar)
-#
-#    cc_stat array_iter_next(ArrayIter* iter, void** out)
+    void array_iter_init(ArrayIter* iter, Array* ar)
+
+    cc_stat array_iter_next(ArrayIter* iter, void** out)
 #
 #    cc_stat array_iter_remove(ArrayIter* iter, void** out)
 #
@@ -181,6 +184,10 @@ cdef extern from "hashtable.h":
         TableEntry* prev_entry
         TableEntry* next_entry
 
+    hash_ft GENERAL_HASH
+    hash_ft STRING_HASH
+    hash_ft POINTER_HASH
+
 #    size_t get_table_index(HashTable *table, void *key)
 #
 #    void hashtable_conf_init(HashTableConf* conf)
@@ -209,10 +216,10 @@ cdef extern from "hashtable.h":
 #
 #    cc_stat hashtable_get_values(HashTable* table, Array** out)
 #
-#    size_t hashtable_hash_string(void* key, int len, uint32_t seed)
-#
-#    size_t hashtable_hash(void* key, int len, uint32_t seed)
-#
+    size_t hashtable_hash_string(void* key, int len, uint32_t seed)
+
+    size_t hashtable_hash(void* key, int len, uint32_t seed)
+
     size_t hashtable_hash_ptr(void* key, int len, uint32_t seed)
 #
 #    ctypedef void (*_hashtable_foreach_key_op_ft)(void*)

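Note: the Array bindings above are newly activated (previously commented out). A hypothetical Cython usage sketch, not part of this changeset, assuming the Collections-C sources are compiled into the extension as configured in setup.py further below:

    cimport lakesuperior.cy_include.collections as cc

    cdef void array_demo() except *:
        cdef:
            cc.Array* ar
            cc.ArrayIter it
            void* item
            int val = 42

        if cc.array_new(&ar) != cc.CC_OK:
            raise MemoryError('Could not allocate array.')

        cc.array_add(ar, &val)           # the array stores void* elements

        cc.array_iter_init(&it, ar)
        while cc.array_iter_next(&it, &item) != cc.CC_ITER_END:
            print((<int*>item)[0])       # prints 42

        cc.array_destroy(ar)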
+ 15 - 1
lakesuperior/model/base.pxd

@@ -1,5 +1,19 @@
-from lakesuperior.cy_include cimport cytpl as tpl
+cimport lakesuperior.cy_include.cytpl as tpl
 
 ctypedef tpl.tpl_bin Buffer
 
+# NOTE This may change in the future, e.g. if a different key size is to
+# be forced.
+ctypedef size_t Key
+
+ctypedef Key DoubleKey[2]
+ctypedef Key TripleKey[3]
+ctypedef Key QuadKey[4]
+
+cdef enum:
+    KLEN = sizeof(Key)
+    DBL_KLEN = 2 * sizeof(Key)
+    TRP_KLEN = 3 * sizeof(Key)
+    QUAD_KLEN = 4 * sizeof(Key)
+
 cdef bytes buffer_dump(Buffer* buf)

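With Key now an alias of size_t, compound key widths are derived with sizeof() rather than the previous hard-coded char[5] arithmetic. A minimal sketch (hypothetical, not in the commit) of what the new layout implies, assuming a 64-bit platform where sizeof(size_t) is 8:

    from lakesuperior.model.base cimport Key, TripleKey, KLEN, TRP_KLEN

    cdef void key_layout_demo():
        # A triple key holds the s, p, o term key indices side by side.
        cdef TripleKey spok = [1, 2, 3]

        # On a typical 64-bit build: KLEN == 8 and TRP_KLEN == 24.
        assert sizeof(spok) == TRP_KLEN == 3 * KLEN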
+ 1 - 0
lakesuperior/model/graph/callbacks.pxd

@@ -13,6 +13,7 @@ cdef:
     int trp_cmp_fn(const void* key1, const void* key2)
     size_t term_hash_fn(const void* key, int l, uint32_t seed)
     size_t trp_hash_fn(const void* key, int l, uint32_t seed)
+
     bint lookup_none_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2
     )

+ 3 - 2
lakesuperior/model/graph/callbacks.pyx

@@ -3,8 +3,9 @@ import logging
 from libc.stdint cimport uint32_t, uint64_t
 from libc.string cimport memcmp
 
-from lakesuperior.cy_include cimport collections as cc
-from lakesuperior.cy_include cimport spookyhash as sph
+cimport lakesuperior.cy_include.collections as cc
+cimport lakesuperior.cy_include.spookyhash as sph
+
 from lakesuperior.model.base cimport Buffer, buffer_dump
 from lakesuperior.model.graph cimport graph
 from lakesuperior.model.graph.triple cimport BufferTriple

+ 2 - 1
lakesuperior/model/graph/graph.pxd

@@ -2,7 +2,8 @@ from libc.stdint cimport uint32_t, uint64_t
 
 from cymem.cymem cimport Pool
 
-from lakesuperior.cy_include cimport collections as cc
+cimport lakesuperior.cy_include.collections as cc
+
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.model.graph.triple cimport BufferTriple
 

+ 6 - 6
lakesuperior/model/graph/graph.pyx

@@ -12,9 +12,10 @@ from libc.stdlib cimport free
 
 from cymem.cymem cimport Pool
 
-from lakesuperior.cy_include cimport collections as cc
+cimport lakesuperior.cy_include.collections as cc
+cimport lakesuperior.model.graph.callbacks as cb
+
 from lakesuperior.model.base cimport Buffer, buffer_dump
-from lakesuperior.model.graph cimport callbacks as cb
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.structures.hash cimport term_hash_seed32
@@ -29,10 +30,9 @@ cdef class SimpleGraph:
     Most functions should mimic RDFLib's graph with less overhead. It uses
     the same funny but functional slicing notation.
 
-    A SimpleGraph can be instantiated from a store lookup or obtained from a
-    :py:class:`lakesuperior.store.keyset.Keyset`. This makes it possible to use
-    a Keyset to perform initial filtering via identity by key, then the
-    filtered Keyset can be converted into a set of meaningful terms.
+    A SimpleGraph can be instantiated from a store lookup. This makes it
+    possible to use a Keyset to perform initial filtering via identity by key,
+    then the filtered Keyset can be converted into a set of meaningful terms.
 
     An instance of this class can also be converted to and from a
     ``rdflib.Graph`` instance.

+ 21 - 0
lakesuperior/model/structures/callbacks.pxd

@@ -0,0 +1,21 @@
+from lakesuperior.model.base cimport Key, TripleKey
+
+cdef:
+    bint lookup_sk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_pk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_ok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_skpk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_skok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_pkok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )

+ 33 - 0
lakesuperior/model/structures/callbacks.pyx

@@ -0,0 +1,33 @@
+from lakesuperior.model.base cimport Key, TripleKey
+
+cdef bint lookup_sk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[0] == k1
+
+cdef bint lookup_pk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[1] == k1
+
+cdef bint lookup_ok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[2] == k1
+
+cdef bint lookup_skpk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[0] == k1 and spok[1] == k2
+
+cdef bint lookup_skok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[0] == k1 and spok[2] == k2
+
+cdef bint lookup_pkok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[1] == k1 and spok[2] == k2
+
+

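All six comparators share one signature, so a lookup loop can bind the appropriate function pointer once and apply it to every TripleKey. A hedged illustration of that dispatch (hypothetical helper, not in the commit; it mirrors the Keyset.lookup() logic shown further below, where unused key slots are passed as NULL):

    cimport lakesuperior.model.structures.callbacks as cb
    from lakesuperior.model.base cimport Key, TripleKey

    # Mirrors the key_cmp_fn_t typedef declared in keyset.pxd below.
    ctypedef bint (*key_cmp_fn_t)(
        const TripleKey* spok, const Key* k1, const Key* k2
    )

    cdef bint matches_subject(const TripleKey* spok, const Key* sk):
        # 1-bound lookup: only k1 is meaningful, so k2 is NULL.
        cdef key_cmp_fn_t cmp_fn = cb.lookup_sk_cmp_fn
        return cmp_fn(spok, sk, NULL)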
+ 23 - 9
lakesuperior/model/structures/keyset.pxd

@@ -1,12 +1,26 @@
+from lakesuperior.model.base cimport (
+    Key, Key, DoubleKey, TripleKey, Buffer
+)
+
+ctypedef bint (*key_cmp_fn_t)(
+    const TripleKey* spok, const Key* k1, const Key* k2
+)
+
 cdef class Keyset:
     cdef:
-        readonly unsigned char *data
-        readonly unsigned char itemsize
-        readonly size_t ct, size
-        size_t _cur
-
-        void resize(self, size_t ct) except *
-        unsigned char *get_item(self, i)
-        bint iter_next(self, unsigned char** val)
-        bint contains(self, const void *val)
+        TripleKey* data
+        size_t ct
+        size_t _cur # Index cursor used to look up values.
+        size_t _free_i # Index of next free slot.
 
+        void seek(self, size_t idx=*)
+        size_t tell(self)
+        bint get_at(self, size_t i, TripleKey* item)
+        bint get_next(self, TripleKey* item)
+        void add(self, const TripleKey* val) except *
+        bint contains(self, const TripleKey* val)
+        Keyset copy(self)
+        void resize(self, size_t size=*) except *
+        Keyset lookup(
+            self, const Key* sk, const Key* pk, const Key* ok
+        )

+ 149 - 101
lakesuperior/model/structures/keyset.pyx

@@ -1,47 +1,34 @@
-from libc.string cimport memcmp
+import logging
+
+from libc.string cimport memcmp, memcpy
 from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
 
-cdef class Keyset:
-    """
-    Pre-allocated result set.
+cimport lakesuperior.model.structures.callbacks as cb
+
+from lakesuperior.model.base cimport TripleKey, TRP_KLEN
+
 
-    Data in the set are stored as a 1D contiguous array of characters.
-    Access to elements at an arbitrary index position is achieved by using the
-    ``itemsize`` property multiplied by the index number.
+logger = logging.getLogger(__name__)
 
-    Key properties:
 
-    ``ct``: number of elements in the set.
-    ``itemsize``: size of each element, in bytes. All elements have the same
-        size.
-    ``size``: Total size, in bytes, of the data set. This is the product of
-        ``itemsize`` and ``ct``.
+cdef class Keyset:
+    """
+    Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
     """
-    def __cinit__(self, size_t ct, unsigned char itemsize):
+    def __cinit__(self, size_t ct=0):
         """
         Initialize and allocate memory for the data set.
 
         :param size_t ct: Number of elements to be accounted for.
-        :param unsigned char itemsize: Size of an individual item.
-            Note that the ``itemsize`` is an unsigned char,
-            i.e. an item can be at most 255 bytes. This is for economy reasons,
-            since many multiplications are done between ``itemsize`` and other
-            char variables.
         """
         self.ct = ct
-        self.itemsize = itemsize
-        self.size = self.itemsize * self.ct
-        self._cur = 0
+        self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
+        logger.info(f'data address: 0x{<size_t>self.data:02x}')
+        if ct and not self.data:
+            raise MemoryError('Error allocating Keyset data.')
 
-        #logger.debug('Got malloc sizes: {}, {}'.format(ct, itemsize))
-        #logger.debug(
-        #    'Allocating {0} ({1}x{2}) bytes of Keyset data...'.format(
-        #        self.size, self.ct, self.itemsize))
-        self.data = <unsigned char *>PyMem_Malloc(ct * itemsize)
-        if not self.data:
-            raise MemoryError()
-        #logger.debug('...done allocating @ {0:x}.'.format(
-        #        <unsigned long>self.data))
+        self._cur = 0
+        self._free_i = 0
 
 
     def __dealloc__(self):
@@ -53,92 +40,44 @@ cdef class Keyset:
         """
         #logger.debug(
         #    'Releasing {0} ({1}x{2}) bytes of Keyset @ {3:x}...'.format(
-        #        self.size, self.ct, self.itemsize,
+        #        self.size, self.conf.capacity, self.itemsize,
         #        <unsigned long>self.data))
         PyMem_Free(self.data)
         #logger.debug('...done releasing.')
 
 
-    cdef void resize(self, size_t ct) except *:
-        """
-        Resize the result set. Uses ``PyMem_Realloc``.
-
-        Note that resizing to a smaller size does not copy or reallocate the
-        data, resizing to a larger size does.
-
-        Also, note that only the number of items can be changed, the item size
-        cannot.
-
-        :param size_t ct: Number of items in the result set.
-        """
-        cdef unsigned char *tmp
-        self.ct = ct
-        self.size = self.itemsize * self.ct
-
-        #logger.debug(
-        #    'Resizing Keyset to {0} ({1}x{2}) bytes @ {3:x}...'.format(
-        #        self.itemsize * ct, ct, self.itemsize,
-        #        <unsigned long>self.data))
-        tmp = <unsigned char *>PyMem_Realloc(self.data, ct * self.itemsize)
-        if not tmp:
-            raise MemoryError()
-        #logger.debug('...done resizing.')
-
-        self.data = tmp
-
-
     # Access methods.
 
-    def to_tuple(self):
-        """
-        Return the data set as a Python tuple.
-
-        :rtype: tuple
-        """
-        return tuple(
-                self.data[i: i + self.itemsize]
-                for i in range(0, self.size, self.itemsize))
-
-
-    def get_item_obj(self, i):
+    cdef void seek(self, size_t idx=0):
         """
-        Get an item at a given index position.
-
-        :rtype: bytes
-        """
-        if i >= self.ct:
-            raise ValueError(f'Index {i} out of range.')
-
-        return self.get_item(i)[: self.itemsize]
-
-
-    def iter_init(self):
+        Place the cursor at a certain index, 0 by default.
         """
-        Reset the cursor to the initial position.
-        """
-        self._cur = 0
+        self._cur = idx
 
 
-    def tell(self):
+    cdef size_t tell(self):
         """
         Tell the position of the cursor in the keyset.
         """
         return self._cur
 
 
-    cdef unsigned char *get_item(self, i):
+    cdef bint get_at(self, size_t i, TripleKey* item):
         """
         Get an item at a given index position. Cython-level method.
 
-        The item size is known by the ``itemsize`` property of the object.
-
-        :rtype: unsigned char*
+        :rtype: TripleKey
         """
+        if i >= self._free_i:
+            return False
+
         self._cur = i
-        return self.data + self.itemsize * i
+        item[0] = self.data[i]
+
+        return True
 
 
-    cdef bint iter_next(self, unsigned char** val):
+    cdef bint get_next(self, TripleKey* item):
         """
         Populate the current value and advance the cursor by 1.
 
@@ -149,25 +88,134 @@ cdef class Keyset:
         :return: True if a value was found, False if the end of the buffer
             has been reached.
         """
-        if self._cur >= self.ct:
-            val = NULL
+        if self._cur >= self._free_i:
             return False
 
-        val[0] = self.data + self.itemsize * self._cur
+        item[0] = self.data[self._cur]
         self._cur += 1
 
         return True
 
 
-    cdef bint contains(self, const void *val):
+    cdef void add(self, const TripleKey* val) except *:
+        """
+        Add a triple key to the array.
+        """
+        if self._free_i >= self.ct:
+            raise MemoryError('No slots left in key set.')
+
+        self.data[self._free_i] = val[0]
+
+        self._free_i += 1
+
+
+    cdef bint contains(self, const TripleKey* val):
         """
         Whether a value exists in the set.
         """
-        cdef unsigned char* stored_val
+        cdef TripleKey stored_val
 
-        self.iter_init()
-        while self.iter_next(&stored_val):
-            if memcmp(val, stored_val, self.itemsize) == 0:
+        self.seek()
+        while self.get_next(&stored_val):
+            if memcmp(val, stored_val, TRP_KLEN) == 0:
                 return True
         return False
 
+
+    cdef Keyset copy(self):
+        """
+        Copy a Keyset.
+        """
+        cdef Keyset new_ks = Keyset(self.ct)
+        memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
+        new_ks.seek()
+
+        return new_ks
+
+
+    cdef void resize(self, size_t size=0) except *:
+        """
+        Change the array capacity.
+
+        :param size_t size: The new capacity size. If not specified or 0, the
+            array is shrunk to the last used item. The resulting size
+            therefore will always be greater than 0. The only exception
+            to this is if the specified size is 0 and no items have been added
+            to the array, in which case the array will be effectively shrunk
+            to 0.
+        """
+        if not size:
+            size = self._free_i
+
+        tmp = <TripleKey*>PyMem_Realloc(self.data, size * TRP_KLEN)
+
+        if not tmp:
+            raise MemoryError('Could not reallocate Keyset data.')
+
+        self.data = tmp
+        self.ct = size
+        self.seek()
+
+
+    cdef Keyset lookup(
+            self, const Key* sk, const Key* pk, const Key* ok
+    ):
+        """
+        Look up triple keys.
+
+        This works in a similar way that the ``SimpleGraph`` and ``LmdbStore``
+        methods work.
+
+        Any and all the terms may be NULL. A NULL term is treated as unbound.
+
+        :param const Key* sk: s key pointer.
+        :param const Key* pk: p key pointer.
+        :param const Key* ok: o key pointer.
+        """
+        cdef:
+            TripleKey spok
+            Keyset ret = Keyset(self.ct)
+            Key* k1 = NULL
+            Key* k2 = NULL
+            key_cmp_fn_t cmp_fn
+
+        if sk and pk and ok: # s p o
+            pass # TODO
+
+        elif sk:
+            k1 = sk
+            if pk: # s p ?
+                k2 = pk
+                cmp_fn = cb.lookup_skpk_cmp_fn
+
+            elif ok: # s ? o
+                k2 = ok
+                cmp_fn = cb.lookup_skok_cmp_fn
+
+            else: # s ? ?
+                cmp_fn = cb.lookup_sk_cmp_fn
+
+        elif pk:
+            k1 = pk
+            if ok: # ? p o
+                k2 = ok
+                cmp_fn = cb.lookup_pkok_cmp_fn
+
+            else: # ? p ?
+                cmp_fn = cb.lookup_pk_cmp_fn
+
+        elif ok: # ? ? o
+            k1 = ok
+            cmp_fn = cb.lookup_ok_cmp_fn
+
+        else: # ? ? ?
+            return self.copy()
+
+        self.seek()
+        while self.get_next(&spok):
+            if cmp_fn(<TripleKey*>spok, k1, k2):
+                ret.add(&spok)
+
+        ret.resize()
+
+        return ret

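Taken together, a hypothetical Cython caller would preallocate, fill, and filter a Keyset as below (a sketch assuming the cimports resolve at build time; not part of this commit):

    from lakesuperior.model.base cimport Key, TripleKey
    from lakesuperior.model.structures.keyset cimport Keyset

    cdef void keyset_demo() except *:
        cdef:
            Keyset ks = Keyset(2)            # room for two triple keys
            TripleKey spok1 = [1, 10, 100]
            TripleKey spok2 = [2, 10, 200]
            Key pk = 10

        ks.add(&spok1)
        ks.add(&spok2)

        # Filter on predicate key only (s and o unbound).
        cdef Keyset flt = ks.lookup(NULL, &pk, NULL)
        assert flt.ct == 2                   # both triples share predicate 10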
+ 2 - 1
lakesuperior/store/base_lmdb_store.pxd

@@ -1,4 +1,4 @@
-cimport lakesuperior.cy_include.cylmdb as lmdb
+from lakesuperior.cy_include cimport cylmdb as lmdb
 
 cdef:
     int rc
@@ -13,6 +13,7 @@ cdef:
 cdef class BaseLmdbStore:
     cdef:
         readonly bint is_txn_open
+        readonly bint is_txn_rw
         public bint _open
         unsigned int _readers
         readonly str env_path

+ 25 - 58
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -1,59 +1,25 @@
+cimport lakesuperior.cy_include.collections as cc
 cimport lakesuperior.cy_include.cylmdb as lmdb
 cimport lakesuperior.cy_include.cytpl as tpl
 
-from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.base cimport (
+    Key, DoubleKey, TripleKey, Buffer
+)
 from lakesuperior.model.graph.graph cimport SimpleGraph
 from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
 
-#Fixed length for term keys.
-#
-#4 or 5 is a safe range. 4 allows for ~4 billion (256 ** 4) unique terms
-#in the store. 5 allows ~1 trillion terms. While these numbers may seem
-#huge (the total number of Internet pages indexed by Google as of 2018 is 45
-#billions), it must be reminded that the keys cannot be reused, so a
-#repository that deletes a lot of triples may burn through a lot of terms.
-#
-#If a repository runs ot of keys it can no longer store new terms and must
-#be migrated to a new database, which will regenerate and compact the keys.
-#
-#For smaller repositories it should be safe to set this value to 4, which
-#could improve performance since keys make up the vast majority of record
-#exchange between the store and the application. However it is sensible not
-#to expose this value as a configuration option.
-#
-#TODO: Explore the option to use size_t (8 bits, or in some architectures,
-#4 bits). If the overhead of handling 8
-#vs. 5 bytes is not huge (and maybe counterbalanced by x86_64 arch optimizations
-#for 8-byte words) it may be worth using those instead of char[5] to simplify
-#the code significantly.
-DEF _KLEN = 5
-DEF _DBL_KLEN = _KLEN * 2
-DEF _TRP_KLEN = _KLEN * 3
-DEF _QUAD_KLEN = _KLEN * 4
-# Lexical sequence start. ``\\x01`` is fine since no special characters are
-# used, but it's good to leave a spare for potential future use.
-DEF _KEY_START = b'\x01'
-
-cdef enum:
-    KLEN = _KLEN
-    DBL_KLEN = _DBL_KLEN
-    TRP_KLEN = _TRP_KLEN
-    QUAD_KLEN = _QUAD_KLEN
-
-ctypedef unsigned char Key[KLEN]
-ctypedef unsigned char DoubleKey[DBL_KLEN]
-ctypedef unsigned char TripleKey[TRP_KLEN]
-ctypedef unsigned char QuadKey[QUAD_KLEN]
-
 cdef:
-    unsigned char KEY_START = _KEY_START
-    unsigned char FIRST_KEY[KLEN]
+    enum:
+        IDX_OP_ADD = 1
+        IDX_OP_REMOVE = -1
+
     unsigned char lookup_rank[3]
     unsigned char lookup_ordering[3][3]
     unsigned char lookup_ordering_2bound[3][3]
 
 
+
 cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef dict stats(self)
     cpdef size_t _len(self, context=*) except -1
@@ -62,27 +28,28 @@ cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef tuple all_namespaces(self)
-    cpdef tuple all_contexts(self, triple=*)
     cpdef SimpleGraph graph_lookup(
         self, triple_pattern, context=*, uri=*, copy=*
     )
 
     cdef:
-        void _add_graph(self, Buffer *pk_gr) except *
-        void _index_triple(self, str op, TripleKey spok) except *
+        void _add_graph(self, Buffer* pk_gr) except *
+        void _index_triple(self, int op, TripleKey spok) except *
         Keyset triple_keys(self, tuple triple_pattern, context=*)
-        Keyset _all_term_keys(self, term_type)
-        inline void lookup_term(self, const Key key, Buffer* data) except *
+        void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *
+        void lookup_term(self, const Key* tk, Buffer* data) except *
         Keyset _lookup(self, tuple triple_pattern)
-        Keyset _lookup_1bound(self, unsigned char idx, term)
+        Keyset _lookup_1bound(self, unsigned char idx, Key luk)
         Keyset _lookup_2bound(
-                self, unsigned char idx1, term1, unsigned char idx2, term2)
-        object from_key(self, const Key key)
-        tuple from_trp_key(self, TripleKey key)
-        inline void _to_key(self, term, Key *key) except *
-        inline void _to_triple_key(self, tuple terms, TripleKey *tkey) except *
-        void _append(
-                self, Buffer *value, Key *nkey,
+            self, unsigned char idx1, unsigned char idx2, DoubleKey tks
+        )
+        object from_key(self, const Key tk)
+        Key _to_key_idx(self, term) except -1
+        void all_contexts(self, Key** ctx, size_t* sz, triple=*) except *
+        Key _append(
+                self, Buffer *value,
                 unsigned char *dblabel=*, lmdb.MDB_txn *txn=*,
-                unsigned int flags=*) except *
-        void _next_key(self, const Key key, Key *nkey) except *
+                unsigned int flags=*)
+
+        #Key bytes_to_idx(self, const unsigned char* bs)
+        #unsigned char* idx_to_bytes(Key idx)

File diff suppressed because it is too large
+ 318 - 287
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx


+ 106 - 29
lakesuperior/util/benchmark.py

@@ -1,11 +1,14 @@
 #!/usr/bin/env python3
 
+import logging
 import sys
 
+from os import path
 from uuid import uuid4
 
 import arrow
 import click
+import rdflib
 import requests
 
 from matplotlib import pyplot as plt
@@ -17,16 +20,32 @@ __doc__ = '''
 Benchmark script to measure write performance.
 '''
 
+def_mode = 'ldp'
 def_endpoint = 'http://localhost:8000/ldp'
 def_ct = 10000
 def_parent = '/pomegranate'
 def_gr_size = 200
 
+logging.disable(logging.WARN)
+
 
 @click.command()
+@click.option(
+    '--mode', '-m', default=def_mode,
+    help=(
+        'Mode of ingestion. One of `ldp`, `python`. With the former, the '
+        'HTTP/LDP web server is used. With the latter, the Python API is '
+        'used, in which case the server need not be running. '
+        f'Default: {def_endpoint}'
+    )
+)
 @click.option(
     '--endpoint', '-e', default=def_endpoint,
-    help=f'LDP endpoint. Default: {def_endpoint}')
+    help=(
+        'LDP endpoint. Only meaningful with `ldp` mode. '
+        f'Default: {def_endpoint}'
+    )
+)
 @click.option(
     '--count', '-c', default=def_ct,
     help='Number of resources to ingest. Default: {def_ct}')
@@ -40,9 +59,12 @@ def_gr_size = 200
     help='Delete container resource and its children if already existing. By '
     'default, the container is not deleted and new resources are added to it.')
 @click.option(
-    '--method', '-m', default='put',
-    help='HTTP method to use. Case insensitive. Either PUT '
-    f'or POST. Default: PUT')
+    '--method', '-X', default='put',
+    help=(
+        'HTTP method to use. Case insensitive. Either PUT or POST. '
+        'Default: PUT'
+    )
+)
 @click.option(
     '--graph-size', '-s', default=def_gr_size,
     help=f'Number of triples in each graph. Default: {def_gr_size}')
@@ -52,47 +74,73 @@ def_gr_size = 200
     '`n` (only  LDP-NR, i.e. binaries), or `b` (50/50% of both). '
     'Default: r')
 @click.option(
-    '--graph', '-g', is_flag=True, help='Plot a graph of ingest timings. '
+    '--plot', '-P', is_flag=True, help='Plot a graph of ingest timings. '
    'The graph figure is displayed on screen with basic manipulation and save '
    'options.')
 
 def run(
-        endpoint, count, parent, method, delete_container,
-        graph_size, resource_type, graph):
-
-    container_uri = endpoint + parent
+    mode, endpoint, count, parent, method, delete_container,
+    graph_size, resource_type, plot
+):
+    """
+    Run the benchmark.
+    """
 
     method = method.lower()
     if method not in ('post', 'put'):
-        raise ValueError(f'HTTP method not supported: {method}')
+        raise ValueError(f'Insertion method not supported: {method}')
+
+    mode = mode.lower()
+    if mode == 'ldp':
+        parent = '{}/{}'.format(endpoint.strip('/'), parent.strip('/'))
+
+        if delete_container:
+            requests.delete(parent, headers={'prefer': 'no-tombstone'})
+        requests.put(parent)
 
-    if delete_container:
-        requests.delete(container_uri, headers={'prefer': 'no-tombstone'})
-    requests.put(container_uri)
+    elif mode == 'python':
+        from lakesuperior import env_setup
+        from lakesuperior.api import resource as rsrc_api
+
+        if delete_container:
+            rsrc_api.delete(parent, soft=False)
+        rsrc_api.create_or_replace(parent)
+    else:
+        raise ValueError(f'Mode not supported: {mode}')
 
-    print(f'Inserting {count} children under {container_uri}.')
 
     # URI used to establish an in-repo relationship. This is set to
     # the most recently created resource in each loop.
-    ref = container_uri
+    ref = parent
+
+    print(f'Inserting {count} children under {parent}.')
 
     wclock_start = arrow.utcnow()
-    if graph:
+    if plot:
        print('Results will be plotted.')
        # Plot coordinates: X is request count, Y is request timing.
        px = []
        py = []
        plt.xlabel('Requests')
        plt.ylabel('ms per request')
-        plt.title('FCREPO Benchmark')
+        plt.title('Lakesuperior / FCREPO Benchmark')
 
     try:
        for i in range(1, count + 1):
-            url = '{}/{}'.format(container_uri, uuid4()) if method == 'put' \
-                    else container_uri
+            #import pdb; pdb.set_trace()
+            if mode == 'ldp':
+                dest = (
+                    f'{parent}/{uuid4()}' if method == 'put'
+                    else parent
+                )
+            else:
+                dest = (
+                    path.join(parent, str(uuid4()))
+                    if method == 'put' else parent
+                )
 
            if resource_type == 'r' or (resource_type == 'b' and i % 2 == 0):
-                data = random_graph(graph_size, ref).serialize(format='ttl')
+                data = random_graph(graph_size, ref)
                headers = {'content-type': 'text/turtle'}
            else:
                img = random_image(name=uuid4(), ts=16, ims=512)
@@ -103,19 +151,21 @@ def run(
                        'content-disposition': 'attachment; filename="{}"'
                            .format(uuid4())}
 
-            #import pdb; pdb.set_trace()
            # Start timing after generating the data.
            ckpt = arrow.utcnow()
            if i == 1:
                tcounter = ckpt - ckpt
                prev_tcounter = tcounter
 
-            rsp = requests.request(method, url, data=data, headers=headers)
-            tdelta = arrow.utcnow() - ckpt
-            tcounter += tdelta
+            ref = (
+                _ingest_graph_ldp(
+                    method, dest, data.serialize(format='ttl'), headers, ref
+                )
+                if mode == 'ldp'
+                else _ingest_graph_py(method, dest, data, ref)
+            )
+            tcounter += (arrow.utcnow() - ckpt)
 
-            rsp.raise_for_status()
-            ref = rsp.headers['location']
            if i % 10 == 0:
                avg10 = (tcounter - prev_tcounter) / 10
                print(
@@ -123,7 +173,7 @@ def run(
                    f'Per resource: {avg10}')
                prev_tcounter = tcounter
 
-                if graph:
+                if plot:
                    px.append(i)
                    # Divide by 1000 for µs → ms
                    py.append(avg10.microseconds // 1000)
@@ -136,7 +186,7 @@ def run(
     print(f'Total time spent ingesting resources: {tcounter}')
     print(f'Average time per resource: {tcounter.total_seconds()/i}')
 
-    if graph:
+    if plot:
        if resource_type == 'r':
            type_label = 'LDP-RS'
        elif resource_type == 'n':
@@ -144,12 +194,39 @@ def run(
        else:
            type_label = 'LDP-RS + LDP-NR'
        label = (
-            f'{container_uri}; {method.upper()}; {graph_size} trp/graph; '
+            f'{parent}; {method.upper()}; {graph_size} trp/graph; '
            f'{type_label}')
        plt.plot(px, py, label=label)
        plt.legend()
        plt.show()
 
 
+def _ingest_graph_ldp(method, uri, data, headers, ref):
+    """
+    Ingest the graph via HTTP/LDP.
+    """
+    rsp = requests.request(method, uri, data=data, headers=headers)
+    rsp.raise_for_status()
+    return rsp.headers['location']
+
+
+def _ingest_graph_py(method, dest, data, ref):
+    from lakesuperior.api import resource as rsrc_api
+
+    kwargs = {}
+    if isinstance(data, rdflib.Graph):
+        kwargs['graph'] = data
+    else:
+        kwargs['stream'] = data
+        kwargs['mimetype'] = 'image/png'
+
+    if method == 'put':
+        _, rsrc = rsrc_api.create_or_replace(dest, **kwargs)
+    else:
+        _, rsrc = rsrc_api.create(dest, **kwargs)
+
+    return rsrc.uid
+
+
 if __name__ == '__main__':
     run()

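The new --mode option makes the benchmark runnable without a web server. A hedged smoke-test invocation through Click's test runner (illustrative only; assumes a configured Lakesuperior environment reachable from the Python API):

    from click.testing import CliRunner
    from lakesuperior.util.benchmark import run

    # Ingest 100 resources through the Python API; no HTTP round trips.
    result = CliRunner().invoke(
        run, ['--mode', 'python', '--count', '100', '--parent', '/bench_py']
    )
    print(result.output)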
+ 2 - 1
requirements_dev.txt

@@ -1,5 +1,5 @@
 CoilMQ>=1.0.1
-Cython==0.29
+Cython==0.29.6
 Flask>=0.12.2
 HiYaPyCo>=0.4.11
 Pillow>=4.3.0
@@ -9,6 +9,7 @@ click-log>=0.2.1
 click>=6.7
 gevent>=1.3.6
 gunicorn>=19.7.1
+matplotlib
 numpy>=1.15.1
 pytest-flask
 pytest>=3.2.2

+ 11 - 3
setup.py

@@ -16,7 +16,7 @@ from os import path
 import lakesuperior
 
 # Use this version to build C files from .pyx sources.
-CYTHON_VERSION='0.29'
+CYTHON_VERSION='0.29.6'
 
 KLEN = 5 # TODO Move somewhere else (config?)
 
@@ -91,8 +91,8 @@ extensions = [
             path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
-            path.join(coll_src_dir, 'hashset.c'),
             path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join('lakesuperior', 'model', 'structures', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
@@ -108,8 +108,8 @@ extensions = [
             path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
-            path.join(coll_src_dir, 'hashset.c'),
             path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join('lakesuperior', 'model', 'graph', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
@@ -119,6 +119,10 @@ extensions = [
     Extension(
         'lakesuperior.store.base_lmdb_store',
         [
+            path.join(coll_src_dir, 'common.c'),
+            path.join(coll_src_dir, 'array.c'),
+            path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join(tpl_src_dir, 'tpl.c'),
             path.join(lmdb_src_dir, 'mdb.c'),
             path.join(lmdb_src_dir, 'midl.c'),
@@ -129,6 +133,10 @@ extensions = [
     Extension(
         'lakesuperior.store.ldp_rs.lmdb_triplestore',
         [
+            path.join(coll_src_dir, 'common.c'),
+            path.join(coll_src_dir, 'array.c'),
+            path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join(lmdb_src_dir, 'mdb.c'),
             path.join(lmdb_src_dir, 'midl.c'),
             path.join(

+ 43 - 7
tests/1_store/test_lmdb_store.py

@@ -258,6 +258,42 @@ class TestBasicOps:
 
 
 
+@pytest.mark.usefixtures('store', 'bogus_trp')
+class TestExtendedOps:
+    '''
+    Test additional store operations.
+    '''
+
+    def test_all_terms(self, store, bogus_trp):
+        """
+        Test the "all terms" methods.
+        """
+        with store.txn_ctx(True):
+            for trp in bogus_trp:
+                store.add(trp)
+
+        with store.txn_ctx():
+            all_s = store.all_terms('s')
+            all_p = store.all_terms('p')
+            all_o = store.all_terms('o')
+
+        assert len(all_s) == 1
+        assert len(all_p) == 100
+        assert len(all_o) == 1000
+
+        assert URIRef('urn:test_mp:s1') in all_s
+        assert URIRef('urn:test_mp:s1') not in all_p
+        assert URIRef('urn:test_mp:s1') not in all_o
+
+        assert URIRef('urn:test_mp:p10') not in all_s
+        assert URIRef('urn:test_mp:p10') in all_p
+        assert URIRef('urn:test_mp:p10') not in all_o
+
+        assert URIRef('urn:test_mp:o99') not in all_s
+        assert URIRef('urn:test_mp:o99') not in all_p
+        assert URIRef('urn:test_mp:o99') in all_o
+
+
 @pytest.mark.usefixtures('store', 'bogus_trp')
 class TestEntryCount:
     '''
@@ -649,7 +685,7 @@ class TestContext:
 
         with store.txn_ctx(True):
             store.add_graph(gr_uri)
-            assert gr_uri in {gr.uri for gr in store.contexts()}
+            assert gr_uri in store.contexts()
 
 
     def test_add_graph_with_triple(self, store):
@@ -664,7 +700,7 @@
             store.add(trp, ctx_uri)
 
         with store.txn_ctx():
-            assert ctx_uri in {gr.uri for gr in store.contexts(trp)}
+            assert ctx_uri in store.contexts(trp)
 
 
     def test_empty_context(self, store):
@@ -675,10 +711,10 @@
 
         with store.txn_ctx(True):
             store.add_graph(gr_uri)
-            assert gr_uri in {gr.uri for gr in store.contexts()}
+            assert gr_uri in store.contexts()
         with store.txn_ctx(True):
             store.remove_graph(gr_uri)
-            assert gr_uri not in {gr.uri for gr in store.contexts()}
+            assert gr_uri not in store.contexts()
 
 
     def test_context_ro_txn(self, store):
@@ -698,10 +734,10 @@
         # allow a lookup in the same transaction, but this does not seem to be
         # possible.
         with store.txn_ctx():
-            assert gr_uri in {gr.uri for gr in store.contexts()}
+            assert gr_uri in store.contexts()
         with store.txn_ctx(True):
             store.remove_graph(gr_uri)
-            assert gr_uri not in {gr.uri for gr in store.contexts()}
+            assert gr_uri not in store.contexts()
 
 
     def test_add_trp_to_ctx(self, store):
@@ -732,7 +768,7 @@
             assert len(set(store.triples((None, None, None), gr_uri))) == 3
             assert len(set(store.triples((None, None, None), gr2_uri))) == 1
 
-            assert gr2_uri in {gr.uri for gr in store.contexts()}
+            assert gr2_uri in store.contexts()
             assert trp1 in _clean(store.triples((None, None, None)))
             assert trp1 not in _clean(store.triples((None, None, None),
                     RDFLIB_DEFAULT_GRAPH_URI))

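The assertion changes above track an API simplification in this commit: store.contexts() now appears to yield context URIs directly, so no .uri attribute access is needed. A minimal sketch of the new call pattern (assumes an initialized store and an open read transaction):

    with store.txn_ctx():
        for ctx_uri in store.contexts():   # URI values, not graph objects
            print(ctx_uri)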
Some files were not shown because too many files changed in this diff