
Merge branch 'refactor_dstructs' of github_scossu:scossu/lakesuperior into refactor_dstructs

Stefano Cossu 6 years ago
parent
current commit
9aa66247c4

+ 1 - 1
.travis.yml

@@ -10,7 +10,7 @@ matrix:
       sudo: true
 
 install:
-  - pip install Cython==0.29 cymem
+  - pip install Cython==0.29.6 cymem
   - pip install -e .
 script:
   - python setup.py test

+ 1 - 1
ext/lmdb

@@ -1 +1 @@
-Subproject commit 2a5eaad6919ce6941dec4f0d5cce370707a00ba7
+Subproject commit 5033a08c86fb6ef0adddabad327422a1c0c0069a

+ 0 - 88
lakesuperior/cy_include/calg.pxd

@@ -1,88 +0,0 @@
-cdef extern from 'set.h':
-    #ctypedef _Set Set
-    #ctypedef _SetEntry SetEntry
-    ctypedef void *SetValue
-
-    ctypedef unsigned int (*SetHashFunc)(SetValue value)
-    ctypedef bint (*SetEqualFunc)(SetValue value1, SetValue value2)
-    ctypedef void (*SetFreeFunc)(SetValue value)
-
-    ctypedef struct SetEntry:
-        SetValue data
-        SetEntry *next
-
-    ctypedef struct _Set:
-        SetEntry **table
-        unsigned int entries
-        unsigned int table_size
-        unsigned int prime_index
-        SetHashFunc hash_func
-        SetEqualFunc equal_func
-        SetFreeFunc free_func
-
-    ctypedef _Set Set
-
-    ctypedef struct SetIterator:
-        pass
-
-    Set *set_new(SetHashFunc hash_func, SetEqualFunc equal_func)
-    void set_free(Set *set)
-    # TODO This should return an int, ideally. See
-    # https://github.com/fragglet/c-algorithms/issues/20
-    bint set_insert(Set *set, SetValue data)
-    bint set_insert_or_assign(Set *set, SetValue *data)
-    bint set_query(Set *set, SetValue data)
-    bint set_enlarge(Set *set)
-    unsigned int set_num_entries(Set *set)
-    SetValue *set_to_array(Set *set)
-    Set *set_union(Set *set1, Set *set2)
-    Set *set_intersection(Set *set1, Set *set2)
-    void set_iterate(Set *set, SetIterator *iter)
-    bint set_iter_has_more(SetIterator *iterator)
-    SetValue set_iter_next(SetIterator *iterator)
-
-
-cdef extern from 'hash-table.h':
-    ctypedef void *HashTableKey
-    ctypedef void *HashTableValue
-
-    ctypedef struct HashTablePair:
-        HashTableKey key
-        HashTableKey value
-
-    ctypedef struct HashTableEntry:
-        HashTablePair pair
-        HashTableEntry *next
-
-    ctypedef struct HashTable:
-        HashTableEntry **table
-        unsigned int table_size
-        unsigned int entries
-        unsigned int prime_index
-
-    ctypedef struct HashTableIterator:
-        pass
-
-    ctypedef unsigned int (*HashTableHashFunc)(HashTableKey value)
-    ctypedef bint (*HashTableEqualFunc)(
-            HashTableKey value1, HashTableKey value2)
-    ctypedef void (*HashTableKeyFreeFunc)(HashTableKey value)
-    ctypedef void (*HashTableValueFreeFunc)(HashTableValue value)
-
-
-    HashTable *hash_table_new(
-            HashTableHashFunc hash_func, HashTableEqualFunc equal_func)
-    void hash_table_free(HashTable *hash_table)
-    void hash_table_register_free_functions(
-            HashTable *hash_table, HashTableKeyFreeFunc key_free_func,
-            HashTableValueFreeFunc value_free_func)
-    int hash_table_insert(
-            HashTable *hash_table, HashTableKey key, HashTableValue value)
-    HashTableValue hash_table_lookup(
-            HashTable *hash_table, HashTableKey key)
-    bint hash_table_remove(HashTable *hash_table, HashTableKey key)
-    unsigned int hash_table_num_entries(HashTable *hash_table)
-    void hash_table_iterate(HashTable *hash_table, HashTableIterator *iter)
-    bint hash_table_iter_has_more(HashTableIterator *iterator)
-    HashTablePair hash_table_iter_next(HashTableIterator *iterator)
-

+ 40 - 33
lakesuperior/cy_include/collections.pxd

@@ -9,7 +9,7 @@ ctypedef int (*key_compare_ft)(const void* key1, const void* key2)
 
 cdef extern from "common.h":
 
-    cdef enum cc_stat:
+    enum cc_stat:
         CC_OK
         CC_ERR_ALLOC
         CC_ERR_INVALID_CAPACITY
@@ -19,47 +19,50 @@ cdef extern from "common.h":
         CC_ERR_VALUE_NOT_FOUND
         CC_ERR_OUT_OF_RANGE
         CC_ITER_END
+
+    key_compare_ft CC_CMP_STRING
+    key_compare_ft CC_CMP_POINTER
 #
 #    int cc_common_cmp_str(const void* key1, const void* key2)
 #
 #    int cc_common_cmp_ptr(const void* key1, const void* key2)
 
-#cdef extern from "array.h":
+cdef extern from "array.h":
+
+    ctypedef struct Array:
+        pass
+
+    ctypedef struct ArrayConf:
+        size_t          capacity
+        float           exp_factor
+        mem_alloc_ft  mem_alloc
+        mem_calloc_ft mem_calloc
+        mem_free_ft   mem_free
+
+    ctypedef struct ArrayIter:
+        Array* ar
+        size_t index
+        bint last_removed
 
-#    ctypedef struct Array:
-#        pass
-#
-#    ctypedef struct ArrayConf:
-#        size_t          capacity
-#        float           exp_factor
-#        mem_alloc_ft  mem_alloc
-#        mem_calloc_ft mem_calloc
-#        mem_free_ft   mem_free
-#
-#    ctypedef struct ArrayIter:
-#        Array* ar
-#        size_t index
-#        bint last_removed
-#
 #    ctypedef struct ArrayZipIter:
 #        Array* ar1
 #        Array* ar2
 #        size_t index
 #        bint last_removed
 #
-#    cc_stat array_new(Array** out)
-#
-#    cc_stat array_new_conf(ArrayConf* conf, Array** out)
-#
-#    void array_conf_init(ArrayConf* conf)
-#
-#    void array_destroy(Array* ar)
-#
+    cc_stat array_new(Array** out)
+
+    cc_stat array_new_conf(ArrayConf* conf, Array** out)
+
+    void array_conf_init(ArrayConf* conf)
+
+    void array_destroy(Array* ar)
+
 #    ctypedef void (*_array_destroy_cb_cb_ft)(void*)
 #
 #    void array_destroy_cb(Array* ar, _array_destroy_cb_cb_ft cb)
 #
-#    #cc_stat array_add(Array* ar, void* element)
+    cc_stat array_add(Array* ar, void* element)
 #
 #    #cc_stat array_add_at(Array* ar, void* element, size_t index)
 #
@@ -125,9 +128,9 @@ cdef extern from "common.h":
 #
 #    cc_stat array_filter(Array* ar, _array_filter_predicate_ft predicate, Array** out)
 #
-#    void array_iter_init(ArrayIter* iter, Array* ar)
-#
-#    cc_stat array_iter_next(ArrayIter* iter, void** out)
+    void array_iter_init(ArrayIter* iter, Array* ar)
+
+    cc_stat array_iter_next(ArrayIter* iter, void** out)
 #
 #    cc_stat array_iter_remove(ArrayIter* iter, void** out)
 #
@@ -181,6 +184,10 @@ cdef extern from "hashtable.h":
         TableEntry* prev_entry
         TableEntry* next_entry
 
+    hash_ft GENERAL_HASH
+    hash_ft STRING_HASH
+    hash_ft POINTER_HASH
+
 #    size_t get_table_index(HashTable *table, void *key)
 #
 #    void hashtable_conf_init(HashTableConf* conf)
@@ -209,10 +216,10 @@ cdef extern from "hashtable.h":
 #
 #    cc_stat hashtable_get_values(HashTable* table, Array** out)
 #
-#    size_t hashtable_hash_string(void* key, int len, uint32_t seed)
-#
-#    size_t hashtable_hash(void* key, int len, uint32_t seed)
-#
+    size_t hashtable_hash_string(void* key, int len, uint32_t seed)
+
+    size_t hashtable_hash(void* key, int len, uint32_t seed)
+
     size_t hashtable_hash_ptr(void* key, int len, uint32_t seed)
 #
 #    ctypedef void (*_hashtable_foreach_key_op_ft)(void*)

+ 8 - 9
lakesuperior/model/base.pxd

@@ -1,20 +1,19 @@
-from lakesuperior.cy_include cimport cytpl as tpl
+cimport lakesuperior.cy_include.cytpl as tpl
 
 ctypedef tpl.tpl_bin Buffer
 
 # NOTE This may change in the future, e.g. if a different key size is to
 # be forced.
-ctypedef size_t KeyIdx
+ctypedef size_t Key
 
-ctypedef KeyIdx Key[1]
-ctypedef KeyIdx DoubleKey[2]
-ctypedef KeyIdx TripleKey[3]
-ctypedef KeyIdx QuadKey[4]
+ctypedef Key DoubleKey[2]
+ctypedef Key TripleKey[3]
+ctypedef Key QuadKey[4]
 
 cdef enum:
     KLEN = sizeof(Key)
-    DBL_KLEN = sizeof(DoubleKey)
-    TRP_KLEN = sizeof(TripleKey)
-    QUAD_KLEN = sizeof(QuadKey)
+    DBL_KLEN = 2 * sizeof(Key)
+    TRP_KLEN = 3 * sizeof(Key)
+    QUAD_KLEN = 4 * sizeof(Key)
 
 cdef bytes buffer_dump(Buffer* buf)
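
The base.pxd changes above collapse the old two-level key typedefs: KeyIdx is gone, Key is now the scalar (size_t) key type, and DoubleKey/TripleKey/QuadKey are fixed-size arrays of it, with the *_KLEN constants written as explicit multiples of sizeof(Key). A minimal Python sketch of that size arithmetic, assuming Key maps to the platform's size_t (whose width varies by platform):

    import ctypes

    # Mirror of the KLEN constants in lakesuperior/model/base.pxd, with
    # size_t standing in for the Cython Key typedef.
    KLEN = ctypes.sizeof(ctypes.c_size_t)   # sizeof(Key)
    DBL_KLEN = 2 * KLEN                     # DoubleKey: Key[2]
    TRP_KLEN = 3 * KLEN                     # TripleKey: Key[3]
    QUAD_KLEN = 4 * KLEN                    # QuadKey:   Key[4]

    if __name__ == '__main__':
        # On a typical 64-bit build this prints 8 16 24 32.
        print(KLEN, DBL_KLEN, TRP_KLEN, QUAD_KLEN)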

+ 1 - 0
lakesuperior/model/graph/callbacks.pxd

@@ -13,6 +13,7 @@ cdef:
     int trp_cmp_fn(const void* key1, const void* key2)
     size_t term_hash_fn(const void* key, int l, uint32_t seed)
     size_t trp_hash_fn(const void* key, int l, uint32_t seed)
+
     bint lookup_none_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2
     )

+ 3 - 2
lakesuperior/model/graph/callbacks.pyx

@@ -3,8 +3,9 @@ import logging
 from libc.stdint cimport uint32_t, uint64_t
 from libc.string cimport memcmp
 
-from lakesuperior.cy_include cimport collections as cc
-from lakesuperior.cy_include cimport spookyhash as sph
+cimport lakesuperior.cy_include.collections as cc
+cimport lakesuperior.cy_include.spookyhash as sph
+
 from lakesuperior.model.base cimport Buffer, buffer_dump
 from lakesuperior.model.graph cimport graph
 from lakesuperior.model.graph.triple cimport BufferTriple

+ 2 - 1
lakesuperior/model/graph/graph.pxd

@@ -2,7 +2,8 @@ from libc.stdint cimport uint32_t, uint64_t
 
 from cymem.cymem cimport Pool
 
-from lakesuperior.cy_include cimport collections as cc
+cimport lakesuperior.cy_include.collections as cc
+
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.model.graph.triple cimport BufferTriple
 

+ 3 - 2
lakesuperior/model/graph/graph.pyx

@@ -12,9 +12,10 @@ from libc.stdlib cimport free
 
 from cymem.cymem cimport Pool
 
-from lakesuperior.cy_include cimport collections as cc
+cimport lakesuperior.cy_include.collections as cc
+cimport lakesuperior.model.graph.callbacks as cb
+
 from lakesuperior.model.base cimport Buffer, buffer_dump
-from lakesuperior.model.graph cimport callbacks as cb
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph.triple cimport BufferTriple
 from lakesuperior.model.structures.hash cimport term_hash_seed32

+ 21 - 0
lakesuperior/model/structures/callbacks.pxd

@@ -0,0 +1,21 @@
+from lakesuperior.model.base cimport Key, TripleKey
+
+cdef:
+    bint lookup_sk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_pk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_ok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_skpk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_skok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )
+    bint lookup_pkok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    )

+ 33 - 0
lakesuperior/model/structures/callbacks.pyx

@@ -0,0 +1,33 @@
+from lakesuperior.model.base cimport Key, TripleKey
+
+cdef bint lookup_sk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[0] == k1
+
+cdef bint lookup_pk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[1] == k1
+
+cdef bint lookup_ok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[2] == k1
+
+cdef bint lookup_skpk_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[0] == k1 and spok[1] == k2
+
+cdef bint lookup_skok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[0] == k1 and spok[2] == k2
+
+cdef bint lookup_pkok_cmp_fn(
+        const TripleKey* spok, const Key* k1, const Key* k2
+    ):
+    return spok[1] == k1 and spok[2] == k2
+
+
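
The new structures/callbacks module defines one comparison callback per bound-term combination; each receives a candidate TripleKey plus up to two key pointers and reports whether the triple matches on the bound positions. Below is an illustrative Python rendering of the same functions, modelling a triple key as a 3-tuple of integers rather than a C array (a sketch of the logic, not the compiled code):

    # Python equivalents of the Cython lookup_*_cmp_fn callbacks.
    # spok is a (subject, predicate, object) key tuple; k1/k2 are plain ints.

    def lookup_sk_cmp_fn(spok, k1, k2=None):
        return spok[0] == k1                        # s bound

    def lookup_pk_cmp_fn(spok, k1, k2=None):
        return spok[1] == k1                        # p bound

    def lookup_ok_cmp_fn(spok, k1, k2=None):
        return spok[2] == k1                        # o bound

    def lookup_skpk_cmp_fn(spok, k1, k2):
        return spok[0] == k1 and spok[1] == k2      # s and p bound

    def lookup_skok_cmp_fn(spok, k1, k2):
        return spok[0] == k1 and spok[2] == k2      # s and o bound

    def lookup_pkok_cmp_fn(spok, k1, k2):
        return spok[1] == k1 and spok[2] == k2      # p and o bound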

+ 21 - 22
lakesuperior/model/structures/keyset.pxd

@@ -1,27 +1,26 @@
-from lakesuperior.cy_includes cimport collections as cc
 from lakesuperior.model.base cimport (
-    KeyIdx, Key, DoubleKey, TripleKey, Buffer
+    Key, Key, DoubleKey, TripleKey, Buffer
 )
-cdef class BaseKeyset:
-    cdef:
-        readonly cc.Array data
-        readonly size_t ct, size
-        size_t _cur
-        cc.ArrayConf conf
-
-        void resize(self, size_t ct) except *
-        unsigned char *get_item(self, i)
-        bint iter_next(self, unsigned char** val)
-        bint contains(self, const void *val)
-
-
-cdef class Keyset(BaseKeyset):
-    cdef size_t get_itemsize()
 
+ctypedef bint (*key_cmp_fn_t)(
+    const TripleKey* spok, const Key* k1, const Key* k2
+)
 
-cdef class DoubleKeyset(BaseKeyset):
-    cdef size_t get_itemsize()
-
+cdef class Keyset:
+    cdef:
+        TripleKey* data
+        size_t ct
+        size_t _cur # Index cursor used to look up values.
+        size_t _free_i # Index of next free slot.
 
-cdef class TripleKeyset(BaseKeyset):
-    cdef size_t get_itemsize()
+        void seek(self, size_t idx=*)
+        size_t tell(self)
+        bint get_at(self, size_t i, TripleKey* item)
+        bint get_next(self, TripleKey* item)
+        void add(self, const TripleKey* val) except *
+        bint contains(self, const TripleKey* val)
+        Keyset copy(self)
+        void resize(self, size_t size=*) except *
+        Keyset lookup(
+            self, const Key* sk, const Key* pk, const Key* ok
+        )
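
The reworked keyset.pxd replaces the BaseKeyset/Keyset/DoubleKeyset/TripleKeyset hierarchy with a single Keyset class: a pre-allocated TripleKey array driven by two indices, a read cursor (_cur, moved by seek/tell/get_next) and a write index (_free_i, advanced by add). A rough Python model of that contract, with a list of 3-tuples standing in for the C array (the real class raises MemoryError when capacity is exhausted, as shown in the .pyx below):

    class KeysetSketch:
        """Illustrative model of the Cython Keyset cursor/append API."""

        def __init__(self, ct=0):
            self.ct = ct                 # allocated capacity
            self.data = [None] * ct      # stands in for TripleKey* data
            self._cur = 0                # read cursor
            self._free_i = 0             # next free slot

        def seek(self, idx=0):
            self._cur = idx

        def tell(self):
            return self._cur

        def add(self, val):
            if self._free_i >= self.ct:
                raise MemoryError('No slots left in key set.')
            self.data[self._free_i] = val
            self._free_i += 1

        def get_next(self):
            # The Cython method writes into a TripleKey* out-parameter and
            # returns a flag; here we return (found, item) instead.
            if self._cur >= self._free_i:
                return False, None
            item = self.data[self._cur]
            self._cur += 1
            return True, item

        def contains(self, val):
            self.seek()
            while True:
                found, item = self.get_next()
                if not found:
                    return False
                if item == val:
                    return True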

+ 142 - 60
lakesuperior/model/structures/keyset.pyx

@@ -1,50 +1,34 @@
-from libc.string cimport memcmp
+import logging
+
+from libc.string cimport memcmp, memcpy
 from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
 
-from lakesuperior.cy_includes cimport collections as cc
-from lakesuperior.model.base cimport (
-    KeyIdx, Key, DoubleKey, TripleKey, Buffer
-)
+cimport lakesuperior.model.structures.callbacks as cb
+
+from lakesuperior.model.base cimport TripleKey, TRP_KLEN
 
-cdef class BaseKeyset:
-    """
-    Pre-allocated result set.
 
-    Data in the set are stored as a 1D contiguous array of characters.
-    Access to elements at an arbitrary index position is achieved by using the
-    ``itemsize`` property multiplied by the index number.
+logger = logging.getLogger(__name__)
 
-    Key properties:
 
-    ``ct``: number of elements in the set.
-    ``itemsize``: size of each element, in bytes. All elements have the same
-        size.
-    ``size``: Total size, in bytes, of the data set. This is the product of
-        ``itemsize`` and ``ct``.
+cdef class Keyset:
     """
-    def __cinit__(self, size_t ct):
+    Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
+    """
+    def __cinit__(self, size_t ct=0):
         """
         Initialize and allocate memory for the data set.
 
         :param size_t ct: Number of elements to be accounted for.
         """
-        self.conf.capacity = ct
-        self.itemsize = self.get_itemsize() # Set this in concrete classes
-        self.size = self.itemsize * self.conf.capacity
+        self.ct = ct
+        self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
+        logger.info(f'data address: 0x{<size_t>self.data:02x}')
+        if ct and not self.data:
+            raise MemoryError('Error allocating Keyset data.')
 
-        cc.array_conf_init(&self.conf)
-        self.conf.capacity = self.conf.capacity
-        cc.array_init_conf(&self.data
-        if not self.data:
-            raise MemoryError()
         self._cur = 0
-
-        #logger.debug('Got malloc sizes: {}, {}'.format(ct, itemsize))
-        #logger.debug(
-        #    'Allocating {0} ({1}x{2}) bytes of Keyset data...'.format(
-        #        self.size, self.conf.capacity, self.itemsize))
-        #logger.debug('...done allocating @ {0:x}.'.format(
-        #        <unsigned long>self.data))
+        self._free_i = 0
 
 
     def __dealloc__(self):
@@ -64,33 +48,36 @@ cdef class BaseKeyset:
 
     # Access methods.
 
-    def iter_init(self):
+    cdef void seek(self, size_t idx=0):
         """
-        Reset the cursor to the initial position.
+        Place the cursor at a certain index, 0 by default.
         """
-        self._cur = 0
+        self._cur = idx
 
 
-    def tell(self):
+    cdef size_t tell(self):
         """
         Tell the position of the cursor in the keyset.
         """
         return self._cur
 
 
-    cdef unsigned char *get_item(self, i):
+    cdef bint get_at(self, size_t i, TripleKey* item):
         """
         Get an item at a given index position. Cython-level method.
 
-        The item size is known by the ``itemsize`` property of the object.
-
-        :rtype: unsigned char*
+        :rtype: TripleKey
         """
+        if i >= self._free_i:
+            return False
+
         self._cur = i
-        return self.data + self.itemsize * i
+        item[0] = self.data[i]
+
+        return True
 
 
-    cdef bint iter_next(self, unsigned char** val):
+    cdef bint get_next(self, TripleKey* item):
         """
         Populate the current value and advance the cursor by 1.
 
@@ -101,39 +88,134 @@ cdef class BaseKeyset:
         :return: True if a value was found, False if the end of the buffer
             has been reached.
         """
-        if self._cur >= self.conf.capacity:
-            val = NULL
+        if self._cur >= self._free_i:
             return False
 
-        val[0] = self.data + self.itemsize * self._cur
+        item[0] = self.data[self._cur]
         self._cur += 1
 
         return True
 
 
-    cdef bint contains(self, const void *val):
+    cdef void add(self, const TripleKey* val) except *:
+        """
+        Add a triple key to the array.
+        """
+        if self._free_i >= self.ct:
+            raise MemoryError('No slots left in key set.')
+
+        self.data[self._free_i] = val[0]
+
+        self._free_i += 1
+
+
+    cdef bint contains(self, const TripleKey* val):
         """
         Whether a value exists in the set.
         """
-        cdef unsigned char* stored_val
+        cdef TripleKey stored_val
 
-        self.iter_init()
-        while self.iter_next(&stored_val):
-            if memcmp(val, stored_val, self.itemsize) == 0:
+        self.seek()
+        while self.get_next(&stored_val):
+            if memcmp(val, stored_val, TRP_KLEN) == 0:
                 return True
         return False
 
 
-class Keyset(BaseKeyset):
-    cdef size_t get_itemsize():
-        return KLEN
+    cdef Keyset copy(self):
+        """
+        Copy a Keyset.
+        """
+        cdef Keyset new_ks = Keyset(self.ct)
+        memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
+        new_ks.seek()
+
+        return new_ks
+
+
+    cdef void resize(self, size_t size=0) except *:
+        """
+        Change the array capacity.
+
+        :param size_t size: The new capacity size. If not specified or 0, the
+            array is shrunk to the last used item. The resulting size
+            therefore will always be greater than 0. The only exception
+            to this is if the specified size is 0 and no items have been added
+            to the array, in which case the array will be effectively shrunk
+            to 0.
+        """
+        if not size:
+            size = self._free_i
+
+        tmp = <TripleKey*>PyMem_Realloc(self.data, size * TRP_KLEN)
+
+        if not tmp:
+            raise MemoryError('Could not reallocate Keyset data.')
+
+        self.data = tmp
+        self.ct = size
+        self.seek()
+
+
+    cdef Keyset lookup(
+            self, const Key* sk, const Key* pk, const Key* ok
+    ):
+        """
+        Look up triple keys.
+
+        This works in a similar way as the ``SimpleGraph`` and ``LmdbStore``
+        lookup methods.
+
+        Any and all the terms may be NULL. A NULL term is treated as unbound.
+
+        :param const Key* sk: s key pointer.
+        :param const Key* pk: p key pointer.
+        :param const Key* ok: o key pointer.
+        """
+        cdef:
+            TripleKey spok
+            Keyset ret = Keyset(self.ct)
+            Key* k1 = NULL
+            Key* k2 = NULL
+            key_cmp_fn_t cmp_fn
+
+        if sk and pk and ok: # s p o
+            pass # TODO
+
+        elif sk:
+            k1 = sk
+            if pk: # s p ?
+                k2 = pk
+                cmp_fn = cb.lookup_skpk_cmp_fn
+
+            elif ok: # s ? o
+                k2 = ok
+                cmp_fn = cb.lookup_skok_cmp_fn
+
+            else: # s ? ?
+                cmp_fn = cb.lookup_sk_cmp_fn
+
+        elif pk:
+            k1 = pk
+            if ok: # ? p o
+                k2 = ok
+                cmp_fn = cb.lookup_pkok_cmp_fn
+
+            else: # ? p ?
+                cmp_fn = cb.lookup_pk_cmp_fn
+
+        elif ok: # ? ? o
+            k1 = ok
+            cmp_fn = cb.lookup_ok_cmp_fn
 
+        else: # ? ? ?
+            return self.copy()
 
-class DoubleKeyset(BaseKeyset):
-    cdef size_t get_itemsize():
-        return DBL_KLEN
+        self.seek()
+        while self.get_next(&spok):
+            if cmp_fn(<TripleKey*>spok, k1, k2):
+                ret.add(&spok)
 
+        ret.resize()
 
-class TripleKeyset(BaseKeyset):
-    cdef size_t get_itemsize():
-        return TRP_KLEN
+        return ret
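
Keyset.lookup above selects one of the callback comparators according to which of the s/p/o keys are bound, scans the array once, copies every matching TripleKey into a fresh Keyset and shrinks it with resize(). A condensed Python sketch of just that dispatch-and-scan step, reusing the illustrative callbacks and KeysetSketch from the earlier examples (None stands in for a NULL key pointer; the fully bound case is still a TODO in the Cython source):

    def keyset_lookup(ks, sk=None, pk=None, ok=None):
        """Return the triple keys in ks matching the bound terms."""
        if sk is not None and pk is not None and ok is not None:
            raise NotImplementedError   # s p o: TODO in the Cython code
        if sk is None and pk is None and ok is None:
            return list(ks.data[:ks._free_i])          # ? ? ?: copy everything

        if sk is not None:
            k1 = sk
            if pk is not None:
                k2, cmp_fn = pk, lookup_skpk_cmp_fn    # s p ?
            elif ok is not None:
                k2, cmp_fn = ok, lookup_skok_cmp_fn    # s ? o
            else:
                k2, cmp_fn = None, lookup_sk_cmp_fn    # s ? ?
        elif pk is not None:
            k1 = pk
            if ok is not None:
                k2, cmp_fn = ok, lookup_pkok_cmp_fn    # ? p o
            else:
                k2, cmp_fn = None, lookup_pk_cmp_fn    # ? p ?
        else:
            k1, k2, cmp_fn = ok, None, lookup_ok_cmp_fn  # ? ? o

        return [spok for spok in ks.data[:ks._free_i]
                if cmp_fn(spok, k1, k2)]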

+ 2 - 1
lakesuperior/store/base_lmdb_store.pxd

@@ -1,4 +1,4 @@
-cimport lakesuperior.cy_include.cylmdb as lmdb
+from lakesuperior.cy_include cimport cylmdb as lmdb
 
 cdef:
     int rc
@@ -13,6 +13,7 @@ cdef:
 cdef class BaseLmdbStore:
     cdef:
         readonly bint is_txn_open
+        readonly bint is_txn_rw
         public bint _open
         unsigned int _readers
         readonly str env_path

+ 21 - 27
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -1,32 +1,25 @@
+cimport lakesuperior.cy_include.collections as cc
 cimport lakesuperior.cy_include.cylmdb as lmdb
 cimport lakesuperior.cy_include.cytpl as tpl
 
 from lakesuperior.model.base cimport (
-    KeyIdx, Key, DoubleKey, TripleKey, Buffer
+    Key, DoubleKey, TripleKey, Buffer
 )
 from lakesuperior.model.graph.graph cimport SimpleGraph
 from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
 
-cdef enum:
-    IDX_OP_ADD = 1
-    IDX_OP_REMOVE = -1
-
-    INT_KEY_MASK = (
-        lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED | lmdb.MDB_INTEGERKEY
-        | lmdb.MDB_REVERSEKEY # TODO Check endianness.
-    )
-    INT_DUP_MASK = (
-        lmdb.MDB_DUPSORT | lmdb.MDB_DUPFIXED | lmdb.MDB_INTEGERDUP
-        | lmdb.MDB_REVERSEDUP # TODO Check endianness.
-    )
-
 cdef:
+    enum:
+        IDX_OP_ADD = 1
+        IDX_OP_REMOVE = -1
+
     unsigned char lookup_rank[3]
     unsigned char lookup_ordering[3][3]
     unsigned char lookup_ordering_2bound[3][3]
 
 
+
 cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef dict stats(self)
     cpdef size_t _len(self, context=*) except -1
@@ -35,27 +28,28 @@ cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef tuple all_namespaces(self)
-    cpdef tuple all_contexts(self, triple=*)
     cpdef SimpleGraph graph_lookup(
         self, triple_pattern, context=*, uri=*, copy=*
     )
 
     cdef:
-        void _add_graph(self, Buffer *pk_gr) except *
-        void _index_triple(self, str op, TripleKey spok) except *
+        void _add_graph(self, Buffer* pk_gr) except *
+        void _index_triple(self, int op, TripleKey spok) except *
         Keyset triple_keys(self, tuple triple_pattern, context=*)
-        Keyset _all_term_keys(self, term_type)
-        inline void lookup_term(self, const Key key, Buffer* data) except *
+        void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *
+        void lookup_term(self, const Key* tk, Buffer* data) except *
         Keyset _lookup(self, tuple triple_pattern)
-        Keyset _lookup_1bound(self, unsigned char idx, term)
+        Keyset _lookup_1bound(self, unsigned char idx, Key luk)
         Keyset _lookup_2bound(
-                self, unsigned char idx1, term1, unsigned char idx2, term2)
-        object from_key(self, const Key key)
-        tuple from_trp_key(self, TripleKey key)
-        Key _to_key(self, term)
-        void _to_triple_key(
-                self, tuple terms, TripleKey *tkey) except *
-        KeyIdx _append(
+            self, unsigned char idx1, unsigned char idx2, DoubleKey tks
+        )
+        object from_key(self, const Key tk)
+        Key _to_key_idx(self, term) except -1
+        void all_contexts(self, Key** ctx, size_t* sz, triple=*) except *
+        Key _append(
                 self, Buffer *value,
                 unsigned char *dblabel=*, lmdb.MDB_txn *txn=*,
                 unsigned int flags=*)
+
+        #Key bytes_to_idx(self, const unsigned char* bs)
+        #unsigned char* idx_to_bytes(Key idx)

File diff suppressed because it is too large
+ 312 - 292
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx


+ 1 - 1
requirements_dev.txt

@@ -1,5 +1,5 @@
 CoilMQ>=1.0.1
-Cython==0.29
+Cython==0.29.6
 Flask>=0.12.2
 HiYaPyCo>=0.4.11
 Pillow>=4.3.0

+ 11 - 3
setup.py

@@ -16,7 +16,7 @@ from os import path
 import lakesuperior
 
 # Use this version to build C files from .pyx sources.
-CYTHON_VERSION='0.29'
+CYTHON_VERSION='0.29.6'
 
 KLEN = 5 # TODO Move somewhere else (config?)
 
@@ -91,8 +91,8 @@ extensions = [
             path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
-            path.join(coll_src_dir, 'hashset.c'),
             path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join('lakesuperior', 'model', 'structures', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
@@ -108,8 +108,8 @@ extensions = [
             path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
-            path.join(coll_src_dir, 'hashset.c'),
             path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join('lakesuperior', 'model', 'graph', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
@@ -119,6 +119,10 @@ extensions = [
     Extension(
         'lakesuperior.store.base_lmdb_store',
         [
+            path.join(coll_src_dir, 'common.c'),
+            path.join(coll_src_dir, 'array.c'),
+            path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join(tpl_src_dir, 'tpl.c'),
             path.join(lmdb_src_dir, 'mdb.c'),
             path.join(lmdb_src_dir, 'midl.c'),
@@ -129,6 +133,10 @@ extensions = [
     Extension(
         'lakesuperior.store.ldp_rs.lmdb_triplestore',
         [
+            path.join(coll_src_dir, 'common.c'),
+            path.join(coll_src_dir, 'array.c'),
+            path.join(coll_src_dir, 'hashtable.c'),
+            path.join(coll_src_dir, 'hashset.c'),
             path.join(lmdb_src_dir, 'mdb.c'),
             path.join(lmdb_src_dir, 'midl.c'),
             path.join(
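
The setup.py changes pin Cython to 0.29.6, compile hashtable.c ahead of hashset.c, and add the Collections-C sources to the base_lmdb_store and lmdb_triplestore extensions so those modules link the containers they now use directly. For context, a trimmed-down sketch of how such an Extension is declared with setuptools and cythonize; the module name and paths here are placeholders, not the project's actual values:

    # Hypothetical example of bundling extra C sources into a Cython extension.
    from os import path
    from setuptools import setup, Extension
    from Cython.Build import cythonize

    coll_src_dir = path.join('ext', 'collections-c', 'src')   # placeholder path

    extensions = [
        Extension(
            'mypkg.store.lmdb_triplestore',                   # placeholder name
            sources=[
                path.join(coll_src_dir, 'common.c'),
                path.join(coll_src_dir, 'array.c'),
                path.join(coll_src_dir, 'hashtable.c'),       # before hashset.c,
                path.join(coll_src_dir, 'hashset.c'),         # matching the diff
                path.join('mypkg', 'store', 'lmdb_triplestore.pyx'),
            ],
            include_dirs=[path.join('ext', 'collections-c', 'include')],
        ),
    ]

    setup(
        name='mypkg',
        ext_modules=cythonize(extensions, language_level=3),
    )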

+ 43 - 7
tests/1_store/test_lmdb_store.py

@@ -258,6 +258,42 @@ class TestBasicOps:
 
 
 
+@pytest.mark.usefixtures('store', 'bogus_trp')
+class TestExtendedOps:
+    '''
+    Test additional store operations.
+    '''
+
+    def test_all_terms(self, store, bogus_trp):
+        """
+        Test the "all terms" methods.
+        """
+        with store.txn_ctx(True):
+            for trp in bogus_trp:
+                store.add(trp)
+
+        with store.txn_ctx():
+            all_s = store.all_terms('s')
+            all_p = store.all_terms('p')
+            all_o = store.all_terms('o')
+
+        assert len(all_s) == 1
+        assert len(all_p) == 100
+        assert len(all_o) == 1000
+
+        assert URIRef('urn:test_mp:s1') in all_s
+        assert URIRef('urn:test_mp:s1') not in all_p
+        assert URIRef('urn:test_mp:s1') not in all_o
+
+        assert URIRef('urn:test_mp:p10') not in all_s
+        assert URIRef('urn:test_mp:p10') in all_p
+        assert URIRef('urn:test_mp:p10') not in all_o
+
+        assert URIRef('urn:test_mp:o99') not in all_s
+        assert URIRef('urn:test_mp:o99') not in all_p
+        assert URIRef('urn:test_mp:o99') in all_o
+
+
 @pytest.mark.usefixtures('store', 'bogus_trp')
 class TestEntryCount:
     '''
@@ -649,7 +685,7 @@ class TestContext:
 
         with store.txn_ctx(True):
             store.add_graph(gr_uri)
-            assert gr_uri in {gr.uri for gr in store.contexts()}
+            assert gr_uri in store.contexts()
 
 
     def test_add_graph_with_triple(self, store):
@@ -664,7 +700,7 @@ class TestContext:
             store.add(trp, ctx_uri)
 
         with store.txn_ctx():
-            assert ctx_uri in {gr.uri for gr in store.contexts(trp)}
+            assert ctx_uri in store.contexts(trp)
 
 
     def test_empty_context(self, store):
@@ -675,10 +711,10 @@ class TestContext:
 
         with store.txn_ctx(True):
             store.add_graph(gr_uri)
-            assert gr_uri in {gr.uri for gr in store.contexts()}
+            assert gr_uri in store.contexts()
         with store.txn_ctx(True):
             store.remove_graph(gr_uri)
-            assert gr_uri not in {gr.uri for gr in store.contexts()}
+            assert gr_uri not in store.contexts()
 
 
     def test_context_ro_txn(self, store):
@@ -698,10 +734,10 @@ class TestContext:
         # allow a lookup in the same transaction, but this does not seem to be
         # possible.
         with store.txn_ctx():
-            assert gr_uri in {gr.uri for gr in store.contexts()}
+            assert gr_uri in store.contexts()
         with store.txn_ctx(True):
             store.remove_graph(gr_uri)
-            assert gr_uri not in {gr.uri for gr in store.contexts()}
+            assert gr_uri not in store.contexts()
 
 
     def test_add_trp_to_ctx(self, store):
@@ -732,7 +768,7 @@ class TestContext:
             assert len(set(store.triples((None, None, None), gr_uri))) == 3
             assert len(set(store.triples((None, None, None), gr2_uri))) == 1
 
-            assert gr2_uri in {gr.uri for gr in store.contexts()}
+            assert gr2_uri in store.contexts()
             assert trp1 in _clean(store.triples((None, None, None)))
             assert trp1 not in _clean(store.triples((None, None, None),
                     RDFLIB_DEFAULT_GRAPH_URI))

Some files were not shown because too many files changed in this diff