Ver Fonte

[WIP] More Keyset teardown.

Stefano Cossu há 6 anos atrás
pai
commit
63b3d21742

+ 5 - 11
lakesuperior/model/structures/keyset.pxd

@@ -1,23 +1,17 @@
-from lakesuperior.cy_includes cimport collections as cc
+from lakesuperior.cy_include cimport collections as cc
 from lakesuperior.model.base cimport (
     KeyIdx, Key, DoubleKey, TripleKey, Buffer
 )
-cdef class BaseKeyset:
+cdef class Keyset:
     cdef:
         readonly size_t ct, size
         readonly cc.Array* data
         readonly cc.ArrayConf conf
 
-        size_t get_itemsize(self)
         unsigned char *get_item(self, i)
         bint iter_next(self, unsigned char** val)
         bint contains(self, const void *val)
 
-
-cdef class Keyset(BaseKeyset):
-
-
-cdef class DoubleKeyset(BaseKeyset):
-
-
-cdef class TripleKeyset(BaseKeyset):
+        Keyset lookup(
+            self, const KeyIdx* sk, const KeyIdx* pk, const KeyIdx* ok
+        )

+ 22 - 83
lakesuperior/model/structures/keyset.pyx

@@ -1,22 +1,22 @@
 from libc.string cimport memcmp
-from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+from libc.mem cimport free
 
-from lakesuperior.cy_includes cimport collections as cc
+from lakesuperior.cy_include cimport collections as cc
 from lakesuperior.model.base cimport (
     KeyIdx, Key, DoubleKey, TripleKey, Buffer
 )
 
-cdef class BaseKeyset:
+cdef class Keyset:
     """
     Pre-allocated result set.
     """
-    def __cinit__(self, size_t ct):
+    def __cinit__(self, size_t ct=0):
         """
         Initialize and allocate memory for the data set.
 
         :param size_t ct: Number of elements to be accounted for.
         """
-        self.itemsize = self.get_itemsize() # Set this in concrete classes
+        self.itemsize = TRP_KLEN
 
         cc.array_conf_init(&self.conf)
         self.conf.capacity = ct
@@ -34,81 +34,20 @@ cdef class BaseKeyset:
         This is called when the Python instance is garbage collected, which
         makes it handy to safely pass a Keyset instance across functions.
         """
-        PyMem_Free(self.data)
+        if self.data:
+            free(self.data)
 
 
     # Access methods.
 
-    cdef size_t get_itemsize(self):
-        raise NotImplementedError()
-
-
-    cdef unsigned char *get_item(self, i):
-        """
-        Get an item at a given index position. Cython-level method.
-
-        The item size is known by the ``itemsize`` property of the object.
-
-        :rtype: unsigned char*
-        """
-        self._cur = i
-        return self.data + self.itemsize * i
-
-
-    cdef bint iter_next(self, unsigned char** val):
-        """
-        Populate the current value and advance the cursor by 1.
-
-        :param void *val: Addres of value returned. It is NULL if
-            the end of the buffer was reached.
-
-        :rtype: bint
-        :return: True if a value was found, False if the end of the buffer
-            has been reached.
-        """
-        if self._cur >= self.conf.capacity:
-            val = NULL
-            return False
-
-        val[0] = self.data + self.itemsize * self._cur
-        self._cur += 1
-
-        return True
-
-
-    cdef bint contains(self, const void *val):
-        """
-        Whether a value exists in the set.
-        """
-        cdef unsigned char* stored_val
-
-        self.iter_init()
-        while self.iter_next(&stored_val):
-            if memcmp(val, stored_val, self.itemsize) == 0:
-                return True
-        return False
-
-
-class Keyset(BaseKeyset):
-    cdef size_t get_itemsize():
-        return KLEN
-
-
-class DoubleKeyset(BaseKeyset):
-    cdef size_t get_itemsize():
-        return DBL_KLEN
-
-
-class TripleKeyset(BaseKeyset):
-    cdef size_t get_itemsize():
-        return TRP_KLEN
-
-    cdef TripleKeyset lookup(
+    cdef Keyset lookup(
             self, const KeyIdx* sk, const KeyIdx* pk, const KeyIdx* ok
     ):
         """
-        Look up triple keys in a similar way that the ``SimpleGraph`` and
-        ``LmdbStore`` methods work.
+        Look up triple keys.
+
+        This works similarly to how the ``SimpleGraph`` and ``LmdbStore``
+        methods work.
 
         Any and all the terms may be NULL. A NULL term is treated as unbound.
 
@@ -120,8 +59,8 @@ class TripleKeyset(BaseKeyset):
             void* cur
             cc.ArrayIter it
             TripleKey spok
-            TripleKeyset ret
-            KeyIdx bk1 = NULL, bk2 = NULL
+            Keyset ret
+            KeyIdx k1 = NULL, k2 = NULL
 
         cc.array_iter_init(&it, self.data)
 
@@ -129,37 +68,37 @@ class TripleKeyset(BaseKeyset):
             pass # TODO
 
         elif sk:
-            bt1 = sk[0]
+            k1 = sk[0]
             if pk: # s p ?
-                bt2 = pk[0]
+                k2 = pk[0]
                 cmp_fn = cb.lookup_skpk_cmp_fn
 
             elif ok: # s ? o
-                bt2 = ok[0]
+                k2 = ok[0]
                 cmp_fn = cb.lookup_skok_cmp_fn
 
             else: # s ? ?
                 cmp_fn = cb.lookup_sk_cmp_fn
 
         elif pk:
-            bt1 = pk[0]
+            k1 = pk[0]
             if ok: # ? p o
-                bt2 = ok[0]
+                k2 = ok[0]
                 cmp_fn = cb.lookup_pkok_cmp_fn
 
             else: # ? p ?
                 cmp_fn = cb.lookup_pk_cmp_fn
 
         elif ok: # ? ? o
-            bt1 = ok[0]
+            k1 = ok[0]
             cmp_fn = cb.lookup_ok_cmp_fn
 
         else: # ? ? ?
             return self # TODO Placeholder. This should actually return a copy.
 
-        ret = TripleKeyset(256) # TODO Totally arbitrary.
+        ret = Keyset(256) # TODO Totally arbitrary.
         while cc.array_iter_next(&it, &cur) != cc.CC_ITER_END:
-            if cmp_fn(<TripleKey*>spok, t1, t2):
+            if cmp_fn(<TripleKey*>spok, k1, k2):
                 if cc.array_add(ret.data, spok) != cc.CC_OK:
                     raise RuntimeError('Error adding triple key.')
 

+ 1 - 1
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -54,7 +54,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         Key _to_key(self, term)
         void _to_triple_key(
                 self, tuple terms, TripleKey* tkey) except *
-        void all_contexts(self, cc.Array* ctx, triple=*) except *
+        void all_contexts(self, KeyIdx** ctx, size_t* sz, triple=*) except *
         KeyIdx _append(
                 self, Buffer *value,
                 unsigned char *dblabel=*, lmdb.MDB_txn *txn=*,

+ 21 - 39
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -12,7 +12,8 @@ from cython.parallel import prange
 from libc.stdlib cimport free
 from libc.string cimport memcpy
 
-cimport lakesuperior.cy_include.cylmdb as lmdb
+from lakesuperior.cy_include cimport collections as cc
+from lakesuperior.cy_include cimport cylmdb as lmdb
 from lakesuperior.model.base cimport (
     KLEN, DBL_KLEN, TRP_KLEN, QUAD_KLEN,
     KeyIdx, Key, DoubleKey, TripleKey,
@@ -623,8 +624,14 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
         :rtype: Iterator(lakesuperior.model.graph.graph.Imr)
         """
-        for ctx_uri in self.all_contexts(triple):
-            yield Imr(uri=self.from_key(ctx_uri), store=self)
+        cdef:
+            size_t sz, i = 0
+            KeyIdx* match
+
+        self.all_contexts(&match, &sz, triple)
+        while i < sz:
+            yield URIRef(self.from_key(match[i]))
+            cur += 1
 
 
     def triples(self, triple_pattern, context=None):
@@ -762,7 +769,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         """
         # TODO: Improve performance by allowing passing contexts as a tuple.
         cdef:
-            size_t ct = 0, flt_j = 0, i = 0, j = 0, pg_offset = 0, c_size
+            size_t ct = 0, flt_j = 0, i = 0, j = 0, c_size
             lmdb.MDB_cursor *icur
             lmdb.MDB_val key_v, data_v
             Key tk, ck
@@ -775,7 +782,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                 ck = self._to_key(context)
             except KeyNotFoundError:
                 # Context not found.
-                return Keyset(0, 3)
+                return Keyset()
 
             icur = self._cur_open('c:spo')
 
@@ -791,10 +798,10 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                             tk = self._to_key(term)
                         except KeyNotFoundError:
                             # Context not found.
-                            return Keyset(0, 3)
+                            return Keyset()
                         if tk is NULL:
                             # A term in the triple is not found.
-                            return Keyset(0, 3)
+                            return Keyset()
                         spok[i] = tk[0]
                     data_v.mv_data = spok
                     data_v.mv_size = TRP_KLEN
@@ -808,9 +815,9 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     except KeyNotFoundError:
                         # Triple not found.
                         #logger.debug('spok / ck pair not found.')
-                        return Keyset(0, 3)
-                    ret = Keyset(1, 3)
-                    ret.data[0] = spok
+                        return Keyset()
+                    ret = Keyset(1)
+                    cc.array_add(ret.data, &spok)
 
                     return ret
 
@@ -823,10 +830,10 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                             icur, &key_v, &data_v, lmdb.MDB_SET))
                     except KeyNotFoundError:
                         # Triple not found.
-                        return Keyset(0, 3)
+                        return Keyset()
 
                     _check(lmdb.mdb_cursor_count(icur, &ct))
-                    ret = Keyset(ct, 3)
+                    ret = Keyset(ct)
                     #logger.debug(f'Entries in c:spo: {ct}')
                     #logger.debug(f'Allocated {ret.size} bytes.')
 
@@ -835,11 +842,9 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     _check(lmdb.mdb_cursor_get(
                         icur, &key_v, &data_v, lmdb.MDB_GET_MULTIPLE))
                     while True:
-                        #logger.debug(f'Data offset: {pg_offset} Page size: {data_v.mv_size} bytes')
                         #logger.debug('Data page: {}'.format(
                         #        (<unsigned char *>data_v.mv_data)[: data_v.mv_size]))
-                        memcpy(ret.data + pg_offset, data_v.mv_data, data_v.mv_size)
-                        pg_offset += data_v.mv_size
+                        cc.array_add(ret.data, data_v.mv_data)
 
                         try:
                             _check(lmdb.mdb_cursor_get(
@@ -1237,7 +1242,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                 _check(lmdb.mdb_cursor_get(
                     icur, &key_v, NULL, lmdb.MDB_FIRST))
             except KeyNotFoundError:
-                return Keyset(0, 2)
+                return Keyset()
 
             while True:
                 memcpy(ret.data + ret.itemsize * i, key_v.mv_data, KLEN)
@@ -1349,29 +1354,6 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
                 sz[0] += 1
 
-
-                    # # # # #
-            else:
-                _check(lmdb.mdb_stat(
-                    self.txn, lmdb.mdb_cursor_dbi(cur), &stat))
-                ret = Keyset(stat.ms_entries, 1)
-
-                try:
-                    _check(lmdb.mdb_cursor_get(
-                            cur, &key_v, &data_v, lmdb.MDB_FIRST))
-                except KeyNotFoundError:
-                    return tuple()
-
-                while True:
-                    ret.data[i] = key_v.mv_data
-                    try:
-                        _check(lmdb.mdb_cursor_get(
-                            cur, &key_v, NULL, lmdb.MDB_NEXT))
-                    except KeyNotFoundError:
-                        break
-
-                    i += 1
-
             # FIXME This needs to get the triples and convert them.
             return ret