瀏覽代碼

Initial triple serializer implementation (incomplete).

Stefano Cossu 5 年之前
父節點
當前提交
93ccd8d402

+ 73 - 15
lakesuperior/store/ldp_rs/graph.pyx

@@ -5,8 +5,16 @@ from functools import wraps
 from rdflib import Graph
 from rdflib.term import Node
 
+from lakesuperior.cy_include cimport calg
 from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
         TRP_KLEN, LmdbTriplestore)
+from lakesuperior.store.ldp_rs.term import SerializedTriple, serialize_triple
+from lakesuperior.util.hash cimport hash64
+
+
+# Element layout read by ``set_item_hash_fn`` when hashing a set value.
+ctypedef struct SetItem:
+    # First argument handed to ``hash64`` by ``set_item_hash_fn``.
+    unsigned char *data
+    # Second argument handed to ``hash64``; presumably the length of ``data``
+    # in bytes (TODO confirm against ``hash64``).
+    size_t size
 
 
 logger = logging.getLogger(__name__)
@@ -25,6 +33,26 @@ def use_data(fn):
     return _wrapper
 
 
+cdef unsigned int set_item_hash_fn(calg.SetValue data):
+    """
+    Hash function for the CAlg set implementation.
+
+    https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
+
+    :param SetItem *data: Pointer to a SetItem structure.
+
+    :rtype: unsigned int
+    :return: Value returned by ``hash64`` for the item's ``data`` and
+        ``size`` members.
+    """
+    # The set hands over a pointer-typed value: cast it to a ``SetItem``
+    # pointer. Casting it to a ``SetItem`` *value* is not a valid conversion.
+    cdef SetItem *item = <SetItem *>data
+
+    # NOTE(review): if ``hash64`` yields a 64-bit value it is narrowed to
+    # ``unsigned int`` by the return type -- confirm this is acceptable.
+    return hash64(item.data, item.size)
+
+
+cdef bint set_item_cmp_fn(calg.SetValue v1, calg.SetValue v2):
+    """
+    Compare function for two CAlg set items.
+
+    https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
+
+    Two items are considered equal when they have the same ``size`` and the
+    same bytes in ``data``.
+
+    :param SetItem *v1: Pointer to the first SetItem structure.
+    :param SetItem *v2: Pointer to the second SetItem structure.
+
+    :rtype: bint
+    :return: Non-zero if the items are equal, zero otherwise.
+    """
+    cdef:
+        SetItem *item1 = <SetItem *>v1
+        SetItem *item2 = <SetItem *>v2
+        size_t i
+
+    # Same address: trivially equal, no need to look at the contents.
+    if v1 == v2:
+        return True
+
+    if item1.size != item2.size:
+        return False
+
+    # Byte-wise comparison, written as a loop so that no additional
+    # module-level cimport is required.
+    for i in range(item1.size):
+        if item1.data[i] != item2.data[i]:
+            return False
+
+    return True
+
+
 cdef class SimpleGraph:
     """
     Fast and simple implementation of a graph.
@@ -36,36 +64,53 @@ cdef class SimpleGraph:
     """
 
     cdef:
-        readonly set data
+        calg.Set *_data
 
     def __init__(
-            self, set data=set(), tuple lookup=(), store=None):
+            self, calg.Set *cdata=NULL, Keyset keyset=NULL, set data=set()):
         """
         Initialize the graph with pre-existing data or by looking up a store.
 
-        Either ``data``, or both ``lookup`` and ``store``, can be provided.
-        ``lookup`` and ``store`` have precedence. If none of them is specified,
-        an empty graph is initialized.
+        One of ``cdata``, ``keyset``, or ``data`` can be provided. If more than
+        one of these is provided, precedence is given in the mentioned order.
+        If none of them is specified, an empty graph is initialized.
 
         :param rdflib.URIRef uri: The graph URI.
             This will serve as the subject for some queries.
+        :param calg.Set cdata: Initial data as a C ``Set`` struct.
+        :param Keyset keyset: Keyset to create the graph from. Keys will be
+            converted to set elements.
         :param set data: Initial data as a set of 3-tuples of RDFLib terms.
         :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
             E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
             Any and all elements may be ``None``.
         :param lmdbStore store: the store to look data up.
+
+        NOTE(review): ``uri``, ``lookup`` and ``store`` are documented above
+        but are not parameters of this signature.
         """
-        if data:
-            self.data = set(data)
+        cdef:
+            SerializedTriple strp
+            TripleKey spok
+
+        if cdata is not NULL:
+            self._data = cdata
         else:
-            if not lookup:
-                self.data = set()
+            self._data = calg.set_new(set_item_hash_fn, set_item_cmp_fn)
+            if keyset is not NULL:
+                while keyset.next(spok):
+                    # NOTE(review): this rebinds ``self._data`` to a function
+                    # reference (no call) instead of adding an element to
+                    # the set, and ``spok`` is not used -- looks unfinished.
+                    self._data = LmdbStore.from_triple_key
             else:
-                if store is None:
-                    raise ValueError('Store not specified for triple lookup.')
-                trp_ptn = lookup[0]
-                ctx = lookup[1] if len(lookup) > 1 else None
-                self._data_from_lookup(store, trp_ptn, ctx)
+                for trp in data:
+                    strp = serialize_triple(trp)
+                    # NOTE(review): ``strp`` is declared as a
+                    # ``SerializedTriple`` while ``set_item_hash_fn`` casts
+                    # set values to ``SetItem`` -- confirm the element type.
+                    calg.set_insert(self._data, strp)
+
+
+    @property
+    def data(self):
+        """
+        Python ``set`` representation of the graph's triples.
+
+        The value is obtained from ``_data_as_set()`` on every access.
+
+        :rtype: set
+        """
+        triples = self._data_as_set()
+
+        return triples
 
 
     cdef void _data_from_lookup(
@@ -81,12 +126,25 @@ cdef class SimpleGraph:
             size_t i
             unsigned char spok[TRP_KLEN]
 
-        self.data = set()
+        self._data = calg.set_new(set_item_hash_fn, set_item_cmp_fn)
         with store.txn_ctx():
             keyset = store.triple_keys(trp_ptn, ctx)
             for i in range(keyset.ct):
                 spok = keyset.data + i * TRP_KLEN
                 self.data.add(store.from_trp_key(spok[: TRP_KLEN]))
+                strp = serialize_triple(trp)
+                calg.set_insert(self._data, strp)
+
+
+    cdef _data_as_set(self):
+        """
+        Convert triple data to a Python set.
+
+        Internally the data are stored as a C struct.
+
+        .. note:: Not implemented yet. The body is a placeholder, so the
+            call currently yields ``None`` rather than a set.
+
+        :rtype: set
+        """
+        # TODO: walk ``self._data`` and build the Python set.
+        pass
 
 
     # Basic set operations.

+ 0 - 2
lakesuperior/store/ldp_rs/term.pxd

@@ -1,5 +1,3 @@
-from lakesuperior.cy_include cimport cytpl as tpl
-
 cdef:
     #unsigned char *pack_data
     unsigned char term_type

+ 10 - 0
lakesuperior/store/ldp_rs/triple.pxd

@@ -0,0 +1,10 @@
+from lakesuperior.cy_include cimport cytpl as tpl
+
+# The declarations below need an enclosing ``cdef:`` block: an indented
+# ``struct`` with no block header is not valid Cython.
+cdef:
+    # One ``tpl_bin`` per triple term: subject, predicate, object.
+    struct SerializedTriple:
+        tpl.tpl_bin s
+        tpl.tpl_bin p
+        tpl.tpl_bin o
+
+
+    # Pointer declarator goes after the base type (``tpl.tpl_bin *data``).
+    int serialize(tuple trp, tpl.tpl_bin *data) except -1
+    # Matches the definition in ``triple.pyx``, which takes a
+    # ``SerializedTriple`` and returns a tuple.
+    tuple deserialize(SerializedTriple strp)

+ 38 - 0
lakesuperior/store/ldp_rs/triple.pyx

@@ -0,0 +1,38 @@
+from lakesuperior.store.ldp_rs cimport term
+
+cdef int serialize(tuple trp, tpl.tpl_bin *data) except -1:
+    """
+    Serialize a triple expressed as a tuple of RDFlib terms.
+
+    Each term is passed to ``term.serialize`` together with the address and
+    size members of the matching ``SerializedTriple`` field.
+
+    :param tuple trp: 3-tuple of RDFlib terms.
+    :param tpl.tpl_bin *data: Output pointer. Not written to by this
+        implementation.
+
+    :rtype: SerializedTriple
+    """
+    cdef SerializedTriple strp
+
+    # Serialize straight into the struct members. Copying separate local
+    # ``tpl_bin`` values into ``strp`` *before* filling them would leave
+    # ``strp`` holding uninitialized data.
+    term.serialize(trp[0], &strp.s.addr, &strp.s.sz)
+    term.serialize(trp[1], &strp.p.addr, &strp.p.sz)
+    term.serialize(trp[2], &strp.o.addr, &strp.o.sz)
+
+    # NOTE(review): the declared return type is ``int`` (``except -1``) but a
+    # ``SerializedTriple`` is returned and ``data`` is unused -- reconcile the
+    # signature with the intended output channel.
+    return strp
+
+
+cdef tuple deserialize(SerializedTriple strp):
+    """
+    Rebuild a 3-tuple of terms from a ``SerializedTriple`` structure.
+
+    Subject, predicate and object are each passed to ``term.deserialize``
+    with their address and size, in that order.
+
+    :rtype: tuple
+    """
+    return (
+        term.deserialize(strp.s.addr, strp.s.sz),
+        term.deserialize(strp.p.addr, strp.p.sz),
+        term.deserialize(strp.o.addr, strp.o.sz),
+    )
+
+

+ 9 - 0
setup.py

@@ -86,6 +86,15 @@ extensions = [
         extra_compile_args=['-fopenmp'],
         extra_link_args=['-fopenmp']
     ),
+    Extension(
+        'lakesuperior.store.ldp_rs.triple',
+        [
+            path.join('lakesuperior', 'store', 'ldp_rs', f'triple.{ext}'),
+        ],
+        include_dirs=include_dirs,
+        extra_compile_args=['-fopenmp'],
+        extra_link_args=['-fopenmp']
+    ),
     Extension(
         'lakesuperior.store.ldp_rs.keyset',
         [