Browse Source

WIP Create Cython Term, Triple, Graph data structures.

Stefano Cossu 6 năm trước cách đây
mục cha
commit
5e178593fd

+ 0 - 0
lakesuperior/store/ldp_rs/__init__.pxd


+ 35 - 0
lakesuperior/store/ldp_rs/graph.pxd

@@ -0,0 +1,35 @@
+from lakesuperior.cy_include cimport calg
+from lakesuperior.store.ldp_rs.triple cimport Triple
+from lakesuperior.store.ldp_rs.lmdb_triplestore cimport LmdbTriplestore
+
+# Raw byte buffer plus its length. The set hash callback in graph.pyx reads
+# both fields to compute the hash of an item.
+ctypedef struct SetItem:
+    unsigned char *data  # Start of the item's bytes.
+    size_t size  # Number of bytes available at ``data``.
+
+# Hash and comparison callbacks handed to ``calg.set_new`` when a graph's
+# underlying C set is created.
+cdef:
+    unsigned int set_item_hash_fn(calg.SetValue data)
+    bint set_item_cmp_fn(calg.SetValue v1, calg.SetValue v2)
+
+# C-level declaration of the graph class implemented in graph.pyx.
+cdef class SimpleGraph:
+    cdef:
+        # C set holding the graph contents; created with ``calg.set_new``
+        # and filled with ``calg.set_insert`` in the constructor.
+        calg.Set *_data
+        # Triple buffer. Allocated with ``PyMem_Malloc`` when the graph is
+        # built from a Python set, and released in ``__dealloc__``.
+        Triple *_trp
+        # Store used for lookups; the constructor falls back to
+        # ``env.app_defaults.rdf_store`` when none is passed.
+        LmdbTriplestore store
+
+        void _data_from_lookup(
+            self, LmdbTriplestore store, tuple trp_ptn, ctx=*) except *
+        _data_as_set(self)
+
+    cpdef void set(self, tuple trp) except *
+    cpdef void remove_triples(self, pattern) except *
+    cpdef object as_rdflib(self)
+    cdef _slice(self, s, p, o)
+    cpdef lookup(self, s, p, o)
+    cpdef set terms(self, str type)
+
+# C-level declaration of the ``Imr`` subclass of ``SimpleGraph``.
+cdef class Imr(SimpleGraph):
+    cdef:
+        # URI passed to ``__init__``; readable but not writable from Python.
+        readonly str uri
+
+    # Re-declared here so the subclass can provide its own implementation.
+    cpdef as_rdflib(self)
+

+ 40 - 23
lakesuperior/store/ldp_rs/graph.pyx

@@ -7,19 +7,17 @@ from rdflib.term import Node
 
 from lakesuperior import env
 
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from libc.string cimport memcmp
 
 from lakesuperior.cy_include cimport calg
+from lakesuperior.cy_include cimport cylmdb as lmdb
+from lakesuperior.store.ldp_rs cimport term
 from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
-        TRP_KLEN, TripleKey, LmdbTriplestore)
+        KLEN, DBL_KLEN, TRP_KLEN, TripleKey, LmdbTriplestore)
 from lakesuperior.store.ldp_rs.keyset cimport Keyset
 from lakesuperior.store.ldp_rs.triple cimport Triple
-from lakesuperior.util.hash cimport hash64
-
-
-ctypedef struct SetItem:
-    unsigned char *data
-    size_t size
+from lakesuperior.util.hash cimport Hash64, hash64
 
 
 logger = logging.getLogger(__name__)
@@ -46,7 +44,16 @@ cdef unsigned int set_item_hash_fn(calg.SetValue data):
 
     :param SetItem *data: Pointer to a SetItem structure.
     """
-    return hash64((<SetItem>data).data, (<SetItem>data).size)
+    cdef:
+        Hash64 hash
+        term.Buffer sr_data
+
+    sr_data.addr = (<SetItem>data).data
+    sr_data.sz = (<SetItem>data).size
+
+    hash64(&sr_data, &hash)
+
+    return hash
 
 
 cdef bint set_item_cmp_fn(calg.SetValue v1, calg.SetValue v2):
@@ -81,10 +88,6 @@ cdef class SimpleGraph:
     ``rdflib.Graph`` instance.
     """
 
-    cdef:
-        calg.Set *_data
-
-
     def __cinit__(
             self, calg.Set *cdata=NULL, Keyset keyset=None, store=None,
             set data=set()):
@@ -112,8 +115,9 @@ cdef class SimpleGraph:
         self.store = store or env.app_defaults.rdf_store
 
         cdef:
-            Triple strp
+            size_t i = 0
             TripleKey spok
+            term.Buffer pk_t
 
         if cdata is not NULL:
             # Build data from provided C set.
@@ -123,14 +127,30 @@ cdef class SimpleGraph:
             # Initialize empty data set.
             self._data = calg.set_new(set_item_hash_fn, set_item_cmp_fn)
             if keyset is not None:
-                # Populate with provided key set.
+                # Populate with triples extracted from provided key set.
                 while keyset.next(spok):
-                    calg.set_insert(self._data, self.store.from_trp_key(spok))
+                    self.store.lookup_term(spok[:KLEN], &pk_t)
+                    term.deserialize(&pk_t, self._trp.s)
+
+                    self.store.lookup_term(spok[KLEN:DBL_KLEN], &pk_t)
+                    term.deserialize(&pk_t, self._trp.p)
+
+                    self.store.lookup_term(spok[DBL_KLEN:TRP_KLEN], &pk_t)
+                    term.deserialize(&pk_t, self._trp.o)
+
+                    calg.set_insert(self._data, &self._trp)
             else:
                 # Populate with provided Python set.
-                for trp in data:
-                    strp = serialize_triple(trp)
-                    calg.set_insert(self._data, strp)
+                self._trp = <Triple *>PyMem_Malloc(sizeof(Triple) * len(data))
+                for s, p, o in data:
+                    term.from_rdflib(s, self._trp[i].s)
+                    term.from_rdflib(p, self._trp[i].p)
+                    term.from_rdflib(o, self._trp[i].o)
+                    calg.set_insert(self._data, self._trp)
+
+
+    def __dealloc__(self):
+        # Release the triple buffer obtained with ``PyMem_Malloc`` in the
+        # constructor.
+        PyMem_Free(self._trp)
 
 
     @property
@@ -162,7 +182,7 @@ cdef class SimpleGraph:
             for i in range(keyset.ct):
                 spok = keyset.data + i * TRP_KLEN
                 self.data.add(store.from_trp_key(spok[: TRP_KLEN]))
-                strp = serialize_triple(trp)
+                strp = serialize_triple(self._trp)
                 calg.set_insert(self._data, strp)
 
 
@@ -396,10 +416,7 @@ cdef class Imr(SimpleGraph):
     Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
     will create a new ``Imr`` instance with the same subject URI.
     """
-    cdef:
-        readonly object uri
-
-    def __init__(self, uri, *args, **kwargs):
+    def __init__(self, str uri, *args, **kwargs):
         """
         Initialize the graph with pre-existing data or by looking up a store.
 

+ 1 - 1
lakesuperior/store/ldp_rs/keyset.pxd

@@ -7,5 +7,5 @@ cdef class Keyset:
 
         void resize(self, size_t ct) except *
         unsigned char *get_item(self, i)
-        bint next(self, unsigned char *val)
+        bint next(self, void *val)
 

+ 5 - 5
lakesuperior/store/ldp_rs/keyset.pyx

@@ -115,7 +115,7 @@ cdef class Keyset:
         """
         Tell the position of the cursor in the keyset.
         """
-        return _cur
+        return self._cur
 
 
     cdef unsigned char *get_item(self, i):
@@ -130,18 +130,18 @@ cdef class Keyset:
         return self.data + self.itemsize * i
 
 
-    cdef bint next(self, unsigned char *val):
+    cdef bint next(self, void *val):
         """
-        Return current value and advance the cursor by 1.
+        Populate the current value and advance the cursor by 1.
 
-        :param unsigned char *val: Addres of value returned. It is void if
+        :param void *val: Address of value returned. It is NULL if
             the end of the buffer was reached.
 
         :rtype: bint
         :return: True if a value was found, False if the end of the buffer
             has been reached.
         """
-        if _cur >= self.ct:
+        if self._cur >= self.ct:
             val = NULL
             return False
 

+ 5 - 4
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -1,8 +1,9 @@
 cimport lakesuperior.cy_include.cylmdb as lmdb
-#cimport lakesuperior.cy_include.cytpl as tpl
+cimport lakesuperior.cy_include.cytpl as tpl
 
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
 from lakesuperior.store.ldp_rs.keyset cimport Keyset
+from lakesuperior.store.ldp_rs.term cimport Buffer
 
 #Fixed length for term keys.
 #
@@ -63,11 +64,11 @@ cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef tuple all_contexts(self, triple=*)
 
     cdef:
-        void _add_graph(
-                self, unsigned char *pk_c, size_t pk_size) except *
+        void _add_graph(self, Buffer *pk_gr) except *
         void _index_triple(self, str op, TripleKey spok) except *
         Keyset triple_keys(self, tuple triple_pattern, context=*)
         Keyset _all_term_keys(self, term_type)
+        inline int lookup_term(self, Key key, Buffer *data) except -1
         Keyset _lookup(self, tuple triple_pattern)
         Keyset _lookup_1bound(self, unsigned char idx, term)
         Keyset _lookup_2bound(
@@ -77,7 +78,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         inline void _to_key(self, term, Key *key) except *
         inline void _to_triple_key(self, tuple terms, TripleKey *tkey) except *
         void _append(
-                self, unsigned char *value, size_t vlen, Key *nkey,
+                self, Buffer *value, Key *nkey,
                 unsigned char *dblabel=*, lmdb.MDB_txn *txn=*,
                 unsigned int flags=*) except *
         void _next_key(self, const Key key, Key *nkey) except *

+ 36 - 52
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -12,13 +12,13 @@ from libc.stdlib cimport free
 from libc.string cimport memcpy
 
 cimport lakesuperior.cy_include.cylmdb as lmdb
-cimport lakesuperior.cy_include.cytpl as tpl
 from lakesuperior.store.ldp_rs.term cimport Term
 
 from lakesuperior.store.base_lmdb_store cimport (
         BaseLmdbStore, data_v, dbi, key_v)
 from lakesuperior.store.ldp_rs.keyset cimport Keyset
-from lakesuperior.store.ldp_rs.term cimport deserialize, serialize
+from lakesuperior.store.ldp_rs.term cimport (
+        Buffer, deserialize_to_rdflib, serialize_from_rdflib)
 from lakesuperior.util.hash cimport HLEN, Hash128, hash128
 
 
@@ -181,13 +181,11 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             lmdb.MDB_cursor *icur
             lmdb.MDB_val spo_v, c_v, null_v
             unsigned char i
-            unsigned char *pk_t
             Hash128 thash
             # Using Key or TripleKey here breaks Cython. This might be a bug.
             # See https://github.com/cython/cython/issues/2517
             unsigned char spock[QUAD_KLEN]
             unsigned char nkey[KLEN]
-            size_t term_size
 
         c = self._normalize_context(context)
         if c is None:
@@ -201,22 +199,21 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         #logger.debug('Trying to add a triple.')
         icur = self._cur_open('th:t')
         try:
-            for i, term in enumerate((s, p, o, c)):
-                serialize(term, &pk_t, &term_size)
-                thash = hash128(pk_t, term_size)
+            for i, term_obj in enumerate((s, p, o, c)):
+                pk_t = serialize_from_rdflib(term_obj)
+                hash128(pk_t, &thash)
                 try:
-                    key_v.mv_data = &thash
+                    key_v.mv_data = thash
                     key_v.mv_size = HLEN
                     _check(lmdb.mdb_get(
                             self.txn, self.get_dbi('th:t'), &key_v, &data_v))
                     memcpy(spock + (i * KLEN), data_v.mv_data, KLEN)
                     #logger.debug('Hash {} found. Not adding.'.format(thash[: HLEN]))
                 except KeyNotFoundError:
-                    # If term is not found, add it...
+                    # If term_obj is not found, add it...
                     #logger.debug('Hash {} not found. Adding to DB.'.format(
                     #        thash[: HLEN]))
-                    self._append(pk_t, term_size, &nkey, dblabel=b't:st')
-                    free(pk_t)
+                    self._append(pk_t, &nkey, dblabel=b't:st')
                     memcpy(spock + (i * KLEN), nkey, KLEN)
 
                     # ...and index it.
@@ -285,27 +282,22 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
         :param rdflib.URIRef graph: URI of the named graph to add.
         """
-        cdef:
-            unsigned char *pk_c
-            size_t pk_size
+        cdef Buffer *_sc
 
         if isinstance(graph, Graph):
             graph = graph.identifier
 
-        serialize(graph, &pk_c, &pk_size)
-        self._add_graph(pk_c, pk_size)
-        free(pk_c)
+        _sc = serialize_from_rdflib(graph)
+        self._add_graph(_sc)
+
 
+    cdef void _add_graph(self, Buffer *pk_gr) except *:
 
-    cdef void _add_graph(
-            self, unsigned char *pk_c, size_t pk_size) except *:
         """
         Add a graph.
 
-        :param pk_c: Pickled context URIRef object.
-        :type pk_c: unsigned char*
-        :param pk_size: Size of pickled string.
-        :type pk_size: size_t
+        :param pk_gr: Pickled context URIRef object.
+        :type pk_gr: Buffer*
         """
         cdef:
             Hash128 chash
@@ -315,7 +307,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             lmdb.MDB_cursor *pk_cur
             lmdb.MDB_cursor *ck_cur
 
-        chash = hash128(pk_c, pk_size)
+        hash128(pk_gr, &chash)
         #logger.debug('Adding a graph.')
         if not self._key_exists(chash, HLEN, b'th:t'):
             # Insert context term if not existing.
@@ -323,7 +315,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                 #logger.debug('Working in existing RW transaction.')
                 # Use existing R/W transaction.
                 # Main entry.
-                self._append(pk_c, pk_size, &ck, b't:st')
+                self._append(pk_gr, &ck, b't:st')
                 # Index.
                 self._put(chash, HLEN, ck, KLEN, b'th:t')
                 # Add to list of contexts.
@@ -333,8 +325,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                 #logger.debug('Opening a temporary RW transaction.')
                 _check(lmdb.mdb_txn_begin(self.dbenv, NULL, 0, &tmp_txn))
                 try:
-                    self._append(
-                            pk_c, pk_size, &ck, dblabel=b't:st', txn=tmp_txn)
+                    self._append(pk_gr, &ck, b't:st', txn=tmp_txn)
                     # Index.
                     self._put(chash, HLEN, ck, KLEN, b'th:t', txn=tmp_txn)
                     # Add to list of contexts.
@@ -588,8 +579,6 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         cdef:
             Hash128 chash
             unsigned char ck[KLEN]
-            unsigned char *pk_c
-            size_t c_size
             lmdb.MDB_val ck_v, chash_v
 
         #logger.debug('Deleting context: {}'.format(gr_uri))
@@ -608,9 +597,8 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         self._remove((None, None, gr_uri))
 
         # Clean up all terms related to the graph.
-        serialize(gr_uri, &pk_c, &c_size)
-        chash = hash128(pk_c, c_size)
-        free(pk_c)
+        pk_c = serialize_from_rdflib(gr_uri)
+        hash128(pk_c, &chash)
 
         ck_v.mv_size = KLEN
         chash_v.mv_size = HLEN
@@ -627,7 +615,6 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
     # Lookup methods.
 
-    # TODO Deprecate RDFLib API?
     def contexts(self, triple=None):
         """
         Get a list of all contexts.
@@ -1332,20 +1319,19 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
         :param Key key: The key to be converted.
         """
-        ser_term = self.lookup_term(key)
+        cdef Buffer pk_t
 
-        return deserialize(
-                <unsigned char *>ser_term.mv_data, ser_term.mv_size)
+        self.lookup_term(key, &pk_t)
 
+        return deserialize_to_rdflib(&pk_t)
 
-    cdef inline lmdb.MDB_val lookup_term(self, Key key):
+
+    cdef inline int lookup_term(self, Key key, data) except -1:
         """
         look up a term by key.
 
         :param Key key: The key to be looked up.
-
-        :rtype: lmdb.MDB_val
-        :return: LMDB value structure containing the serialized term.
+        :param Buffer *data: Buffer structure containing the serialized term.
         """
         cdef:
             lmdb.MDB_val key_v, data_v
@@ -1355,10 +1341,12 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
         _check(
                 lmdb.mdb_get(self.txn, self.get_dbi('t:st'), &key_v, &data_v),
-                'Error getting data for key \'{}\'.'.format(key))
+                f'Error getting data for key \'{key}\'.')
 
-        return data_v
+        data[0].addr = data_v.mv_data
+        data[0].sz = data_v.mv_size
 
+        return 0
 
     cdef tuple from_trp_key(self, TripleKey key):
         """
@@ -1386,16 +1374,12 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         :rtype: void
         """
         cdef:
-            unsigned char *pk_t
-            size_t term_size
             Hash128 thash
 
-        serialize(term, &pk_t, &term_size)
-        #logger.debug('Hashing pickle: {} with lentgh: {}'.format(pk_t, term_size))
-        thash = hash128(pk_t, term_size)
-        free(pk_t)
+        pk_t = serialize_from_rdflib(term)
+        hash128(pk_t, &thash)
         #logger.debug('Hash to search for: {}'.format(thash[: HLEN]))
-        key_v.mv_data = &thash
+        key_v.mv_data = thash
         key_v.mv_size = HLEN
 
         dbi = self.get_dbi('th:t')
@@ -1425,7 +1409,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
 
     cdef void _append(
-            self, unsigned char *value, size_t vlen, Key *nkey,
+            self, Buffer *value, Key *nkey,
             unsigned char *dblabel=b'', lmdb.MDB_txn *txn=NULL,
             unsigned int flags=0) except *:
         """
@@ -1459,8 +1443,8 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
         key_v.mv_data = nkey
         key_v.mv_size = KLEN
-        data_v.mv_data = value
-        data_v.mv_size = vlen
+        data_v.mv_data = value[0].addr
+        data_v.mv_size = value[0].sz
         #logger.debug('Appending value {} to db {} with key: {}'.format(
         #    value[: vlen], dblabel.decode(), nkey[0][:KLEN]))
         #logger.debug('data size: {}'.format(data_v.mv_size))

+ 16 - 11
lakesuperior/store/ldp_rs/term.pxd

@@ -1,15 +1,20 @@
 from lakesuperior.cy_include cimport cytpl as tpl
 
-cdef class Term:
-    cdef:
-        char type
-        char *data
-        char *datatype
-        char *lang
+ctypedef tpl.tpl_bin Buffer
+ctypedef struct Term:
+    char type
+    char *data
+    char *datatype
+    char *lang
 
-        # Temporary vars that get cleaned up on object deallocation.
-        char *_fmt
-        char *_pk
-
-        tpl.tpl_bin serialize(self)
+cdef:
+    # Temporary TPL variable.
+    char *_pk
 
+    int serialize(const Term *term, tpl.tpl_bin *sterm) except -1
+    int deserialize(const Buffer *data, Term *term) except -1
+    int from_rdflib(term_obj, Term *term) except -1
+    Buffer *serialize_from_rdflib(term_obj)
+    object deserialize_to_rdflib(const Buffer *data)
+    object to_rdflib(const Term *term)
+    object to_bytes(const Term *term)

+ 99 - 76
lakesuperior/store/ldp_rs/term.pyx

@@ -2,7 +2,7 @@ from rdflib import URIRef, BNode, Literal
 
 #from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from libc.stdint cimport uint64_t
-from libc.stdlib cimport malloc, free
+from libc.stdlib cimport free
 
 from lakesuperior.cy_include cimport cytpl as tpl
 
@@ -10,85 +10,108 @@ from lakesuperior.cy_include cimport cytpl as tpl
 DEF LSUP_TERM_TYPE_URIREF = 1
 DEF LSUP_TERM_TYPE_BNODE = 2
 DEF LSUP_TERM_TYPE_LITERAL = 3
-DEF LSUP_TERM_PK_FMT = b'csss'
+DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
 DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
 
 
-cdef class Term:
+cdef int serialize(const Term *term, tpl.tpl_bin *sterm) except -1:
     """
-    RDF term: URI reference, blank node or literal.
+    Serialize a Term into a binary buffer.
+
+    The returned result is dynamically allocated and must be manually freed.
+    """
+    tpl.tpl_jot(
+            tpl.TPL_MEM, &(sterm.addr), &(sterm.sz),
+            LSUP_TERM_STRUCT_PK_FMT, term)
+
+
+cdef int deserialize(const Buffer *data, Term *term) except -1:
+    """
+    Unpack serialized binary data into a Term structure.
+
+    :param const Buffer *data: Serialized term to read from.
+    :param Term *term: Structure whose fields are filled in place.
+
+    :raise MemoryError: If peeking into the serialized data fails.
+    """
+    cdef char *peeked
+
+    peeked = tpl.tpl_peek(
+            tpl.TPL_MEM | tpl.TPL_DATAPEEK, data.addr, data.sz,
+            LSUP_TERM_PK_FMT, &term.type, &term.data,
+            &term.datatype, &term.lang)
+
+    if peeked is NULL:
+        raise MemoryError('Error deserializing term.')
+
+    # The returned string is not used beyond the NULL check, so release it.
+    free(peeked)
+
+
+cdef int from_rdflib(term_obj, Term *term) except -1:
+    """
+    Return a Term struct obtained from a Python/RDFLib term.
     """
-    def __cinit__(self, const tpl.tpl_bin *data):
-        """
-        Initialize a Term from pack data.
-
-        :param tpl.tpl_bin *data: a pointer to a TPL binary buffer packed
-            according to the term structure format.
-        """
-        self._pk = tpl.tpl_peek(
-                tpl.TPL_MEM | tpl.TPL_DATAPEEK, data[0].addr, data[0].sz,
-                LSUP_TERM_PK_FMT, &self.term_type, &self.data, &self.datatype,
-                &self.lang)
-
-
-    def __dealloc__(self):
-        free(self.data)
-        free(self.datatype)
-        free(self.lang)
-        free(self._pk)
-        free(self._fmt)
-
-
-    def to_py_term(self):
-        """
-        Return an RDFLib term.
-        """
-        data = (<bytes>self.data).decode()
-        if self.term_type == LSUP_TERM_TYPE_LITERAL:
-            return Literal(
-                data, datatype=datatype, lang=lang)
+    _data = str(term_obj).encode()
+    term[0].data = _data
+
+    if isinstance(term_obj, Literal):
+        _datatype = (getattr(term_obj, 'datatype') or '').encode()
+        _lang = (getattr(term_obj, 'language') or '').encode()
+        term[0].type = LSUP_TERM_TYPE_LITERAL
+        term[0].datatype = _datatype
+        term[0].lang = _lang
+    else:
+        if isinstance(term_obj, URIRef):
+            term[0].type = LSUP_TERM_TYPE_URIREF
+        elif isinstance(term_obj, BNode):
+            term[0].type = LSUP_TERM_TYPE_BNODE
         else:
-            uri = term_data.decode()
-            if self.term_type == LSUP_TERM_TYPE_URIREF:
-                return URIRef(uri)
-            elif self.term_type == LSUP_TERM_TYPE_BNODE:
-                return BNode(uri)
-            else:
-                raise IOError(f'Unknown term type code: {self.term_type}')
-
-
-    def to_bytes(self):
-        """
-        Return a Python bytes object of the serialized term.
-        """
-        ser_data = self.serialize()
-        return <bytes>ser_data.data[:ser_data.sz]
-
-
-    cdef tpl.tpl_bin serialize(self):
-            #term_obj, unsigned char **pack_data, size_t *pack_size) except -1:
-        cdef:
-            bytes term_data = term_obj.encode()
-            bytes term_datatype
-            bytes term_lang
-            term_obj term
-
-        if isinstance(term_obj, Literal):
-            term_datatype = (getattr(term_obj, 'datatype') or '').encode()
-            term_lang = (getattr(term_obj, 'language') or '').encode()
-
-            term.type = LSUP_TERM_TYPE_LITERAL
-            term.data = term_data
-            term.datatype = <unsigned char *>term_datatype
-            term.lang = <unsigned char *>term_lang
+            raise ValueError(f'Unsupported term type: {type(term_obj)}')
+
+
+# Scratch buffer returned by ``serialize_from_rdflib``. It lives at module
+# level so that the returned pointer stays valid after the function returns.
+cdef Buffer _sr_buf
+
+
+cdef Buffer *serialize_from_rdflib(term_obj):
+    """
+    Return a Buffer struct from a Python/RDFLib term.
+
+    The returned pointer refers to a module-level scratch buffer rather than
+    to a local variable, so it remains valid after this function returns.
+    It is overwritten by the next call: use or copy the contents before
+    serializing another term.
+
+    The payload at ``addr`` is produced by ``serialize`` and, as with that
+    function, is dynamically allocated and must be freed by the caller.
+
+    :param term_obj: RDFLib term (URIRef, BNode or Literal) to serialize.
+
+    :rtype: Buffer *
+    :return: Pointer to the buffer describing the serialized term.
+    """
+    cdef Term term
+
+    from_rdflib(term_obj, &term)
+    serialize(&term, &_sr_buf)
+
+    return &_sr_buf
+
+
+cdef object to_rdflib(const Term *term):
+    """
+    Return an RDFLib term.
+
+    :param const Term *term: Term structure to convert.
+
+    :rtype: rdflib.term.Node
+    :return: A ``Literal``, ``URIRef`` or ``BNode`` according to the term
+        type code.
+
+    :raise IOError: If the term type code is not a known one.
+    """
+    data = (<bytes>term[0].data).decode()
+    if term[0].type == LSUP_TERM_TYPE_LITERAL:
+        # ``from_rdflib`` stores a missing datatype or language as an empty
+        # string. RDFLib expects text (not bytes) and refuses a literal that
+        # has both a datatype and a language set, so decode both values and
+        # turn empty ones back into None.
+        datatype = (<bytes>term[0].datatype).decode() or None
+        lang = (<bytes>term[0].lang).decode() or None
+        return Literal(data, datatype=datatype, lang=lang)
+    else:
+        if term[0].type == LSUP_TERM_TYPE_URIREF:
+            return URIRef(data)
+        elif term[0].type == LSUP_TERM_TYPE_BNODE:
+            return BNode(data)
         else:
-            if isinstance(term_obj, URIRef):
-                term.type = LSUP_TERM_TYPE_URIREF
-            elif isinstance(term_obj, BNode):
-                term.type = LSUP_TERM_TYPE_BNODE
-            else:
-                raise ValueError(f'Unsupported term type: {type(term_obj)}')
-            term.data = term_data
-
-        tpl.tpl_jot(
-            tpl.TPL_MEM, pack_data, pack_size, LSUP_TERM_STRUCT_PK_FMT, &term)
+            raise IOError(f'Unknown term type code: {term[0].type}')
+
+
+cdef object deserialize_to_rdflib(const Buffer *data):
+    """
+    Convert a serialized term straight into a Python/RDFLib term.
+
+    :param const Buffer *data: Serialized term.
+
+    :return: The RDFLib term built by ``to_rdflib`` from the unpacked data.
+    """
+    cdef Term unpacked
+
+    # Two steps: unpack the buffer into a C struct, then build the object.
+    deserialize(data, &unpacked)
+    return to_rdflib(&unpacked)
+
+
+cdef object to_bytes(const Term *term):
+    """
+    Return a Python bytes object of the serialized term.
+
+    :param const Term *term: Term structure to serialize.
+
+    :rtype: bytes
+    :return: Copy of the serialized term data.
+    """
+    cdef:
+        Buffer pk_t
+        unsigned char *bytestream
+
+    serialize(term, &pk_t)
+    bytestream = <unsigned char *>pk_t.addr
+
+    try:
+        # Slicing with an explicit length copies exactly ``sz`` bytes, so
+        # embedded NUL bytes do not truncate the result.
+        return <bytes>(bytestream)[:pk_t.sz]
+    finally:
+        # The buffer filled by ``serialize`` is dynamically allocated; the
+        # bytes object above owns its own copy, so release it here.
+        free(pk_t.addr)

+ 6 - 6
lakesuperior/store/ldp_rs/triple.pxd

@@ -2,11 +2,11 @@ from lakesuperior.cy_include cimport cytpl as tpl
 from lakesuperior.store.ldp_rs.term cimport Term
 
 ctypedef struct Triple:
-    Term s
-    Term p
-    Term o
+    Term *s
+    Term *p
+    Term *o
 
 
-cdef:
-    int serialize(tuple trp, tpl.tpl_bin *data) except -1
-    deserialize(tpl.tpl_bin data)
+#cdef:
+#    int serialize(tuple trp, tpl.tpl_bin *data) except -1
+#    tuple deserialize(tpl.tpl_bin data)

+ 41 - 38
lakesuperior/store/ldp_rs/triple.pyx

@@ -1,38 +1,41 @@
-from lakesuperior.store.ldp_rs cimport term
-
-cdef int serialize(tuple trp, tpl.tpl_bin *data) except -1:
-    """
-    Serialize a triple expressed as a tuple of RDFlib terms.
-
-    :param tuple trp: 3-tuple of RDFlib terms.
-
-    :rtype: Triple
-    """
-    cdef:
-        Triple strp
-        tpl.tpl_bin s, p, o
-
-    strp.s = s
-    strp.p = p
-    strp.o = o
-
-    term.serialize(trp[0], &s.addr, &s.sz)
-    term.serialize(trp[1], &p.addr, &p.sz)
-    term.serialize(trp[2], &o.addr, &o.sz)
-
-    return strp
-
-
-cdef tuple deserialize(Triple strp):
-    """
-    Deserialize a ``Triple`` structure into a tuple of terms.
-
-    :rtype: tuple
-    """
-    s = term.deserialize(strp.s.addr, strp.s.sz)
-    p = term.deserialize(strp.p.addr, strp.p.sz)
-    o = term.deserialize(strp.o.addr, strp.o.sz)
-
-    return s, p, o
-
-
+#from lakesuperior.store.ldp_rs cimport term
+#
+#cdef int serialize(tuple trp, tpl.tpl_bin *data) except -1:
+#    """
+#    Serialize a triple expressed as a tuple of RDFlib terms.
+#
+#    :param tuple trp: 3-tuple of RDFlib terms.
+#
+#    :rtype: Triple
+#    """
+#    cdef:
+#        Triple strp
+#        Term *s
+#        Term *p
+#        Term *o
+#
+#    strp.s = s
+#    strp.p = p
+#    strp.o = o
+#
+##    term.serialize(s)
+##    term.serialize(p)
+##    term.serialize(o)
+#
+#    return strp
+#
+#
+#cdef tuple deserialize(Triple strp):
+#    """
+#    Deserialize a ``Triple`` structure into a tuple of terms.
+#
+#    :rtype: tuple
+#    """
+#    pass
+##    s = term.deserialize(strp.s.addr, strp.s.sz)
+##    p = term.deserialize(strp.p.addr, strp.p.sz)
+##    o = term.deserialize(strp.o.addr, strp.o.sz)
+##
+##    return s, p, o
+#
+#

+ 13 - 7
lakesuperior/util/hash.pxd

@@ -1,9 +1,17 @@
 from libc.stdint cimport uint64_t
 
+from lakesuperior.store.ldp_rs.term cimport Buffer
+
 
 DEF _SEED_LEN = 8 # sizeof(uint64_t)
 DEF _HLEN = _SEED_LEN * 2
 
+# Seed for computing the term hash.
+#
+# This is a 16-byte string that will be split up into two ``uint64``
+# numbers to make up the ``spookyhash_128`` seeds.
+DEF _TERM_HASH_SEED = b'\xff\xf2Q\xf2j\x0bG\xc1\x8a}\xca\x92\x98^y\x12'
+
 cdef enum:
     SEED_LEN = _SEED_LEN
     HLEN = _HLEN
@@ -13,12 +21,10 @@ ctypedef uint64_t DoubleHash64[2]
 ctypedef unsigned char Hash128[_HLEN]
 
 cdef:
-    uint64_t term_hash_seed1
-    uint64_t term_hash_seed2SetValue
-    unsigned char *term_hash_seed
+    uint64_t term_hash_seed1, term_hash_seed2
+    unsigned char TERM_HASH_SEED[16]
 
-    Hash128 hash128(
-        const unsigned char *message, size_t message_size)
-    Hash64 hash64(
-        const unsigned char *message, size_t message_size)
+    int hash128(const Buffer *message, Hash128 *hash) except -1
+    int hash64(const Buffer *message, Hash64 *hash) except -1
 
+TERM_HASH_SEED = _TERM_HASH_SEED

+ 10 - 16
lakesuperior/util/hash.pyx

@@ -1,15 +1,11 @@
 from libc.stdint cimport uint64_t
 from libc.string cimport memcpy
 
-term_hash_seed = b'\xff\xf2Q\xf2j\x0bG\xc1\x8a}\xca\x92\x98^y\x12'
-"""
-Seed for computing the term hash.
+from lakesuperior.store.ldp_rs.term cimport Buffer
 
-This is a 16-byte string that will be split up into two ``uint64``
-numbers to make up the ``spookyhash_128`` seeds.
-"""
-memcpy(&term_hash_seed1, term_hash_seed, SEED_LEN)
-memcpy(&term_hash_seed2, term_hash_seed + SEED_LEN, SEED_LEN)
+
+memcpy(&term_hash_seed1, TERM_HASH_SEED, SEED_LEN)
+memcpy(&term_hash_seed2, TERM_HASH_SEED + SEED_LEN, SEED_LEN)
 
 # We only need a couple of functions from spookyhash. No need for a pxd file.
 cdef extern from 'spookyhash_api.h':
@@ -19,8 +15,7 @@ cdef extern from 'spookyhash_api.h':
     uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
 
 
-cdef inline Hash128 hash128(
-        const unsigned char *message, size_t message_size):
+cdef inline int hash128(const Buffer *message, Hash128 *hash) except -1:
     """
     Get the hash value of a byte string with a defined size.
 
@@ -36,14 +31,13 @@ cdef inline Hash128 hash128(
         DoubleHash64 seed = [term_hash_seed1, term_hash_seed2]
         Hash128 digest
 
-    spookyhash_128(message, message_size, seed, seed + 1)
+    spookyhash_128(message[0].addr, message[0].sz, seed, seed + 1)
 
-    # This casts the 2 contiguous uint64_t's into a char pointer.
-    return <Hash128>seed
+    # This casts the 2 contiguous uint64_t's into a char[16] pointer.
+    hash[0] = <Hash128>seed
 
 
-cdef inline Hash64 hash64(
-        const unsigned char *message, size_t message_size):
+cdef inline int hash64(const Buffer *message, Hash64 *hash) except -1:
     """
     Get a 64-bit (unsigned long) hash value of a byte string.
 
@@ -52,4 +46,4 @@ cdef inline Hash64 hash64(
     """
     cdef uint64_t seed = term_hash_seed1
 
-    return spookyhash_64(message, message_size, seed)
+    hash[0] = spookyhash_64(message[0].addr, message[0].sz, seed)