Bläddra i källkod

WIP Refactor triple, term, graph.

[ci skip]
Stefano Cossu 5 år sedan
förälder
incheckning
71515781a6

+ 9 - 7
lakesuperior/store/ldp_rs/graph.pyx

@@ -7,8 +7,9 @@ from rdflib.term import Node
 
 from lakesuperior.cy_include cimport calg
 from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
-        TRP_KLEN, LmdbTriplestore)
-from lakesuperior.store.ldp_rs.term import SerializedTriple, serialize_triple
+        TRP_KLEN, TripleKey, LmdbTriplestore)
+from lakesuperior.store.ldp_rs.keyset cimport Keyset
+from lakesuperior.store.ldp_rs.triple cimport Triple
 from lakesuperior.util.hash cimport hash64
 
 
@@ -66,8 +67,8 @@ cdef class SimpleGraph:
     cdef:
         calg.Set *_data
 
-    def __init__(
-            self, calg.Set *cdata=NULL, Keyset keyset=NULL, set data=set()):
+    def __cinit__(
+            self, calg.Set *cdata=NULL, Keyset keyset=None, set data=set()):
         """
         Initialize the graph with pre-existing data or by looking up a store.
 
@@ -87,16 +88,17 @@ cdef class SimpleGraph:
         :param lmdbStore store: the store to look data up.
         """
         cdef:
-            SerializedTriple strp
+            Triple strp
             TripleKey spok
 
         if cdata is not NULL:
             self._data = cdata
         else:
             self._data = calg.set_new(set_item_hash_fn, set_item_cmp_fn)
-            if keyset is not NULL:
+            if keyset is not None:
                 while keyset.next(spok):
-                    self._data = LmdbStore.from_triple_key
+                    self._data = LmdbTriplestore.from_trp_key(
+                    )
             else:
                 for trp in data:
                     strp = serialize_triple(trp)

+ 13 - 18
lakesuperior/store/ldp_rs/term.pxd

@@ -1,22 +1,17 @@
-cdef:
-    #unsigned char *pack_data
-    unsigned char term_type
-    unsigned char *pack_fmt
-    unsigned char *term_data
-    unsigned char *term_datatype
-    unsigned char *term_lang
-    #size_t pack_size
+from lakesuperior.cy_include cimport cytpl as tpl
 
-    struct IdentifierTerm:
-        char type
-        unsigned char *data
+cdef class Term:
+    char type
+    char *data
+    char *datatype
+    char *lang
 
-    struct LiteralTerm:
-        char type
-        unsigned char *data
-        unsigned char *datatype
-        unsigned char *lang
+    # Temporary vars that get cleaned up on object deallocation.
+    char *_fmt
+    char *_pk
 
-    int serialize(term, unsigned char **pack_data, size_t *pack_size) except -1
-    deserialize(unsigned char *data, size_t size)
+    tpl.tpl_bin serialize(self)
+    object to_python()
+
+    Term from_buffer(const unsigned char *data, const size_t size)
 

+ 76 - 73
lakesuperior/store/ldp_rs/term.pyx

@@ -10,82 +10,85 @@ from lakesuperior.cy_include cimport cytpl as tpl
 DEF LSUP_TERM_TYPE_URIREF = 1
 DEF LSUP_TERM_TYPE_BNODE = 2
 DEF LSUP_TERM_TYPE_LITERAL = 3
-DEF LSUP_PK_FMT_ID = b'S(cs)'
-DEF LSUP_PK_FMT_LIT = b'S(csss)'
-
-
-cdef int serialize(
-        term, unsigned char **pack_data, size_t *pack_size) except -1:
-    cdef:
-        bytes term_data = term.encode()
-        bytes term_datatype
-        bytes term_lang
-        IdentifierTerm id_t
-        LiteralTerm lit_t
-
-    if isinstance(term, Literal):
-        term_datatype = (getattr(term, 'datatype') or '').encode()
-        term_lang = (getattr(term, 'language') or '').encode()
-
-        lit_t.type = LSUP_TERM_TYPE_LITERAL
-        lit_t.data = term_data
-        lit_t.datatype = <unsigned char *>term_datatype
-        lit_t.lang = <unsigned char *>term_lang
-
-        tpl.tpl_jot(tpl.TPL_MEM, pack_data, pack_size, LSUP_PK_FMT_LIT, &lit_t)
-    else:
-        if isinstance(term, URIRef):
-            id_t.type = LSUP_TERM_TYPE_URIREF
-        elif isinstance(term, BNode):
-            id_t.type = LSUP_TERM_TYPE_BNODE
+DEF LSUP_TERM_PK_FMT = b'csss'
+DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
+
+
+cdef class Term:
+    """
+    RDF term: URI reference, blank node or literal.
+    """
+    def __cinit__(self, const tpl.tpl_bin data):
+        """
+        Initialize a Term from pack data.
+
+        :param tpl.tpl_bin data: a TPL binary buffer packed according to the
+            term structure format.
+        """
+        self._pk = tpl.tpl_peek(
+                tpl.TPL_MEM | tpl.TPL_DATAPEEK, data.addr, data.sz,
+                LSUP_TERM_PK_FMT, &self.term_type, &self.data, &self.datatype,
+                &self.lang)
+
+
+    def __dealloc__(self):
+        free(self.data)
+        free(self.datatype)
+        free(self.lang)
+        free(self._pk)
+        free(self._fmt)
+
+
+    def to_py_term(self):
+        """
+        Return an RDFLib term.
+        """
+        data = (<bytes>self.data).decode()
+        if self.term_type == LSUP_TERM_TYPE_LITERAL:
+            return Literal(
+                data, datatype=datatype, lang=lang)
         else:
-            raise ValueError(f'Unsupported term type: {type(term)}')
-        id_t.data = term_data
-        tpl.tpl_jot(tpl.TPL_MEM, pack_data, pack_size, LSUP_PK_FMT_ID, &id_t)
-
-
-cdef deserialize(const unsigned char *data, const size_t data_size):
-    cdef:
-        char term_type
-        char *fmt = NULL
-        char *_pk = NULL
-        unsigned char *term_data = NULL
-        unsigned char *term_lang = NULL
-        unsigned char *term_datatype = NULL
-
-    datatype = None
-    lang = None
-
-    fmt = tpl.tpl_peek(tpl.TPL_MEM, data, data_size)
-    try:
-        if fmt == LSUP_PK_FMT_LIT:
-            _pk = tpl.tpl_peek(
-                    tpl.TPL_MEM | tpl.TPL_DATAPEEK, data, data_size, b'csss',
-                    &term_type, &term_data, &term_datatype, &term_lang)
-            if len(term_datatype) > 0:
-                datatype = term_datatype.decode()
-            elif len(term_lang) > 0:
-                lang = term_lang.decode()
-
-            return Literal(term_data.decode(), datatype=datatype, lang=lang)
-
-        elif fmt == LSUP_PK_FMT_ID:
-            _pk = tpl.tpl_peek(
-                    tpl.TPL_MEM | tpl.TPL_DATAPEEK, data, data_size, b'cs',
-                    &term_type, &term_data)
             uri = term_data.decode()
-            if term_type == LSUP_TERM_TYPE_URIREF:
+            if self.term_type == LSUP_TERM_TYPE_URIREF:
                 return URIRef(uri)
-            elif term_type == LSUP_TERM_TYPE_BNODE:
+            elif self.term_type == LSUP_TERM_TYPE_BNODE:
                 return BNode(uri)
             else:
-                raise IOError(f'Unknown term type code: {term_type}')
+                raise IOError(f'Unknown term type code: {self.term_type}')
+
+
+    def to_bytes(self):
+        """
+        Return a Python bytes object of the serialized term.
+        """
+        ser_data = self.serialize()
+        return <bytes>ser_data.data[:ser_data.sz]
+
+
+    cdef tpl.tpl_bin serialize(self):
+            #term_obj, unsigned char **pack_data, size_t *pack_size) except -1:
+        cdef:
+            bytes term_data = term_obj.encode()
+            bytes term_datatype
+            bytes term_lang
+            term_obj term
+
+        if isinstance(term_obj, Literal):
+            term_datatype = (getattr(term_obj, 'datatype') or '').encode()
+            term_lang = (getattr(term_obj, 'language') or '').encode()
+
+            term.type = LSUP_TERM_TYPE_LITERAL
+            term.data = term_data
+            term.datatype = <unsigned char *>term_datatype
+            term.lang = <unsigned char *>term_lang
         else:
-            msg = f'Unknown structure pack format: {fmt}'
-            raise IOError(msg)
-    finally:
-        free(term_data)
-        free(term_datatype)
-        free(term_lang)
-        free(_pk)
-        free(fmt)
+            if isinstance(term_obj, URIRef):
+                term.type = LSUP_TERM_TYPE_URIREF
+            elif isinstance(term_obj, BNode):
+                term.type = LSUP_TERM_TYPE_BNODE
+            else:
+                raise ValueError(f'Unsupported term type: {type(term_obj)}')
+            term.data = term_data
+
+        tpl.tpl_jot(
+            tpl.TPL_MEM, pack_data, pack_size, LSUP_TERM_STRUCT_PK_FMT, &term)

+ 6 - 5
lakesuperior/store/ldp_rs/triple.pxd

@@ -1,10 +1,11 @@
 from lakesuperior.cy_include cimport cytpl as tpl
 
-    struct SerializedTriple:
-        tpl.tpl_bin s
-        tpl.tpl_bin p
-        tpl.tpl_bin o
+ctypedef struct Triple:
+    tpl.tpl_bin s
+    tpl.tpl_bin p
+    tpl.tpl_bin o
 
 
-    int serialize(tuple trp, *tpl.tpl_bin data) except -1
+cdef:
+    int serialize(tuple trp, tpl.tpl_bin *data) except -1
     deserialize(tpl.tpl_bin data)

+ 5 - 5
lakesuperior/store/ldp_rs/triple.pyx

@@ -1,15 +1,15 @@
 from lakesuperior.store.ldp_rs cimport term
 
-cdef int serialize(tuple trp, *tpl.tpl_bin data) except -1:
+cdef int serialize(tuple trp, tpl.tpl_bin *data) except -1:
     """
     Serialize a triple expressed as a tuple of RDFlib terms.
 
     :param tuple trp: 3-tuple of RDFlib terms.
 
-    :rtype: SerializedTriple
+    :rtype: Triple
     """
     cdef:
-        SerializedTriple strp
+        Triple strp
         tpl.tpl_bin s, p, o
 
     strp.s = s
@@ -23,9 +23,9 @@ cdef int serialize(tuple trp, *tpl.tpl_bin data) except -1:
     return strp
 
 
-cdef tuple deserialize(SerializedTriple strp):
+cdef tuple deserialize(Triple strp):
     """
-    Deserialize a ``SerializedTriple`` structure into a tuple of terms.
+    Deserialize a ``Triple`` structure into a tuple of terms.
 
     :rtype: tuple
     """