Browse Source

WIP fix segfault in add().

[ci skip]
Stefano Cossu 5 years ago
parent
commit
c9ca467cd2

+ 5 - 6
lakesuperior/store/ldp_rs/graph.pxd

@@ -10,10 +10,10 @@ ctypedef bint (*lookup_fn_t)(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2)
 
 cdef:
-    unsigned int term_hash_fn(calg.SetValue data)
-    bint buffer_cmp_fn(calg.SetValue v1, calg.SetValue v2)
-    unsigned int trp_hash_fn(calg.SetValue btrp)
-    bint triple_cmp_fn(calg.SetValue v1, calg.SetValue v2)
+    unsigned int term_hash_fn(const calg.SetValue data)
+    bint buffer_cmp_fn(const calg.SetValue v1, const calg.SetValue v2)
+    unsigned int trp_hash_fn(const calg.SetValue btrp)
+    bint triple_cmp_fn(const calg.SetValue v1, const calg.SetValue v2)
 
 
 cdef class SimpleGraph:
@@ -25,10 +25,9 @@ cdef class SimpleGraph:
         void _data_from_lookup(self, tuple trp_ptn, ctx=*) except *
         void _data_from_keyset(self, Keyset data) except *
         inline void _add_from_spok(self, const TripleKey spok) except *
-        void _add_from_rdflib(self, s, p, o) except *
         inline void _add_triple(
             self, const Buffer *ss, const Buffer *sp, const Buffer *so
-            ) except *
+        ) except *
         set _data_as_set(self)
 
     cpdef void set(self, tuple trp) except *

+ 51 - 41
lakesuperior/store/ldp_rs/graph.pyx

@@ -37,7 +37,7 @@ def use_data(fn):
     return _wrapper
 
 
-cdef unsigned int term_hash_fn(calg.SetValue data):
+cdef unsigned int term_hash_fn(const calg.SetValue data):
     """
     Hash function for sets of terms.
 
@@ -48,7 +48,7 @@ cdef unsigned int term_hash_fn(calg.SetValue data):
     cdef:
         Hash32 hash
 
-    hash32(<Buffer *>&data, &hash)
+    hash32(<const Buffer *>&data, &hash)
 
     return hash
 
@@ -78,7 +78,7 @@ cdef unsigned int trp_hash_fn(calg.SetValue btrp):
     return hash
 
 
-cdef bint buffer_cmp_fn(calg.SetValue v1, calg.SetValue v2):
+cdef bint buffer_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
     """
     Compare function for two Buffer objects.
 
@@ -98,7 +98,7 @@ cdef bint buffer_cmp_fn(calg.SetValue v1, calg.SetValue v2):
     return memcmp(b1.addr, b2.addr, b1.sz) == 0
 
 
-cdef bint triple_cmp_fn(calg.SetValue v1, calg.SetValue v2):
+cdef bint triple_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
     """
     Compare function for two triples in a CAlg set.
 
@@ -133,38 +133,38 @@ cdef inline bint lookup_s_cmp_fn(
     ``t2`` is not used and is declared only for compatibility with the
     other interchangeable functions.
     """
-    return buffer_cmp_fn(t1, &(trp[0].s))
+    return buffer_cmp_fn(t1, trp[0].s)
 
 
 cdef inline bint lookup_p_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return buffer_cmp_fn(t1, &(trp[0].p))
+    return buffer_cmp_fn(t1, trp[0].p)
 
 
 cdef inline bint lookup_o_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return buffer_cmp_fn(t1, &(trp[0].o))
+    return buffer_cmp_fn(t1, trp[0].o)
 
 
 cdef inline bint lookup_sp_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
     return (
-            buffer_cmp_fn(t1, &(trp[0].s))
-            and buffer_cmp_fn(t2, &(trp[0].p)))
+            buffer_cmp_fn(t1, trp[0].s)
+            and buffer_cmp_fn(t2, trp[0].p))
 
 
 cdef inline bint lookup_so_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
     return (
-            buffer_cmp_fn(t1, &(trp[0].s))
-            and buffer_cmp_fn(t2, &(trp[0].o)))
+            buffer_cmp_fn(t1, trp[0].s)
+            and buffer_cmp_fn(t2, trp[0].o))
 
 
 cdef inline bint lookup_po_cmp_fn(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
     return (
-            buffer_cmp_fn(t1, &(trp[0].p))
-            and buffer_cmp_fn(t2, &(trp[0].o)))
+            buffer_cmp_fn(t1, trp[0].p)
+            and buffer_cmp_fn(t2, trp[0].o))
 
 
 
@@ -217,9 +217,9 @@ cdef class SimpleGraph:
             Any and all elements may be ``None``.
         :param lmdbStore store: the store to look data up.
         """
-        self.store = store or env.app_defaults.rdf_store
-        self._terms = NULL
-        self._triples = NULL
+        self.store = store or env.app_globals.rdf_store
+        self._terms = calg.set_new(term_hash_fn, buffer_cmp_fn)
+        self._triples = calg.set_new(trp_hash_fn, triple_cmp_fn)
 
         cdef:
             size_t i = 0
@@ -276,8 +276,6 @@ cdef class SimpleGraph:
         """Populate a graph from a Keyset."""
         cdef TripleKey spok
 
-        self._terms = calg.set_new(term_hash_fn, buffer_cmp_fn)
-        self._triples = calg.set_new(trp_hash_fn, triple_cmp_fn)
         while data.next(spok):
             self._add_from_spok(spok)
 
@@ -298,16 +296,6 @@ cdef class SimpleGraph:
         self._add_triple(ss, sp, so)
 
 
-    cdef void _add_from_rdflib(self, s, p, o) except *:
-        """
-        Add a triple from 3 rdflib terms.
-        """
-        ss = term.serialize_from_rdflib(s)
-        sp = term.serialize_from_rdflib(p)
-        so = term.serialize_from_rdflib(o)
-        self._add_triple(ss, sp, so)
-
-
     cdef inline void _add_triple(
             self, const Buffer *ss, const Buffer *sp, const Buffer *so
             ) except *:
@@ -319,15 +307,25 @@ cdef class SimpleGraph:
         """
         cdef BufferTriple trp
 
+        print('Adding terms.')
+        print('ss: ')
+        print((<unsigned char *>ss[0].addr)[:ss[0].sz])
         calg.set_insert(self._terms, ss)
+        print('sp: ')
+        print((<unsigned char *>sp[0].addr)[:sp[0].sz])
         calg.set_insert(self._terms, sp)
+        print('so: ')
+        print((<unsigned char *>so[0].addr)[:so[0].sz])
         calg.set_insert(self._terms, so)
+        print('Added terms.')
 
         trp.s = ss
         trp.p = sp
         trp.o = so
 
+        print('Adding triple.')
         calg.set_insert(self._triples, &trp)
+        print('Added triple.')
 
 
     cdef set _data_as_set(self):
@@ -359,7 +357,19 @@ cdef class SimpleGraph:
 
     def add(self, triple):
         """ Add one triple to the graph. """
-        pass # TODO
+        cdef Buffer ss, sp, so
+
+        s, p, o = triple
+        #print('Serializing s.')
+        term.serialize_from_rdflib(s, &ss)
+        #print('Serializing p.')
+        term.serialize_from_rdflib(p, &sp)
+        #print('Serializing o.')
+        term.serialize_from_rdflib(o, &so)
+
+        print('Adding triple from rdflib.')
+        self._add_triple(&ss, &sp, &so)
+        print('Added triple from rdflib.')
 
 
     def remove(self, item):
@@ -545,8 +555,8 @@ cdef class SimpleGraph:
             BufferTriple trp
             BufferTriple *trp_p
             calg.SetIterator ti
-            const Buffer *t1 = NULL
-            const Buffer *t2 = NULL
+            const Buffer t1
+            const Buffer t2
             lookup_fn_t fn
 
         res = set()
@@ -554,9 +564,9 @@ cdef class SimpleGraph:
         # Decide comparison logic outside the loop.
         if s is not None and p is not None and o is not None:
             # Return immediately if 3-term match is requested.
-            trp.s = term.serialize_from_rdflib(s)
-            trp.p = term.serialize_from_rdflib(p)
-            trp.o = term.serialize_from_rdflib(o)
+            term.serialize_from_rdflib(s, trp.s)
+            term.serialize_from_rdflib(p, trp.p)
+            term.serialize_from_rdflib(o, trp.o)
 
             if calg.set_query(self._triples, &trp):
                 res.add((s, p, o))
@@ -564,25 +574,25 @@ cdef class SimpleGraph:
             return res
 
         elif s is not None:
-            t1 = term.serialize_from_rdflib(s)
+            term.serialize_from_rdflib(s, &t1)
             if p is not None:
                 fn = lookup_sp_cmp_fn
-                t2 = term.serialize_from_rdflib(p)
+                term.serialize_from_rdflib(p, &t2)
             elif o is not None:
                 fn = lookup_so_cmp_fn
-                t2 = term.serialize_from_rdflib(o)
+                term.serialize_from_rdflib(o, &t2)
             else:
                 fn = lookup_s_cmp_fn
         elif p is not None:
-            t1 = term.serialize_from_rdflib(p)
+            term.serialize_from_rdflib(p, &t1)
             if o is not None:
                 fn = lookup_po_cmp_fn
-                t2 = term.serialize_from_rdflib(o)
+                term.serialize_from_rdflib(o, &t2)
             else:
                 fn = lookup_p_cmp_fn
         elif o is not None:
             fn = lookup_o_cmp_fn
-            t1 = term.serialize_from_rdflib(o)
+            term.serialize_from_rdflib(o, &t1)
         else:
             fn = lookup_none_cmp_fn
 
@@ -590,7 +600,7 @@ cdef class SimpleGraph:
         calg.set_iterate(self._triples, &ti)
         while calg.set_iter_has_more(&ti):
             trp_p = <BufferTriple *>calg.set_iter_next(&ti)
-            if fn(trp_p, t1, t2):
+            if fn(trp_p, &t1, &t2):
                 res.add((
                     term.deserialize_to_rdflib(trp_p[0].s),
                     term.deserialize_to_rdflib(trp_p[0].p),

+ 17 - 12
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -186,6 +186,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             # See https://github.com/cython/cython/issues/2517
             unsigned char spock[QUAD_KLEN]
             unsigned char nkey[KLEN]
+            Buffer pk_t
 
         c = self._normalize_context(context)
         if c is None:
@@ -200,8 +201,8 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         icur = self._cur_open('th:t')
         try:
             for i, term_obj in enumerate((s, p, o, c)):
-                pk_t = serialize_from_rdflib(term_obj)
-                hash128(pk_t, &thash)
+                serialize_from_rdflib(term_obj, &pk_t)
+                hash128(&pk_t, &thash)
                 try:
                     key_v.mv_data = thash
                     key_v.mv_size = HLEN
@@ -213,7 +214,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
                     # If term_obj is not found, add it...
                     #logger.debug('Hash {} not found. Adding to DB.'.format(
                     #        thash[: HLEN]))
-                    self._append(pk_t, &nkey, dblabel=b't:st')
+                    self._append(&pk_t, &nkey, dblabel=b't:st')
                     memcpy(spock + (i * KLEN), nkey, KLEN)
 
                     # ...and index it.
@@ -282,13 +283,14 @@ cdef class LmdbTriplestore(BaseLmdbStore):
 
         :param rdflib.URIRef graph: URI of the named graph to add.
         """
-        cdef Buffer *_sc
+        cdef Buffer _sc
 
         if isinstance(graph, Graph):
             graph = graph.identifier
 
-        _sc = serialize_from_rdflib(graph)
-        self._add_graph(_sc)
+        # FIXME This is all wrong.
+        serialize_from_rdflib(graph, &_sc)
+        self._add_graph(&_sc)
 
 
     cdef void _add_graph(self, Buffer *pk_gr) except *:
@@ -580,6 +582,7 @@ cdef class LmdbTriplestore(BaseLmdbStore):
             Hash128 chash
             unsigned char ck[KLEN]
             lmdb.MDB_val ck_v, chash_v
+            Buffer pk_c
 
         #logger.debug('Deleting context: {}'.format(gr_uri))
         #logger.debug('Pickled context: {}'.format(serialize(gr_uri)))
@@ -597,8 +600,8 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         self._remove((None, None, gr_uri))
 
         # Clean up all terms related to the graph.
-        pk_c = serialize_from_rdflib(gr_uri)
-        hash128(pk_c, &chash)
+        serialize_from_rdflib(gr_uri, &pk_c)
+        hash128(&pk_c, &chash)
 
         ck_v.mv_size = KLEN
         chash_v.mv_size = HLEN
@@ -1376,9 +1379,10 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         """
         cdef:
             Hash128 thash
+            Buffer pk_t
 
-        pk_t = serialize_from_rdflib(term)
-        hash128(pk_t, &thash)
+        serialize_from_rdflib(term, &pk_t)
+        hash128(&pk_t, &thash)
         #logger.debug('Hash to search for: {}'.format(thash[: HLEN]))
         key_v.mv_data = thash
         key_v.mv_size = HLEN
@@ -1391,12 +1395,13 @@ cdef class LmdbTriplestore(BaseLmdbStore):
         key[0] = <Key>data_v.mv_data
 
 
-    cdef inline void _to_triple_key(self, tuple terms, TripleKey *tkey) except *:
+    cdef inline void _to_triple_key(
+            self, tuple terms, TripleKey *tkey) except *:
         """
         Convert a tuple of 3 terms into a triple key.
         """
         cdef:
-            char i = 0
+            unsigned char i = 0
             Key key
 
         while  i < 3:

+ 2 - 2
lakesuperior/store/ldp_rs/term.pxd

@@ -11,10 +11,10 @@ cdef:
     # Temporary TPL variable.
     char *_pk
 
-    int serialize(const Term *term, tpl.tpl_bin *sterm) except -1
+    int serialize(const Term *term, Buffer *sterm) except -1
     int deserialize(const Buffer *data, Term *term) except -1
     int from_rdflib(term_obj, Term *term) except -1
-    Buffer *serialize_from_rdflib(term_obj) except NULL
+    int serialize_from_rdflib(term_obj, Buffer *data) except -1
     object deserialize_to_rdflib(const Buffer *data)
     object to_rdflib(const Term *term)
     object to_bytes(const Term *term)

+ 77 - 14
lakesuperior/store/ldp_rs/term.pyx

@@ -14,15 +14,27 @@ DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
 DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
 
 
-cdef int serialize(const Term *term, tpl.tpl_bin *sterm) except -1:
+cdef int serialize(const Term *term, Buffer *sterm) except -1:
     """
     Serialize a Term into a binary buffer.
 
     The returned result is dynamically allocated and must be manually freed.
     """
-    tpl.tpl_jot(
-            tpl.TPL_MEM, &(sterm.addr), &(sterm.sz),
-            LSUP_TERM_STRUCT_PK_FMT, term)
+    cdef:
+        unsigned char *addr
+        size_t sz
+
+    print('Dump members:')
+    print(term[0].type)
+    print(term[0].data if term[0].data is not NULL else 'NULL')
+    print(term[0].datatype if term[0].datatype is not NULL else 'NULL')
+    print(term[0].lang if term[0].lang is not NULL else 'NULL')
+    print('Now serializing.')
+    tpl.tpl_jot(tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, term)
+    print('Serialized.')
+    sterm[0].addr = addr
+    sterm[0].sz = sz
+    print('Assigned to buffer. Returning.')
 
 
 cdef int deserialize(const Buffer *data, Term *term) except -1:
@@ -54,33 +66,84 @@ cdef int from_rdflib(term_obj, Term *term) except -1:
         term[0].datatype = _datatype
         term[0].lang = _lang
     else:
+        term[0].datatype = NULL
+        term[0].lang = NULL
         if isinstance(term_obj, URIRef):
             term[0].type = LSUP_TERM_TYPE_URIREF
         elif isinstance(term_obj, BNode):
             term[0].type = LSUP_TERM_TYPE_BNODE
         else:
             raise ValueError(f'Unsupported term type: {type(term_obj)}')
+    print(f'term data: {term[0].data}')
 
 
-cdef Buffer *serialize_from_rdflib(term_obj) except NULL:
+cdef int serialize_from_rdflib(term_obj, Buffer *data) except -1:
     """
     Return a Buffer struct from a Python/RDFLib term.
     """
+
     cdef:
-        Term term
-        Buffer data
+        Term _term
+        void *addr
+        size_t sz
+
+    # From RDFlib
+    _data = str(term_obj).encode()
+    _term.data = _data
+
+    if isinstance(term_obj, Literal):
+        _datatype = (getattr(term_obj, 'datatype') or '').encode()
+        _lang = (getattr(term_obj, 'language') or '').encode()
+        _term.type = LSUP_TERM_TYPE_LITERAL
+        _term.datatype = _datatype
+        _term.lang = _lang
+    else:
+        _term.datatype = NULL
+        _term.lang = NULL
+        if isinstance(term_obj, URIRef):
+            _term.type = LSUP_TERM_TYPE_URIREF
+        elif isinstance(term_obj, BNode):
+            _term.type = LSUP_TERM_TYPE_BNODE
+        else:
+            raise ValueError(f'Unsupported term type: {type(term_obj)}')
+    #print(f'term data: {_term.data}')
+
+    # # # #
+
+    # Serialize
+    print('Dump members:')
+    print(_term.type)
+    print(_term.data if _term.data is not NULL else 'NULL')
+    print(_term.datatype if _term.datatype is not NULL else 'NULL')
+    print(_term.lang if _term.lang is not NULL else 'NULL')
+    print('Now serializing.')
+    tpl.tpl_jot(tpl.TPL_MEM, &addr, &sz, LSUP_TERM_STRUCT_PK_FMT, &_term)
+    print('Serialized.')
+
+    print(f'addr: {<unsigned long>addr}; size: {sz}')
+    data[0].addr = addr
+    data[0].sz = sz
+
+    print('data to be returned: ')
+    print((<unsigned char *>data[0].addr)[:data[0].sz])
+    #print('Assigned to buffer. Returning.')
 
-    from_rdflib(term_obj, &term)
-    serialize(&term, &data)
+    # # # #
+    #cdef:
+    #    Term _term
 
-    return &data
+    # Reusing other methods. This won't work until I figure out how to
+    # not drop the intermediate var in from_rdflib().
+    #from_rdflib(term_obj, &_term)
+    #print('Dump members in serialize_from_rdflib:')
+    #serialize(&_term, data)
 
 
 cdef object to_rdflib(const Term *term):
     """
     Return an RDFLib term.
     """
-    data = (<bytes>term[0].data).decode()
+    cdef str data = (<bytes>term[0].data).decode()
     if term[0].type == LSUP_TERM_TYPE_LITERAL:
         return Literal(data, datatype=term[0].datatype, lang=term[0].lang)
     else:
@@ -96,11 +159,11 @@ cdef object deserialize_to_rdflib(const Buffer *data):
     """
     Return a Python/RDFLib term from a serialized Cython term.
     """
-    cdef Term *t
+    cdef Term t
 
-    deserialize(data, t)
+    deserialize(data, &t)
 
-    return to_rdflib(t)
+    return to_rdflib(&t)
 
 
 cdef object to_bytes(const Term *term):

+ 11 - 6
setup.py

@@ -147,12 +147,17 @@ extensions = [
 ]
 
 if USE_CYTHON:
-    extensions = cythonize(extensions, include_path=include_dirs, compiler_directives={
-        'language_level': 3,
-        'boundscheck': False,
-        'wraparound': False,
-        'profile': True,
-    })
+    extensions = cythonize(
+        extensions,
+        include_path=include_dirs,
+        annotate=True,
+        compiler_directives={
+            'language_level': 3,
+            'boundscheck': False,
+            'wraparound': False,
+            'profile': True,
+        }
+    )
 
 
 setup(