Browse Source

Implement callback iterations; move all callback functions to separate module.

Stefano Cossu 5 years ago
parent
commit
bdfe4bd145

+ 2 - 3
.gitmodules

@@ -4,10 +4,9 @@
 [submodule "ext/tpl"]
     path = ext/tpl
     url = https://github.com/troydhanson/tpl.git
-    branch = stable
 [submodule "ext/spookyhash"]
     path = ext/spookyhash
     url = https://github.com/centaurean/spookyhash.git
 [submodule "ext/collections-c"]
-	path = ext/collections-c
-	url = https://github.com/srdja/Collections-C.git
+    path = ext/collections-c
+    url = https://github.com/srdja/Collections-C.git

+ 43 - 0
lakesuperior/model/graph/callbacks.pxd

@@ -0,0 +1,43 @@
+from libc.stdint cimport uint32_t, uint64_t
+
+from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.graph cimport graph
+from lakesuperior.model.graph.triple cimport BufferTriple
+
+cdef extern from 'spookyhash_api.h':
+    uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
+
+cdef:
+    # Graph equality: returns True if equal (opposite convention to the
+    # compare functions below, which return 0 on equality).
+    bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2)
+    # Key compare functions for terms and triples. Return 0 on equality.
+    int term_cmp_fn(const void* key1, const void* key2)
+    int trp_cmp_fn(const void* key1, const void* key2)
+    # Hash functions for terms and triples.
+    size_t term_hash_fn(const void* key, int l, uint32_t seed)
+    size_t trp_hash_fn(const void* key, int l, uint32_t seed)
+    # Lookup compare functions. They all share the same signature so that
+    # they are interchangeable; the name tells which positions of the triple
+    # are compared with ``t1`` (and ``t2``). Return True on a match.
+    bint lookup_none_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    bint lookup_s_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    bint lookup_p_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    bint lookup_o_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    bint lookup_sp_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    bint lookup_so_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    bint lookup_po_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+    )
+    # Callbacks applied to a graph for each triple matched by a lookup.
+    void add_trp_callback(
+        graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
+    )
+    void del_trp_callback(
+        graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
+    )
+

+ 249 - 0
lakesuperior/model/graph/callbacks.pyx

@@ -0,0 +1,249 @@
+import logging
+
+from libc.stdint cimport uint32_t, uint64_t
+from libc.string cimport memcmp
+
+from lakesuperior.cy_include cimport collections as cc
+from lakesuperior.cy_include cimport spookyhash as sph
+from lakesuperior.model.base cimport Buffer, buffer_dump
+from lakesuperior.model.graph cimport graph
+from lakesuperior.model.graph.triple cimport BufferTriple
+
+logger = logging.getLogger(__name__)
+
+
+cdef int term_cmp_fn(const void* key1, const void* key2):
+    """
+    Compare two serialized terms (``Buffer`` structs) byte by byte.
+
+    Both arguments are untyped pointers and are cast to ``Buffer *``
+    internally.
+
+    :rtype: int
+    :return: 0 if both buffers have the same size and content; 1 if their
+        sizes differ; otherwise the non-zero result of ``memcmp``.
+    """
+    cdef:
+        Buffer* b1 = <Buffer *>key1
+        Buffer* b2 = <Buffer *>key2
+        int cmp
+
+    # Only buffers of the same length are worth a byte-wise comparison.
+    if b1.sz == b2.sz:
+        cmp = memcmp(b1.addr, b2.addr, b1.sz)
+        logger.info(f'term memcmp: {cmp}')
+        return cmp
+
+    logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
+    return 1
+
+
+cdef int trp_cmp_fn(const void* key1, const void* key2):
+    """
+    Compare two triples term by term, on their serialized bytes.
+
+    Terms are compared with ``term_cmp_fn`` in the order o, s, p; the
+    comparison stops at the first pair of terms that differs.
+
+    :rtype: int
+    :return: 0 if s, p and o are byte-wise identical in both triples;
+        otherwise the non-zero result of the first differing comparison.
+    """
+    cdef:
+        BufferTriple* t1 = <BufferTriple *>key1
+        BufferTriple* t2 = <BufferTriple *>key2
+        int diff
+
+    diff = term_cmp_fn(t1.o, t2.o)
+    if diff == 0:
+        diff = term_cmp_fn(t1.s, t2.s)
+    if diff == 0:
+        diff = term_cmp_fn(t1.p, t2.p)
+
+    logger.info(f'Triples match: {not(diff)}')
+    return diff
+
+
+#cdef int trp_cmp_fn(const void* key1, const void* key2):
+#    """
+#    Compare function for two triples in a set.
+#
+#    Here, pointers to terms are compared for s, p, o. The pointers should be
+#    guaranteed to point to unique values (i.e. no two pointers have the same
+#    term value within a graph).
+#
+#    :rtype: int
+#    :return: 0 if the addresses of all terms are the same, 1 otherwise.
+#    """
+#    t1 = <BufferTriple *>key1
+#    t2 = <BufferTriple *>key2
+#
+#    cdef int is_not_equal = (
+#        t1.s.addr != t2.s.addr or
+#        t1.p.addr != t2.p.addr or
+#        t1.o.addr != t2.o.addr
+#    )
+#
+#    logger.info(f'Triples match: {not(is_not_equal)}')
+#    return is_not_equal
+
+
+cdef bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2):
+    """
+    Compare 2 graphs for equality.
+
+    Two graphs are considered equal if they hold the same number of triples
+    and every triple of ``g1`` is also found in ``g2``.
+
+    Note that this returns the opposite value than the triple and term
+    compare functions: 1 (True) if equal, 0 (False) if not.
+
+    :param SimpleGraph g1: First graph.
+    :param SimpleGraph g2: Second graph.
+
+    :rtype: bint
+    :return: True if both graphs contain the same triples, False otherwise.
+    """
+    cdef:
+        void* el
+        cc.HashSetIter it
+
+    # Sets of different size cannot be equal. This also rules out g1 being
+    # a strict subset of g2, which the loop below alone would not detect.
+    if cc.hashset_size(g1._triples) != cc.hashset_size(g2._triples):
+        return False
+
+    cc.hashset_iter_init(&it, g1._triples)
+    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
+        # One triple missing from g2 is enough to tell the graphs apart.
+        if not cc.hashset_contains(g2._triples, el):
+            return False
+
+    return True
+
+
+cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
+    """
+    Hash a serialized term (:py:class:`Buffer` object).
+
+    The hash is computed over the ``sz`` bytes found at the buffer's
+    ``addr``. The ``l`` argument is not used.
+    """
+    cdef Buffer* term_buf = <Buffer*>key
+
+    return <size_t>spookyhash_64(term_buf.addr, term_buf.sz, seed)
+
+
+cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
+    """
+    Hash function for sets of (serialized) triples.
+
+    This function feeds the literal bytes of the s, p and o terms of the
+    triple, in this order, to an incremental hash and returns the result.
+    The ``l`` argument is not used.
+
+    :param key: Pointer to a ``BufferTriple``.
+    :param uint32_t seed: Hash seed. It is used for both seed values of the
+        hashing context.
+
+    :rtype: size_t
+    """
+    cdef:
+        BufferTriple* trp = <BufferTriple*>key
+        uint64_t seed64 = <uint64_t>seed
+        uint64_t seed_dummy = seed64
+        sph.spookyhash_context ctx
+
+    sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
+    # Each term must be read from its own address: pairing the subject
+    # address with the predicate or object size would hash the wrong bytes
+    # and could read past the end of the subject buffer.
+    sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
+    sph.spookyhash_update(&ctx, trp.p.addr, trp.p.sz)
+    sph.spookyhash_update(&ctx, trp.o.addr, trp.o.sz)
+    # The first output value is used as the hash; the second is discarded.
+    sph.spookyhash_final(&ctx, &seed64, &seed_dummy)
+
+    return <size_t>seed64
+
+
+#cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
+#    """
+#    Hash function for sets of (serialized) triples.
+#
+#    This function computes the hash of the concatenated pointer values in the
+#    s, p, o members of the triple. The triple structure is treated as a byte
+#    string. This is safe in spite of byte-wise struct evaluation being a
+#    frowned-upon practice (due to padding issues), because it is assumed that
+#    the input value is always the same type of structure.
+#    """
+#    return <size_t>spookyhash_64(key, l, seed)
+
+
+#cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
+#    """
+#    No-op function that takes a pointer and does *not* hash it.
+#
+#    The pointer value is used as the "hash".
+#    """
+#    return <size_t>key
+
+
+cdef inline bint lookup_none_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Dummy callback for queries with all parameters unbound.
+
+    None of the arguments is inspected: they are declared only so that the
+    signature is the same as the other lookup compare functions.
+
+    :rtype: bint
+    :return: Always ``True``.
+    """
+    return True
+
+
+cdef inline bint lookup_s_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Tell whether the subject of a triple is equal to a given term.
+
+    ``t2`` is ignored: it is part of the signature only so that all lookup
+    compare functions are interchangeable.
+
+    :rtype: bint
+    :return: ``True`` if ``t1`` and the ``s`` term of ``trp`` are equal.
+    """
+    return term_cmp_fn(t1, trp.s) == 0
+
+
+cdef inline bint lookup_p_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Tell whether the predicate of a triple is equal to a given term.
+
+    ``t2`` is ignored.
+
+    :rtype: bint
+    :return: ``True`` if ``t1`` and the ``p`` term of ``trp`` are equal.
+    """
+    return term_cmp_fn(t1, trp.p) == 0
+
+
+cdef inline bint lookup_o_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Tell whether the object of a triple is equal to a given term.
+
+    ``t2`` is ignored.
+
+    :rtype: bint
+    :return: ``True`` if ``t1`` and the ``o`` term of ``trp`` are equal.
+    """
+    return term_cmp_fn(t1, trp.o) == 0
+
+
+cdef inline bint lookup_sp_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Tell whether a triple has a given subject and predicate.
+
+    :rtype: bint
+    :return: ``True`` if ``t1`` is equal to the ``s`` term and ``t2`` is
+        equal to the ``p`` term of ``trp``.
+    """
+    # The predicate is only looked at if the subject matches.
+    if term_cmp_fn(t1, trp.s) != 0:
+        return False
+
+    return term_cmp_fn(t2, trp.p) == 0
+
+
+cdef inline bint lookup_so_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Tell whether a triple has a given subject and object.
+
+    :rtype: bint
+    :return: ``True`` if ``t1`` is equal to the ``s`` term and ``t2`` is
+        equal to the ``o`` term of ``trp``.
+    """
+    # The object is only looked at if the subject matches.
+    if term_cmp_fn(t1, trp.s) != 0:
+        return False
+
+    return term_cmp_fn(t2, trp.o) == 0
+
+
+cdef inline bint lookup_po_cmp_fn(
+    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
+):
+    """
+    Tell whether a triple has a given predicate and object.
+
+    :rtype: bint
+    :return: ``True`` if ``t1`` is equal to the ``p`` term and ``t2`` is
+        equal to the ``o`` term of ``trp``.
+    """
+    # The object is only looked at if the predicate matches.
+    if term_cmp_fn(t1, trp.p) != 0:
+        return False
+
+    return term_cmp_fn(t2, trp.o) == 0
+
+
+## LOOKUP CALLBACK FUNCTIONS
+
+cdef inline void add_trp_callback(
+    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
+):
+    """
+    Add a triple to a graph as a result of a lookup callback.
+
+    :param SimpleGraph gr: Graph that the triple is added to.
+    :param trp: Triple to add.
+    :param ctx: Not used by this callback.
+    """
+    # The second argument is the ``add`` flag of ``add_triple``.
+    # NOTE(review): ``add_triple`` is declared ``except *`` while this
+    # callback has no exception clause; confirm how an error raised there is
+    # meant to reach the caller.
+    gr.add_triple(trp, True)
+
+
+cdef inline void del_trp_callback(
+    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
+):
+    """
+    Remove a triple from a graph as a result of a lookup callback.
+
+    The triple is logged before it is removed. ``ctx`` is not used.
+    """
+    s_dump = buffer_dump(trp.s)
+    p_dump = buffer_dump(trp.p)
+    o_dump = buffer_dump(trp.o)
+    logger.info('removing triple: {} {} {}'.format(s_dump, p_dump, o_dump))
+
+    gr.remove_triple(trp)
+
+

+ 14 - 13
lakesuperior/model/graph/graph.pxd

@@ -5,22 +5,20 @@ from cymem.cymem cimport Pool
 from lakesuperior.cy_include cimport collections as cc
 from lakesuperior.model.base cimport Buffer
 from lakesuperior.model.graph.triple cimport BufferTriple
-from lakesuperior.model.structures.keyset cimport Keyset
 
 # Lookup function that returns whether a triple contains a match pattern.
+# Return True if the triple matches the lookup terms, False otherwise.
 ctypedef bint (*lookup_fn_t)(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2)
 
+# Callback for an iterator.
+ctypedef void (*lookup_callback_fn_t)(
+    SimpleGraph gr, const BufferTriple* trp, void* ctx
+)
+
 ctypedef Buffer SPOBuffer[3]
 ctypedef Buffer *BufferPtr
 
-cdef:
-    int term_cmp_fn(const void* key1, const void* key2)
-    int trp_cmp_fn(const void* key1, const void* key2)
-    bint graph_eq_fn(SimpleGraph g1, SimpleGraph g2)
-    size_t trp_hash_fn(const void* key, int l, uint32_t seed)
-    size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed)
-
 cdef class SimpleGraph:
     cdef:
         cc.HashSet *_terms # Set of unique serialized terms.
@@ -32,9 +30,9 @@ cdef class SimpleGraph:
         cc.key_compare_ft trp_cmp_fn
 
         inline BufferTriple* store_triple(self, const BufferTriple* strp)
-        inline void add_triple(self, BufferTriple *trp, bint add=*) except *
-        int remove_triple(self, BufferTriple* trp_buf) except -1
-        bint trp_contains(self, BufferTriple* btrp)
+        inline void add_triple(self, const BufferTriple *trp, bint add=*) except *
+        int remove_triple(self, const BufferTriple* trp_buf) except -1
+        bint trp_contains(self, const BufferTriple* btrp)
 
         # Basic graph operations.
         void ip_union(self, SimpleGraph other) except *
@@ -42,18 +40,21 @@ cdef class SimpleGraph:
         void ip_intersection(self, SimpleGraph other) except *
         void ip_xor(self, SimpleGraph other) except *
         SimpleGraph empty_copy(self)
+        void _match_ptn_callback(
+            self, pattern, SimpleGraph gr,
+            lookup_callback_fn_t callback_fn, void* ctx=*
+        ) except *
 
     cpdef union_(self, SimpleGraph other)
     cpdef subtraction(self, SimpleGraph other)
     cpdef intersection(self, SimpleGraph other)
     cpdef xor(self, SimpleGraph other)
     cpdef void set(self, tuple trp) except *
-    cpdef void remove_triples(self, pattern) except *
 
 
 cdef class Imr(SimpleGraph):
     cdef:
-        readonly str uri
+        readonly str id
         Imr empty_copy(self)
 
     cpdef as_rdflib(self)

+ 102 - 301
lakesuperior/model/graph/graph.pyx

@@ -2,234 +2,26 @@ import logging
 
 from functools import wraps
 
-from rdflib import Graph
+from rdflib import Graph, URIRef
 from rdflib.term import Node
 
 from lakesuperior import env
 
-from libc.stdint cimport uint32_t, uint64_t
-from libc.string cimport memcmp, memcpy
+from libc.string cimport memcpy
 from libc.stdlib cimport free
 
 from cymem.cymem cimport Pool
 
-from lakesuperior.cy_include cimport cylmdb as lmdb
 from lakesuperior.cy_include cimport collections as cc
-from lakesuperior.cy_include cimport spookyhash as sph
 from lakesuperior.model.base cimport Buffer, buffer_dump
+from lakesuperior.model.graph cimport callbacks as cb
 from lakesuperior.model.graph cimport term
 from lakesuperior.model.graph.triple cimport BufferTriple
-from lakesuperior.model.structures.hash cimport hash64
 from lakesuperior.model.structures.hash cimport term_hash_seed32
-from lakesuperior.model.structures.keyset cimport Keyset
-from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
-        KLEN, DBL_KLEN, TRP_KLEN, TripleKey)
-
-cdef extern from 'spookyhash_api.h':
-    uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
 
 logger = logging.getLogger(__name__)
 
 
-cdef int term_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two Buffer objects.
-
-    :rtype: int
-    :return: 0 if the byte streams are the same, another integer otherwise.
-    """
-    b1 = <Buffer *>key1
-    b2 = <Buffer *>key2
-
-    if b1.sz != b2.sz:
-        logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
-        return 1
-
-    cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
-    logger.info(f'term memcmp: {cmp}')
-    return cmp
-
-
-cdef int trp_lit_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two triples in a set.
-
-    s, p, o byte data are compared literally.
-
-    :rtype: int
-    :return: 0 if all three terms point to byte-wise identical data in both
-        triples.
-    """
-    t1 = <BufferTriple *>key1
-    t2 = <BufferTriple *>key2
-
-    diff = (
-        term_cmp_fn(t1.o, t2.o) or
-        term_cmp_fn(t1.s, t2.s) or
-        term_cmp_fn(t1.p, t2.p)
-    )
-
-    logger.info(f'Triples match: {not(diff)}')
-    return diff
-
-
-cdef int trp_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two triples in a set.
-
-    Here, pointers to terms are compared for s, p, o. The pointers should be
-    guaranteed to point to unique values (i.e. no two pointers have the same
-    term value within a graph).
-
-    :rtype: int
-    :return: 0 if the addresses of all terms are the same, 1 otherwise.
-    """
-    t1 = <BufferTriple *>key1
-    t2 = <BufferTriple *>key2
-
-    cdef int is_not_equal = (
-        t1.s.addr != t2.s.addr or
-        t1.p.addr != t2.p.addr or
-        t1.o.addr != t2.o.addr
-    )
-
-    logger.info(f'Triples match: {not(is_not_equal)}')
-    return is_not_equal
-
-
-cdef bint graph_eq_fn(SimpleGraph g1, SimpleGraph g2):
-    """
-    Compare 2 graphs for equality.
-
-    Note that this returns the opposite value than the triple and term
-    compare functions: 1 (True) if equal, 0 (False) if not.
-    """
-    cdef:
-        void* el
-        cc.HashSetIter it
-
-    cc.hashset_iter_init(&it, g1._triples)
-    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
-        if cc.hashset_contains(g2._triples, el):
-            return False
-
-    return True
-
-
-cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for serialized terms (:py:class:`Buffer` objects)
-    """
-    return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
-
-
-cdef size_t trp_lit_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for sets of (serialized) triples.
-
-    This function concatenates the literal terms of the triple as bytes
-    and computes their hash.
-    """
-    trp = <BufferTriple*>key
-    seed64 = <uint64_t>seed
-    seed_dummy = seed64
-
-    cdef sph.spookyhash_context ctx
-
-    sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.p.sz)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.o.sz)
-    sph.spookyhash_final(&ctx, &seed64, &seed_dummy)
-
-    return <size_t>seed64
-
-
-cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for sets of (serialized) triples.
-
-    This function computes the hash of the concatenated pointer values in the
-    s, p, o members of the triple. The triple structure is treated as a byte
-    string. This is safe in spite of byte-wise struct evaluation being a
-    frowned-upon practice (due to padding issues), because it is assumed that
-    the input value is always the same type of structure.
-    """
-    return <size_t>spookyhash_64(key, l, seed)
-
-
-cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
-    """
-    No-op function that takes a pointer and does *not* hash it.
-
-    The pointer value is used as the "hash".
-    """
-    return <size_t>key
-
-
-cdef inline bint lookup_none_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Dummy callback for queries with all parameters unbound.
-
-    This function always returns ``True`` 
-    """
-    return True
-
-
-cdef inline bint lookup_s_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given s in a triple.
-
-    The function returns ``True`` if ``t1`` matches the first term.
-
-    ``t2`` is not used and is declared only for compatibility with the
-    other interchangeable functions.
-    """
-    return term_cmp_fn(t1, trp[0].s)
-
-
-cdef inline bint lookup_p_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    return term_cmp_fn(t1, trp[0].p)
-
-
-cdef inline bint lookup_o_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    return term_cmp_fn(t1, trp[0].o)
-
-
-cdef inline bint lookup_sp_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    return (
-            term_cmp_fn(t1, trp[0].s)
-            and term_cmp_fn(t2, trp[0].p))
-
-
-cdef inline bint lookup_so_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    return (
-            term_cmp_fn(t1, trp[0].s)
-            and term_cmp_fn(t2, trp[0].o))
-
-
-cdef inline bint lookup_po_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    return (
-            term_cmp_fn(t1, trp[0].p)
-            and term_cmp_fn(t2, trp[0].o))
-
-
-
-
 cdef class SimpleGraph:
     """
     Fast and simple implementation of a graph.
@@ -255,19 +47,19 @@ cdef class SimpleGraph:
         cdef:
             cc.HashSetConf terms_conf, trp_conf
 
-        self.term_cmp_fn = &term_cmp_fn
-        self.trp_cmp_fn = &trp_lit_cmp_fn
+        self.term_cmp_fn = cb.term_cmp_fn
+        self.trp_cmp_fn = cb.trp_cmp_fn
 
         cc.hashset_conf_init(&terms_conf)
         terms_conf.load_factor = 0.85
-        terms_conf.hash = &term_hash_fn
+        terms_conf.hash = cb.term_hash_fn
         terms_conf.hash_seed = term_hash_seed32
         terms_conf.key_compare = self.term_cmp_fn
         terms_conf.key_length = sizeof(Buffer*)
 
         cc.hashset_conf_init(&trp_conf)
         trp_conf.load_factor = 0.75
-        trp_conf.hash = &trp_lit_hash_fn
+        trp_conf.hash = cb.trp_hash_fn
         trp_conf.hash_seed = term_hash_seed32
         trp_conf.key_compare = self.trp_cmp_fn
         trp_conf.key_length = sizeof(BufferTriple)
@@ -296,16 +88,16 @@ cdef class SimpleGraph:
     @property
     def data(self):
         """
-        Triple data as a Python set.
+        Triple data as a Python generator.
 
-        :rtype: set
+        :rtype: generator
         """
         cdef:
             void *void_p
             cc.HashSetIter ti
-            Buffer* ss, sp, so
-
-        graph_set = set()
+            Buffer* ss
+            Buffer* sp
+            Buffer* so
 
         cc.hashset_iter_init(&ti, self._triples)
         while cc.hashset_iter_next(&ti, &void_p) != cc.CC_ITER_END:
@@ -315,16 +107,14 @@ cdef class SimpleGraph:
                 break
 
             trp = <BufferTriple *>void_p
-            print(f'trp.s: {buffer_dump(trp.s)}')
-            print(f'trp.p: {buffer_dump(trp.p)}')
-            print(f'trp.o: {buffer_dump(trp.o)}')
-            graph_set.add((
+            #print(f'trp.s: {buffer_dump(trp.s)}')
+            #print(f'trp.p: {buffer_dump(trp.p)}')
+            #print(f'trp.o: {buffer_dump(trp.o)}')
+            yield (
                 term.deserialize_to_rdflib(trp.s),
                 term.deserialize_to_rdflib(trp.p),
                 term.deserialize_to_rdflib(trp.o),
-            ))
-
-        return graph_set
+            )
 
     @property
     def stored_terms(self):
@@ -368,7 +158,7 @@ cdef class SimpleGraph:
         """
         return (
             f'<{self.__class__.__name__} @{hex(id(self))} '
-            f'length={len(self.data)}>'
+            f'length={len(self)}>'
         )
 
 
@@ -455,7 +245,12 @@ cdef class SimpleGraph:
 
     def __iter__(self):
         """ Graph iterator. It iterates over the set triples. """
-        raise NotImplementedError()
+        yield from self.data
+
+
+    #def __next__(self):
+    #    """ Graph iterator. It iterates over the set triples. """
+    #    return self.data.__next__()
 
 
     # Slicing.
@@ -522,26 +317,15 @@ cdef class SimpleGraph:
         return cc.hashset_size(self._terms)
 
 
-    def remove(self, trp):
+    def remove(self, pattern):
         """
-        Remove one item from the graph.
+        Remove triples by pattern.
 
-        :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
-            wildcards are not accepted.
+        The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
+
+        :param tuple pattern: 3-tuple of s, p, o RDFLib terms. Any of them
+            may be ``None``, in which case that position is unbound.
         """
-        cdef:
-            Buffer ss, sp, so
-            BufferTriple trp_buf
-
-        term.serialize_from_rdflib(trp[0], &ss, self.pool)
-        term.serialize_from_rdflib(trp[1], &sp, self.pool)
-        term.serialize_from_rdflib(trp[2], &so, self.pool)
-
-        trp_buf.s = &ss
-        trp_buf.p = &sp
-        trp_buf.o = &so
-
-        self.remove_triple(&trp_buf)
+        # The graph that is searched and the graph that triples are removed
+        # from are the same: this instance.
+        self._match_ptn_callback(
+            pattern, self, cb.del_trp_callback, NULL
+        )
 
 
     ## CYTHON-ACCESSIBLE BASIC METHODS ##
@@ -817,7 +601,7 @@ cdef class SimpleGraph:
 
 
     cdef inline void add_triple(
-        self, BufferTriple* trp, bint add=False
+        self, const BufferTriple* trp, bint add=False
     ) except *:
         """
         Add a triple from 3 (TPL) serialized terms.
@@ -852,14 +636,14 @@ cdef class SimpleGraph:
             void *cur
 
 
-    cdef int remove_triple(self, BufferTriple* btrp) except -1:
+    cdef int remove_triple(self, const BufferTriple* btrp) except -1:
         """
         Remove one triple from the graph.
         """
         return cc.hashset_remove(self._triples, btrp, NULL)
 
 
-    cdef bint trp_contains(self, BufferTriple* btrp):
+    cdef bint trp_contains(self, const BufferTriple* btrp):
         cdef:
             cc.HashSetIter it
             void* cur
@@ -883,18 +667,6 @@ cdef class SimpleGraph:
         self.add((trp,))
 
 
-    cpdef void remove_triples(self, pattern) except *:
-        """
-        Remove triples by pattern.
-
-        The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
-        """
-        s, p, o = pattern
-        for match in self.lookup(s, p, o):
-            logger.debug(f'Removing from graph: {match}.')
-            self.data.remove(match)
-
-
     def as_rdflib(self):
         """
         Return the data set as an RDFLib Graph.
@@ -936,73 +708,94 @@ cdef class SimpleGraph:
             return (s,p,o) in _data
 
 
-    def lookup(self, s, p, o):
+    def lookup(self, pattern):
         """
         Look up triples by a pattern.
 
         This function converts RDFLib terms into the serialized format stored
         in the graph's internal structure and compares them bytewise.
 
-        Any and all of the lookup terms can be ``None``.
+        Any and all of the lookup terms may be ``None``.
+
+        :param tuple pattern: 3-tuple of s, p, o RDFLib terms.
+
+        :rtype: SimpleGraph
+        :return: New SimpleGraph instance with matching triples.
         """
         cdef:
-            void *void_p
+            # NOTE(review): ``cur`` and ``trp`` are not used in this method.
+            void* cur
             BufferTriple trp
-            BufferTriple *trp_p
-            cc.HashSetIter ti
-            Buffer t1
-            Buffer t2
-            lookup_fn_t fn
+            SimpleGraph res_gr = SimpleGraph()
+
+        self._match_ptn_callback(pattern, res_gr, cb.add_trp_callback, NULL)
+
+        return res_gr
+
+
+    cdef void _match_ptn_callback(
+        self, pattern, SimpleGraph gr,
+        lookup_callback_fn_t callback_fn, void* ctx=NULL
+    ) except *:
+        """
+        Execute an arbitrary function on a list of triples matching a pattern.
+
+        The arbitrary function is applied to each triple found in the current
+        graph, and to a discrete graph that can be the current graph itself
+        or a different one.
+
+        :param tuple pattern: 3-tuple of s, p, o RDFLib terms. Any of them
+            may be ``None``, in which case that position is unbound.
+        :param SimpleGraph gr: Graph passed to the callback function.
+        :param lookup_callback_fn_t callback_fn: Function called once for
+            each matching triple, with ``gr``, the triple and ``ctx``.
+        :param void* ctx: Pointer handed over to the callback as is.
+        """
+        cdef:
+            void* cur
+            Buffer t1, t2
+            Buffer ss, sp, so
+            BufferTriple trp
+            BufferTriple* trp_p
+            lookup_fn_t cmp_fn
+            cc.HashSetIter it
 
-        res = set()
+        s, p, o = pattern
 
         # Decide comparison logic outside the loop.
         if s is not None and p is not None and o is not None:
-            # Return immediately if 3-term match is requested.
+            logger.info('Looping over one triple only.')
+            # Shortcut for 3-term match.
+            trp.s = &ss
+            trp.p = &sp
+            trp.o = &so
             term.serialize_from_rdflib(s, trp.s, self.pool)
             term.serialize_from_rdflib(p, trp.p, self.pool)
             term.serialize_from_rdflib(o, trp.o, self.pool)
 
             if cc.hashset_contains(self._triples, &trp):
-                res.add((s, p, o))
-
-            return res
+                callback_fn(gr, &trp, ctx)
+
+            # Return whether or not the triple was found: falling through
+            # would run an s + p match below and ignore the bound object.
+            return
 
-        elif s is not None:
+        if s is not None:
             term.serialize_from_rdflib(s, &t1)
             if p is not None:
-                fn = lookup_sp_cmp_fn
+                cmp_fn = cb.lookup_sp_cmp_fn
                 term.serialize_from_rdflib(p, &t2)
             elif o is not None:
-                fn = lookup_so_cmp_fn
+                cmp_fn = cb.lookup_so_cmp_fn
                 term.serialize_from_rdflib(o, &t2)
             else:
-                fn = lookup_s_cmp_fn
+                cmp_fn = cb.lookup_s_cmp_fn
         elif p is not None:
             term.serialize_from_rdflib(p, &t1)
             if o is not None:
-                fn = lookup_po_cmp_fn
+                cmp_fn = cb.lookup_po_cmp_fn
                 term.serialize_from_rdflib(o, &t2)
             else:
-                fn = lookup_p_cmp_fn
+                cmp_fn = cb.lookup_p_cmp_fn
         elif o is not None:
-            fn = lookup_o_cmp_fn
+            cmp_fn = cb.lookup_o_cmp_fn
             term.serialize_from_rdflib(o, &t1)
         else:
-            fn = lookup_none_cmp_fn
+            cmp_fn = cb.lookup_none_cmp_fn
 
         # Iterate over serialized triples.
-        cc.hashset_iter_init(&ti, self._triples)
-        while cc.hashset_iter_next(&ti, &void_p) != cc.CC_ITER_END:
-            if void_p == NULL:
-                trp_p = <BufferTriple *>void_p
-                res.add((
-                    term.deserialize_to_rdflib(trp_p[0].s),
-                    term.deserialize_to_rdflib(trp_p[0].p),
-                    term.deserialize_to_rdflib(trp_p[0].o),
-                ))
-
-        return res
+        cc.hashset_iter_init(&it, self._triples)
+        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
+            trp_p = <BufferTriple*>cur
+            if cmp_fn(trp_p, &t1, &t2):
+                callback_fn(gr, trp_p, ctx)
 
 
 
@@ -1034,7 +827,7 @@ cdef class Imr(SimpleGraph):
         :param kwargs: Keyword arguments inherited from
             ``SimpleGraph.__init__``.
         """
-        self.uri = str(uri)
+        self.id = str(uri)
         #super().__init(*args, **kwargs)
 
 
@@ -1045,8 +838,8 @@ cdef class Imr(SimpleGraph):
         This includes the subject URI, number of triples contained and the
         memory address of the instance.
         """
-        return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
-            f'length={len(self.data)}>')
+        return (f'<{self.__class__.__name__} @{hex(id(self))} id={self.id}, '
+            f'length={len(self)}>')
 
 
     def __getitem__(self, item):
@@ -1061,16 +854,26 @@ cdef class Imr(SimpleGraph):
             # If a Node is given, return all values for that predicate.
             return {
                     r[2] for r in self.data
-                    if r[0] == self.uri and r[1] == item}
+                    if r[0] == self.id and r[1] == item}
         else:
             raise TypeError(f'Wrong slice format: {item}.')
 
 
+    @property
+    def uri(self):
+        """
+        Get resource identifier as a RDFLib URIRef.
+
+        The URIRef is built from the ``id`` attribute on each access.
+
+        :rtype: rdflib.URIRef
+        """
+        return URIRef(self.id)
+
+
     cdef Imr empty_copy(self):
         """
         Create an empty instance carrying over some key properties.
         """
-        return self.__class__(uri=self.uri)
+        return self.__class__(uri=self.id)
 
 
     def value(self, p, strict=False):
@@ -1087,7 +890,7 @@ cdef class Imr(SimpleGraph):
 
         if strict and len(values) > 1:
             raise RuntimeError('More than one value found for {}, {}.'.format(
-                    self.uri, p))
+                    self.id, p))
 
         for ret in values:
             return ret
@@ -1108,5 +911,3 @@ cdef class Imr(SimpleGraph):
         return gr.resource(identifier=self.uri)
 
 
-
-

+ 21 - 1
lakesuperior/model/graph/term.pxd

@@ -2,6 +2,21 @@ from cymem.cymem cimport Pool
 
 from lakesuperior.model.base cimport Buffer
 
+#cdef extern from "regex.h" nogil:
+#   ctypedef struct regmatch_t:
+#      int rm_so
+#      int rm_eo
+#   ctypedef struct regex_t:
+#      pass
+#   int REG_NOSUB, REG_NOMATCH
+#   int regcomp(regex_t* preg, const char* regex, int cflags)
+#   int regexec(
+#       const regex_t *preg, const char* string, size_t nmatch,
+#       regmatch_t pmatch[], int eflags
+#    )
+#   void regfree(regex_t* preg)
+
+
 ctypedef struct Term:
     char type
     char *data
@@ -9,8 +24,12 @@ ctypedef struct Term:
     char *lang
 
 cdef:
+    #int term_new(
+    #    Term* term, char type, char* data, char* datatype=*, char* lang=*
+    #) except -1
+    #regex_t uri_regex
     # Temporary TPL variable.
-    char *_pk
+    #char* _pk
 
     int serialize(const Term *term, Buffer *sterm, Pool pool=*) except -1
     int deserialize(const Buffer *data, Term *term) except -1
@@ -19,3 +38,4 @@ cdef:
     object deserialize_to_rdflib(const Buffer *data)
     object to_rdflib(const Term *term)
     object to_bytes(const Term *term)
+

+ 29 - 0
lakesuperior/model/graph/term.pyx

@@ -1,3 +1,5 @@
+from uuid import uuid4
+
 from rdflib import URIRef, BNode, Literal
 
 #from cpython.mem cimport PyMem_Malloc, PyMem_Free
@@ -16,6 +18,33 @@ DEF LSUP_TERM_TYPE_BNODE = 2
 DEF LSUP_TERM_TYPE_LITERAL = 3
 DEF LSUP_TERM_PK_FMT = b'csss' # Reflects the Term structure
 DEF LSUP_TERM_STRUCT_PK_FMT = b'S(' + LSUP_TERM_PK_FMT + b')'
+# URI parsing regular expression. Conforms to RFC3986.
+#DEF URI_REGEX_STR = (
+#    b'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
+#)
+
+#cdef char* ptn = URI_REGEX_STR
+#regcomp(&uri_regex, ptn, REG_NOSUB)
+# Compile with no capture groups.
+# TODO This should be properly cleaned up on application shutdown:
+# regfree(&uri_regex)
+
+#cdef int term_new(
+#    Term* term, char type, char* data, char* datatype=NULL, char* lang=NULL
+#) except -1:
+#    if regexec(&uri_regex, data, 0, NULL, 0) == REG_NOMATCH:
+#        raise ValueError('Not a valid URI.')
+#    term.type = type
+#    term.data = (
+#        data # TODO use C UUID v4 (RFC 4122) generator
+#        if term.type == LSUP_TERM_TYPE_BNODE
+#        else data
+#    )
+#    if term.type == LSUP_TERM_TYPE_LITERAL:
+#        term.datatype = datatype
+#        term.lang = lang
+#
+#    return 0
 
 
 cdef int serialize(const Term *term, Buffer *sterm, Pool pool=None) except -1:

+ 6 - 5
lakesuperior/model/ldp/ldpr.py

@@ -600,7 +600,7 @@ class Ldpr(metaclass=ABCMeta):
 
         :param SimpleGraph gr: The graph to validate.
         """
-        offending_subjects = gr.terms('s') & srv_mgd_subjects
+        offending_subjects = gr.terms_by_type('s') & srv_mgd_subjects
         if offending_subjects:
             if self.handling == 'strict':
                 raise ServerManagedTermError(offending_subjects, 's')
@@ -612,7 +612,7 @@ class Ldpr(metaclass=ABCMeta):
                         if t[0] == s:
                             gr.remove(t)
 
-        offending_predicates = gr.terms('p') & srv_mgd_predicates
+        offending_predicates = gr.terms_by_type('p') & srv_mgd_predicates
         # Allow some predicates if the resource is being created.
         if offending_predicates:
             if self.handling == 'strict':
@@ -808,8 +808,9 @@ class Ldpr(metaclass=ABCMeta):
         :param create: Whether the resource is being created.
         """
         # Base LDP types.
-        for t in self.base_types:
-            self.provided_imr.add((self.uri, RDF.type, t))
+        self.provided_imr.add(
+            [(self.uri, RDF.type, t) for t in self.base_types]
+        )
 
         # Create and modify timestamp.
         if create:
@@ -900,7 +901,7 @@ class Ldpr(metaclass=ABCMeta):
 
         :param rdflib.resource.Resouce cont_rsrc:  The container resource.
         """
-        cont_p = cont_rsrc.metadata.terms('p')
+        cont_p = cont_rsrc.metadata.terms_by_type('p')
 
         logger.info('Checking direct or indirect containment.')
         logger.debug('Parent predicates: {}'.format(cont_p))

+ 13 - 13
tests/0_data_structures/test_graph.py

@@ -416,7 +416,7 @@ class TestImrOps:
         assert trp[0] in gr3
         assert trp[4] in gr3
 
-        assert gr3.uri == 'http://example.edu/imr01'
+        assert gr3.uri == URIRef('http://example.edu/imr01')
 
 
     def test_ip_union(self, trp):
@@ -435,7 +435,7 @@ class TestImrOps:
         assert trp[0] in gr1
         assert trp[4] in gr1
 
-        assert gr1.uri == 'http://example.edu/imr01'
+        assert gr1.uri == URIRef('http://example.edu/imr01')
 
 
     def test_addition(self, trp):
@@ -454,7 +454,7 @@ class TestImrOps:
         assert trp[0] in gr3
         assert trp[4] in gr3
 
-        assert gr3.uri == 'http://example.edu/imr01'
+        assert gr3.uri == URIRef('http://example.edu/imr01')
 
 
     def test_ip_addition(self, trp):
@@ -473,7 +473,7 @@ class TestImrOps:
         assert trp[0] in gr1
         assert trp[4] in gr1
 
-        assert gr1.uri == 'http://example.edu/imr01'
+        assert gr1.uri == URIRef('http://example.edu/imr01')
 
 
     def test_subtraction(self, trp):
@@ -495,7 +495,7 @@ class TestImrOps:
         assert trp[3] not in gr3
         assert trp[4] not in gr3
 
-        assert gr3.uri == 'http://example.edu/imr01'
+        assert gr3.uri == URIRef('http://example.edu/imr01')
 
         gr3 = gr2 - gr1
 
@@ -507,7 +507,7 @@ class TestImrOps:
         assert trp[4] in gr3
         assert trp[5] in gr3
 
-        assert gr3.uri == 'http://example.edu/imr02'
+        assert gr3.uri == URIRef('http://example.edu/imr02')
 
 
     def test_ip_subtraction(self, trp):
@@ -529,7 +529,7 @@ class TestImrOps:
         assert trp[3] not in gr1
         assert trp[4] not in gr1
 
-        assert gr1.uri == 'http://example.edu/imr01'
+        assert gr1.uri == URIRef('http://example.edu/imr01')
 
 
 
@@ -551,7 +551,7 @@ class TestImrOps:
         assert trp[0] not in gr3
         assert trp[5] not in gr3
 
-        assert gr3.uri == 'http://example.edu/imr01'
+        assert gr3.uri == URIRef('http://example.edu/imr01')
 
 
     def test_ip_intersect(self, trp):
@@ -572,7 +572,7 @@ class TestImrOps:
         assert trp[0] not in gr1
         assert trp[5] not in gr1
 
-        assert gr1.uri == 'http://example.edu/imr01'
+        assert gr1.uri == URIRef('http://example.edu/imr01')
 
 
     def test_xor(self, trp):
@@ -593,7 +593,7 @@ class TestImrOps:
         assert trp[0] in gr3
         assert trp[5] in gr3
 
-        assert gr3.uri == 'http://example.edu/imr01'
+        assert gr3.uri == URIRef('http://example.edu/imr01')
 
 
     def test_ip_xor(self, trp):
@@ -614,7 +614,7 @@ class TestImrOps:
         assert trp[0] in gr1
         assert trp[5] in gr1
 
-        assert gr1.uri == 'http://example.edu/imr01'
+        assert gr1.uri == URIRef('http://example.edu/imr01')
 
 
 @pytest.mark.usefixtures('trp')
@@ -641,7 +641,7 @@ class TestHybridOps:
         assert trp[4] in gr3
 
         assert isinstance(gr3, Imr)
-        assert gr3.uri == 'http://example.edu/imr01'
+        assert gr3.uri == URIRef('http://example.edu/imr01')
 
         gr4 = gr2 | gr1
 
@@ -666,7 +666,7 @@ class TestHybridOps:
         assert trp[0] in gr1
         assert trp[4] in gr1
 
-        assert gr1.uri == 'http://example.edu/imr01'
+        assert gr1.uri == URIRef('http://example.edu/imr01')
 
 
     def test_ip_union_gr(self, trp):

+ 5 - 4
tests/1_store/test_lmdb_store.py

@@ -8,6 +8,7 @@ from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
 from rdflib.namespace import RDF, RDFS
 
 from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore
+from lakesuperior.model.graph.graph import Imr
 
 
 @pytest.fixture(scope='class')
@@ -747,11 +748,11 @@ class TestContext:
             res_no_ctx = store.triples(trp3)
             res_ctx = store.triples(trp3, gr2_uri)
             for res in res_no_ctx:
-                assert Graph(identifier=gr_uri) in res[1]
-                assert Graph(identifier=gr2_uri) in res[1]
+                assert Imr(uri=gr_uri) in res[1]
+                assert Imr(uri=gr2_uri) in res[1]
             for res in res_ctx:
-                assert Graph(identifier=gr_uri) in res[1]
-                assert Graph(identifier=gr2_uri) in res[1]
+                assert Imr(uri=gr_uri) in res[1]
+                assert Imr(uri=gr2_uri) in res[1]
 
 
     def test_delete_from_ctx(self, store):