Parcourir la source

WIP Reorganize modules into model package.
[ci skip]

Stefano Cossu il y a 6 ans
Parent
commit
bf5900e13e

+ 4 - 0
lakesuperior/model/base.pxd

@@ -0,0 +1,4 @@
+from lakesuperior.cy_include cimport cytpl as tpl
+
+ctypedef tpl.tpl_bin Buffer
+

+ 0 - 0
lakesuperior/model/base.pyx


+ 0 - 0
lakesuperior/model/graph/__init__.pxd


+ 0 - 0
lakesuperior/model/graph/__init__.py


+ 10 - 4
lakesuperior/store/ldp_rs/graph.pxd → lakesuperior/model/graph/graph.pxd

@@ -1,14 +1,18 @@
+from cymem.cymem cimport Pool
+
 from lakesuperior.cy_include cimport calg
-from lakesuperior.store.ldp_rs.keyset cimport Keyset
-from lakesuperior.store.ldp_rs.lmdb_triplestore cimport TripleKey
-from lakesuperior.store.ldp_rs.term cimport Buffer
-from lakesuperior.store.ldp_rs.triple cimport BufferTriple
+from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.graph.triple cimport BufferTriple
+from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.store.ldp_rs.lmdb_triplestore cimport LmdbTriplestore
+from lakesuperior.store.ldp_rs.lmdb_triplestore cimport TripleKey
 
 # Lookup function that returns whether a triple contains a match pattern.
 ctypedef bint (*lookup_fn_t)(
         const BufferTriple *trp, const Buffer *t1, const Buffer *t2)
 
+ctypedef Buffer SPOBuffer[3]
+
 cdef:
     unsigned int term_hash_fn(const calg.SetValue data)
     bint buffer_cmp_fn(const calg.SetValue v1, const calg.SetValue v2)
@@ -21,6 +25,8 @@ cdef class SimpleGraph:
         calg.Set *_triples # Set of unique triples.
         calg.Set *_terms # Set of unique serialized terms.
         LmdbTriplestore store
+        # Temp data pool. It gets managed with the object lifecycle via cymem.
+        Pool _pool
 
         void _data_from_lookup(self, tuple trp_ptn, ctx=*) except *
         void _data_from_keyset(self, Keyset data) except *

+ 810 - 63
lakesuperior/model/graph/graph.pyx

@@ -1,79 +1,826 @@
-from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+import logging
 
-from lakesuperior.util.hash cimport HLEN_32, Hash32, hash32
+from functools import wraps
 
-ctypedef void *SetItem
-ctypedef struct Index:
-    size_t *addr
-    size_t ct
+from rdflib import Graph
+from rdflib.term import Node
 
-cdef class VarSet:
+from lakesuperior import env
+
+from libc.string cimport memcmp, memcpy
+from libc.stdlib cimport free
+
+from cymem.cymem cimport Pool
+
+from lakesuperior.cy_include cimport calg
+from lakesuperior.cy_include cimport cylmdb as lmdb
+from lakesuperior.model.graph cimport term
+from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
+        KLEN, DBL_KLEN, TRP_KLEN, TripleKey)
+from lakesuperior.model.structures.keyset cimport Keyset
+from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.graph.triple cimport BufferTriple
+from lakesuperior.model.structures.hash cimport Hash32, hash32
+
+#BUF_PTR_SZ = sizeof(Buffer *)
+
+logger = logging.getLogger(__name__)
+
+
+def use_data(fn):
+    """
+    Decorator to indicate that a set operation between two SimpleGraph
+    instances should use the ``data`` property of the second term. The second
+    term can also be a simple set.
+    """
+    @wraps(fn)
+    def _wrapper(self, other):
+        if isinstance(other, SimpleGraph):
+            other = other.data
+    return _wrapper
+
+
+cdef unsigned int term_hash_fn(const calg.SetValue data):
+    """
+    Hash function for sets of terms.
+
+    https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
+
+    :param SetValue *data: Pointer to a Buffer structure.
+    """
+    cdef:
+        Hash32 hash
+
+    hash32(<const Buffer *>&data, &hash)
+
+    return hash
+
+
+cdef unsigned int trp_hash_fn(calg.SetValue btrp):
+    """
+    Hash function for sets of (serialized) triples.
+
+    https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
+
+    This function computes the hash of the concatenated pointer values in the
+    s, p, o members of the triple. The triple structure is treated as a byte
+    string. This is safe in spite of byte-wise struct evaluation being a
+    frowned-upon practice (due to padding issues), because it is assumed that
+    the input value is always the same type of structure.
+
+    :param SetItem *data: Pointer to a BufferTriple structure.
+    """
+    cdef:
+        Buffer data
+        Hash32 hash
+
+    data.addr = &btrp
+    data.sz = sizeof(btrp)
+    hash32(&data, &hash)
+
+    return hash
+
+
+cdef bint buffer_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
+    """
+    Compare function for two Buffer objects.
+
+    https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
+    """
+    # No-cast option.
+    #if v1[0].sz != v2[0].sz:
+    #    return False
+    #return memcmp(v1[0].addr, v2[0].addr, v1[0].sz) == 0
+    cdef:
+        Buffer b1 = (<Buffer *>v1)[0]
+        Buffer b2 = (<Buffer *>v2)[0]
+
+    if b1.sz != b2.sz:
+        return False
+
+    return memcmp(b1.addr, b2.addr, b1.sz) == 0
+
+
+cdef bint triple_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
     """
-    Variable-size set of variable-size values.
+    Compare function for two triples in a CAlg set.
+
+    Here, pointers to terms are compared for s, p, o. The pointers should be
+    guaranteed to point to unique values (i.e. no two pointers have the same
+    term value within a graph).
+
+    https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
     """
     cdef:
-        # Data blob. Stored contibuously in memory, and found by index.
-        void *_data
-        # Total size of data.
-        size_t _data_sz
-        # Index used to find start and end of each item.
-        Index _index
-        # KeySet of hashes of the set items.
-        Keyset _hashes
+        BufferTriple t1 = (<BufferTriple *>v1)[0]
+        BufferTriple t2 = (<BufferTriple *>v2)[0]
+
+    return(
+            t1.s == t2.s and
+            t1.p == t2.p and
+            t1.o == t2.o)
+
+
+cdef inline bint lookup_none_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    return True
+
+
+cdef inline bint lookup_s_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    """
+    Lookup callback compare function for a given s in a triple.
+
+    The function returns ``True`` if ``t1`` matches the first term.
+
+    ``t2`` is not used and is declared only for compatibility with the
+    other interchangeable functions.
+    """
+    return buffer_cmp_fn(t1, trp[0].s)
+
 
-    def __cinit__(self):
-        self._data = PyMem_Malloc(0)
-        self._hashes = Keyset(0, sizeof(Hash32))
-        self._data_sz = 0
+cdef inline bint lookup_p_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    return buffer_cmp_fn(t1, trp[0].p)
+
+
+cdef inline bint lookup_o_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    return buffer_cmp_fn(t1, trp[0].o)
+
+
+cdef inline bint lookup_sp_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    return (
+            buffer_cmp_fn(t1, trp[0].s)
+            and buffer_cmp_fn(t2, trp[0].p))
+
+
+cdef inline bint lookup_so_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    return (
+            buffer_cmp_fn(t1, trp[0].s)
+            and buffer_cmp_fn(t2, trp[0].o))
+
+
+cdef inline bint lookup_po_cmp_fn(
+        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
+    return (
+            buffer_cmp_fn(t1, trp[0].p)
+            and buffer_cmp_fn(t2, trp[0].o))
+
+
+
+cdef class SimpleGraph:
+    """
+    Fast and simple implementation of a graph.
+
+    Most functions should mimic RDFLib's graph with less overhead. It uses
+    the same funny but functional slicing notation. No lookup functions within
+    the graph are available at this time.
+
+    Instances of this class hold a set of
+    :py:class:`~lakesuperior.store.ldp_rs.term.Term` structures that stores
+    unique terms within the graph, and a set of
+    :py:class:`~lakesuperior.store.ldp_rs.triple.Triple` structures referencing
+    those terms. Therefore, no data duplication occurs and the storage is quite
+    sparse.
+
+    A graph can be instantiated from a store lookup.
+
+    A SimpleGraph can also be obtained from a
+    :py:class:`lakesuperior.store.keyset.Keyset` which is convenient bacause
+    a Keyset can be obtained very efficiently from querying a store, then also
+    very efficiently filtered and eventually converted into a set of meaningful
+    terms.
+
+    An instance of this class can also be converted to and from a
+    ``rdflib.Graph`` instance. TODO verify that this frees Cython pointers.
+    """
+
+    def __cinit__(
+            self, Keyset keyset=None, store=None, set data=set()):
+        """
+        Initialize the graph with pre-existing data or by looking up a store.
+
+        One of ``keyset``, or ``data`` can be provided. If more than
+        one of these is provided, precedence is given in the mentioned order.
+        If none of them is specified, an empty graph is initialized.
+
+        :param rdflib.URIRef uri: The graph URI.
+            This will serve as the subject for some queries.
+        :param Keyset keyset: Keyset to create the graph from. Keys will be
+            converted to set elements.
+        :param lakesuperior.store.ldp_rs.LmdbTripleStore store: store to
+            look up the keyset. Only used if ``keyset`` is specified. If not
+            set, the environment store is used.
+        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
+        :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
+            E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
+            Any and all elements may be ``None``.
+        :param lmdbStore store: the store to look data up.
+        """
+        self.store = store or env.app_globals.rdf_store
+        self._terms = calg.set_new(term_hash_fn, buffer_cmp_fn)
+        self._triples = calg.set_new(trp_hash_fn, triple_cmp_fn)
+        self._pool = Pool()
+
+        cdef:
+            size_t i = 0
+            TripleKey spok
+            term.Buffer pk_t
+
+        # Initialize empty data set.
+        if keyset:
+            # Populate with triples extracted from provided key set.
+            self._data_from_keyset(keyset)
+        elif data is not None:
+            # Populate with provided Python set.
+            for s, p, o in data:
+                self._add_from_rdflib(s, p, o)
 
 
     def __dealloc__(self):
-        PyMem_Free(self._data)
+        """
+        Free the triple pointers. TODO use a Cymem pool
+        """
+        free(self._triples)
+        free(self._terms)
+
+
+    @property
+    def data(self):
+        """
+        Triple data as a Python set.
+
+        :rtype: set
+        """
+        return self._data_as_set()
 
 
-    cdef int add(self, const SetItem data, Index *idx) except -1:
+    cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
         """
-        Add a number of items.
+        Look up triples in the triplestore and load them into ``data``.
 
-        The items' content as a blob and their end boundaries must be given
-        as an array of ``size_t``.
-        """"
-        #cdef size_t grow_sz = idx.addr[idx.ct - 1]
-        # Last index indicates the position of the last byte
+        :param tuple lookup: 3-tuple of RDFlib terms or ``None``.
+        :param LmdbTriplestore store: Reference to a LMDB triplestore. This
+            is normally set to ``lakesuperior.env.app_globals.rdf_store``.
+        """
+        cdef:
+            size_t i
+            unsigned char spok[TRP_KLEN]
+
+        with self.store.txn_ctx():
+            keyset = self.store.triple_keys(trp_ptn, ctx)
+            self.data_from_keyset(keyset)
+
+
+
+    cdef void _data_from_keyset(self, Keyset data) except *:
+        """Populate a graph from a Keyset."""
+        cdef TripleKey spok
+
+        while data.next(spok):
+            self._add_from_spok(spok)
+
+
+    cdef inline void _add_from_spok(self, const TripleKey spok) except *:
+        """
+        Add a triple from a TripleKey of term keys.
+        """
         cdef:
-            size_t i, cur = 0, data_exp_sz, hash_exp_sz
-            void *_tmp_data
-            Hash32 hash
-            Buffer msg
-            SetItem *item
-
-        # Resize data sets to maximium possible size for this function call.
-        _tmp_data = PyMem_Realloc(self._data, idx.addr[idx.ct - 1])
-        if not _tmp_data:
-            raise MemoryError('Unable to allocate memory for set data.')
-        self._hashes.resize(self._hashes.ct + idx.ct)
-
-        for i in idx.ct:
-            # Iterate over the items in the index and verify if they can be
-            # added if they are not duplicates.
-            msg.addr = data + cur
-            msg.sz = idx[i] - cur
-            hash32(&msg, &hash)
-
-            if not self.hashes.contains(hash):
-                # Add to the data.
-                memcpy(_tmp_data + i * HLEN_32, msg.addr, msg.sz)
-                # Add to the hashes keyset.
-                memcpy(self._hashes + self._data_sz, hash, HLEN32)
-                # Record the memory expansion.
-                self._data_sz += msg.sz
-
-            cur = idx[i]
-
-        # Shrink data back to their actual size.
-        self.hashes.resize(cur)
-        _tmp_data = PyMem_Malloc(self._data_sz)
-        if not _tmp_data :
-            raise MemoryError('Unable to allocate memory for set data.')
-        self._data = _tmp_data
+            SPOBuffer s_spo
+            BufferTriple trp
+
+        s_spo = <SPOBuffer>self._pool.alloc(3, sizeof(Buffer))
+
+        self.store.lookup_term(spok, s_spo)
+        self.store.lookup_term(spok + KLEN, s_spo + 1)
+        self.store.lookup_term(spok + DBL_KLEN, s_spo + 2)
+
+        self._add_triple(s_spo, s_spo + 1, s_spo + 2)
+
+
+    cdef inline void _add_triple(
+        self, const Buffer *ss, const Buffer *sp, const Buffer *so
+    ) except *:
+        """
+        Add a triple from 3 (TPL) serialized terms.
+
+        Each of the terms is added to the term set if not existing. The triple
+        also is only added if not existing.
+        """
+        trp = <BufferTriple *>self._pool.alloc(1, sizeof(BufferTriple))
+
+        print('Adding terms.')
+        print('ss: ')
+        print((<unsigned char *>ss[0].addr)[:ss[0].sz])
+        calg.set_insert(self._terms, ss)
+        print('sp: ')
+        print((<unsigned char *>sp[0].addr)[:sp[0].sz])
+        calg.set_insert(self._terms, sp)
+        print('so: ')
+        print((<unsigned char *>so[0].addr)[:so[0].sz])
+        calg.set_insert(self._terms, so)
+        print('Added terms.')
+
+        cdef calg.SetIterator ti
+        cdef Buffer *t
+        calg.set_iterate(self._terms, &ti)
+        while calg.set_iter_has_more(&ti):
+            t = <Buffer *>calg.set_iter_next(&ti)
+            print('term @{}: '.format(<size_t>t.addr))
+            print((<unsigned char *>t.addr)[:t.sz])
+
+        trp.s = ss
+        trp.p = sp
+        trp.o = so
+
+        print('Adding triple.')
+        calg.set_insert(self._triples, trp)
+        print('Added triple.')
+
+        cdef BufferTriple *tt
+        calg.set_iterate(self._triples, &ti)
+        while calg.set_iter_has_more(&ti):
+            tt = <BufferTriple *>calg.set_iter_next(&ti)
+            print('Triple pointer address: {}'.format(<unsigned long>tt))
+
+            print('Triple s address: {}'.format(<unsigned long>tt[0].s))
+            print('triple s: ')
+            print((<unsigned char *>tt[0].s.addr)[:tt[0].s.sz])
+
+            print('Triple p address: {}'.format(<unsigned long>tt[0].p))
+            print('triple p: ')
+            print((<unsigned char *>tt[0].p.addr)[:tt[0].o.sz])
+
+            print('Triple o address: {}'.format(<unsigned long>tt[0].o))
+            print('triple o: ')
+            print((<unsigned char *>tt[0].o.addr)[:tt[0].o.sz])
+
+
+    cdef set _data_as_set(self):
+        """
+        Convert triple data to a Python set.
+
+        :rtype: set
+        """
+        cdef:
+            calg.SetIterator ti
+            BufferTriple *trp
+            term.Term s, p, o
+
+        graph_set = set()
+
+        print('Initialize iterator.')
+        calg.set_iterate(self._triples, &ti)
+        print('start loop.')
+        while calg.set_iter_has_more(&ti):
+            print('Set up triple.')
+            trp = <BufferTriple *>calg.set_iter_next(&ti)
+            if trp == NULL:
+                print('Triple is NULL!')
+                return graph_set
+            print('Triple pointer address: {}'.format(<unsigned long>trp))
+
+            print('Triple s address: {}'.format(<unsigned long>trp[0].s))
+            print(f'Triple s size: {trp[0].s.sz}')
+            print('Triple s:')
+            print((<unsigned char *>trp[0].p.addr)[:trp[0].p.sz])
+            print('Triple p address: {}'.format(<unsigned long>trp[0].p))
+            print(f'Triple p size: {trp[0].p.sz}')
+            print('Triple p:')
+            print((<unsigned char *>trp[0].p.addr)[:trp[0].p.sz])
+            print('Triple o address: {}'.format(<unsigned long>trp[0].o))
+            print(f'Triple o size: {trp[0].o.sz}')
+            print('Triple o:')
+            print((<unsigned char *>trp[0].o.addr)[:trp[0].o.sz])
+
+            print('Add triple.')
+            graph_set.add((
+                term.deserialize_to_rdflib(trp.s),
+                term.deserialize_to_rdflib(trp.p),
+                term.deserialize_to_rdflib(trp.o),
+            ))
+
+        return graph_set
+
+
+    # Basic set operations.
+
+    def add(self, triple):
+        """ Add one triple to the graph. """
+        cdef SPOBuffer s_spo
+
+        s_spo = <SPOBuffer>self._pool.alloc(3, sizeof(Buffer))
+        s, p, o = triple
+
+        #print('Serializing s.')
+        term.serialize_from_rdflib(s, s_spo)
+        #print('Serializing p.')
+        term.serialize_from_rdflib(p, s_spo + 1)
+        #print('Serializing o.')
+        term.serialize_from_rdflib(o, s_spo + 2)
+
+        print('Adding triple from rdflib.')
+        self._add_triple(s_spo, s_spo + 1, s_spo + 2)
+        print('Added triple from rdflib.')
+
+
+    def remove(self, item):
+        """
+        Remove one item from the graph.
+
+        :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
+            wildcards are not accepted.
+        """
+        self.data.remove(item)
+
+    def __len__(self):
+        """ Number of triples in the graph. """
+        return len(self.data)
+
+    @use_data
+    def __eq__(self, other):
+        """ Equality operator between ``SimpleGraph`` instances. """
+        return self.data == other
+
+    def __repr__(self):
+        """
+        String representation of the graph.
+
+        It provides the number of triples in the graph and memory address of
+            the instance.
+        """
+        return (f'<{self.__class__.__name__} @{hex(id(self))} '
+            f'length={len(self.data)}>')
+
+    def __str__(self):
+        """ String dump of the graph triples. """
+        return str(self.data)
+
+    @use_data
+    def __sub__(self, other):
+        """ Set subtraction. """
+        return self.data - other
+
+    @use_data
+    def __isub__(self, other):
+        """ In-place set subtraction. """
+        self.data -= other
+        return self
+
+    @use_data
+    def __and__(self, other):
+        """ Set intersection. """
+        return self.data & other
+
+    @use_data
+    def __iand__(self, other):
+        """ In-place set intersection. """
+        self.data &= other
+        return self
+
+    @use_data
+    def __or__(self, other):
+        """ Set union. """
+        return self.data | other
+
+    @use_data
+    def __ior__(self, other):
+        """ In-place set union. """
+        self.data |= other
+        return self
+
+    @use_data
+    def __xor__(self, other):
+        """ Set exclusive intersection (XOR). """
+        return self.data ^ other
+
+    @use_data
+    def __ixor__(self, other):
+        """ In-place set exclusive intersection (XOR). """
+        self.data ^= other
+        return self
+
+    def __contains__(self, item):
+        """
+        Whether the graph contains a triple.
+
+        :rtype: boolean
+        """
+        return item in self.data
+
+    def __iter__(self):
+        """ Graph iterator. It iterates over the set triples. """
+        return self.data.__iter__()
+
+
+    # Slicing.
+
+    def __getitem__(self, item):
+        """
+        Slicing function.
+
+        It behaves similarly to `RDFLib graph slicing
+        <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
+        """
+        if isinstance(item, slice):
+            s, p, o = item.start, item.stop, item.step
+            return self._slice(s, p, o)
+        else:
+            raise TypeError(f'Wrong slice format: {item}.')
+
+
+    cpdef void set(self, tuple trp) except *:
+        """
+        Set a single value for subject and predicate.
+
+        Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
+        """
+        if None in trp:
+            raise ValueError(f'Invalid triple: {trp}')
+        self.remove_triples((trp[0], trp[1], None))
+        self.add(trp)
+
+
+    cpdef void remove_triples(self, pattern) except *:
+        """
+        Remove triples by pattern.
+
+        The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
+        """
+        s, p, o = pattern
+        for match in self.lookup(s, p, o):
+            logger.debug(f'Removing from graph: {match}.')
+            self.data.remove(match)
+
+
+    cpdef object as_rdflib(self):
+        """
+        Return the data set as an RDFLib Graph.
+
+        :rtype: rdflib.Graph
+        """
+        gr = Graph()
+        for trp in self.data:
+            gr.add(trp)
+
+        return gr
+
+
+    def _slice(self, s, p, o):
+        """
+        Return terms filtered by other terms.
+
+        This behaves like the rdflib.Graph slicing policy.
+        """
+        _data = self.data
+
+        logger.debug(f'Slicing graph by: {s}, {p}, {o}.')
+        if s is None and p is None and o is None:
+            return _data
+        elif s is None and p is None:
+            return {(r[0], r[1]) for r in _data if r[2] == o}
+        elif s is None and o is None:
+            return {(r[0], r[2]) for r in _data if r[1] == p}
+        elif p is None and o is None:
+            return {(r[1], r[2]) for r in _data if r[0] == s}
+        elif s is None:
+            return {r[0] for r in _data if r[1] == p and r[2] == o}
+        elif p is None:
+            return {r[1] for r in _data if r[0] == s and r[2] == o}
+        elif o is None:
+            return {r[2] for r in _data if r[0] == s and r[1] == p}
+        else:
+            # all given
+            return (s,p,o) in _data
+
+
+    def lookup(self, s, p, o):
+        """
+        Look up triples by a pattern.
+
+        This function converts RDFLib terms into the serialized format stored
+        in the graph's internal structure and compares them bytewise.
+
+        Any and all of the lookup terms can be ``None``.
+        """
+        cdef:
+            BufferTriple trp
+            BufferTriple *trp_p
+            calg.SetIterator ti
+            const Buffer t1
+            const Buffer t2
+            lookup_fn_t fn
+
+        res = set()
+
+        # Decide comparison logic outside the loop.
+        if s is not None and p is not None and o is not None:
+            # Return immediately if 3-term match is requested.
+            term.serialize_from_rdflib(s, trp.s)
+            term.serialize_from_rdflib(p, trp.p)
+            term.serialize_from_rdflib(o, trp.o)
+
+            if calg.set_query(self._triples, &trp):
+                res.add((s, p, o))
+
+            return res
+
+        elif s is not None:
+            term.serialize_from_rdflib(s, &t1)
+            if p is not None:
+                fn = lookup_sp_cmp_fn
+                term.serialize_from_rdflib(p, &t2)
+            elif o is not None:
+                fn = lookup_so_cmp_fn
+                term.serialize_from_rdflib(o, &t2)
+            else:
+                fn = lookup_s_cmp_fn
+        elif p is not None:
+            term.serialize_from_rdflib(p, &t1)
+            if o is not None:
+                fn = lookup_po_cmp_fn
+                term.serialize_from_rdflib(o, &t2)
+            else:
+                fn = lookup_p_cmp_fn
+        elif o is not None:
+            fn = lookup_o_cmp_fn
+            term.serialize_from_rdflib(o, &t1)
+        else:
+            fn = lookup_none_cmp_fn
+
+        # Iterate over serialized triples.
+        calg.set_iterate(self._triples, &ti)
+        while calg.set_iter_has_more(&ti):
+            trp_p = <BufferTriple *>calg.set_iter_next(&ti)
+            if fn(trp_p, &t1, &t2):
+                res.add((
+                    term.deserialize_to_rdflib(trp_p[0].s),
+                    term.deserialize_to_rdflib(trp_p[0].p),
+                    term.deserialize_to_rdflib(trp_p[0].o),
+                ))
+
+        return res
+
+
+    cpdef set terms(self, str type):
+        """
+        Get all terms of a type: subject, predicate or object.
+
+        :param str type: One of ``s``, ``p`` or ``o``.
+        """
+        i = 'spo'.index(type)
+        return {r[i] for r in self.data}
+
+
+
+cdef class Imr(SimpleGraph):
+    """
+    In-memory resource data container.
+
+    This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
+    the data set and some convenience methods.
+
+    An instance of this class can be converted to a ``rdflib.Resource``
+    instance.
+
+    Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
+    will create a new ``Imr`` instance with the same subject URI.
+    """
+    def __init__(self, str uri, *args, **kwargs):
+        """
+        Initialize the graph with pre-existing data or by looking up a store.
+
+        Either ``data``, or ``lookup`` *and* ``store``, can be provide.
+        ``lookup`` and ``store`` have precedence. If none of them is specified,
+        an empty graph is initialized.
+
+        :param rdflib.URIRef uri: The graph URI.
+            This will serve as the subject for some queries.
+        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
+        :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
+            E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
+            Any and all elements may be ``None``.
+        :param lmdbStore store: the store to look data up.
+        """
+        super().__init__(*args, **kwargs)
+
+        self.uri = uri
+
+
+    @property
+    def identifier(self):
+        """
+        IMR URI. For compatibility with RDFLib Resource.
+
+        :rtype: string
+        """
+        return self.uri
+
+
+    @property
+    def graph(self):
+        """
+        Return a SimpleGraph with the same data.
+
+        :rtype: SimpleGraph
+        """
+        return SimpleGraph(self.data)
+
+
+    def __repr__(self):
+        """
+        String representation of an Imr.
+
+        This includes the subject URI, number of triples contained and the
+        memory address of the instance.
+        """
+        return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
+            f'length={len(self.data)}>')
+
+    @use_data
+    def __sub__(self, other):
+        """
+        Set difference. This creates a new Imr with the same subject URI.
+        """
+        return self.__class__(uri=self.uri, data=self.data - other)
+
+    @use_data
+    def __and__(self, other):
+        """
+        Set intersection. This creates a new Imr with the same subject URI.
+        """
+        return self.__class__(uri=self.uri, data=self.data & other)
+
+    @use_data
+    def __or__(self, other):
+        """
+        Set union. This creates a new Imr with the same subject URI.
+        """
+        return self.__class__(uri=self.uri, data=self.data | other)
+
+    @use_data
+    def __xor__(self, other):
+        """
+        Set exclusive OR (XOR). This creates a new Imr with the same subject
+        URI.
+        """
+        return self.__class__(uri=self.uri, data=self.data ^ other)
+
+
+    def __getitem__(self, item):
+        """
+        Supports slicing notation.
+        """
+        if isinstance(item, slice):
+            s, p, o = item.start, item.stop, item.step
+            return self._slice(s, p, o)
+
+        elif isinstance(item, Node):
+            # If a Node is given, return all values for that predicate.
+            return {
+                    r[2] for r in self.data
+                    if r[0] == self.uri and r[1] == item}
+        else:
+            raise TypeError(f'Wrong slice format: {item}.')
+
+
+    def value(self, p, strict=False):
+        """
+        Get an individual value.
+
+        :param rdflib.termNode p: Predicate to search for.
+        :param bool strict: If set to ``True`` the method raises an error if
+            more than one value is found. If ``False`` (the default) only
+            the first found result is returned.
+        :rtype: rdflib.term.Node
+        """
+        values = self[p]
+
+        if strict and len(values) > 1:
+            raise RuntimeError('More than one value found for {}, {}.'.format(
+                    self.uri, p))
+
+        for ret in values:
+            return ret
+
+        return None
+
+
+    cpdef as_rdflib(self):
+        """
+        Return the IMR as a RDFLib Resource.
+
+        :rtype: rdflib.Resource
+        """
+        gr = Graph()
+        for trp in self.data:
+            gr.add(trp)
+
+        return gr.resource(identifier=self.uri)
+
+
+
 

+ 1 - 2
lakesuperior/store/ldp_rs/term.pxd → lakesuperior/model/graph/term.pxd

@@ -1,6 +1,5 @@
-from lakesuperior.cy_include cimport cytpl as tpl
+from lakesuperior.model.base cimport Buffer
 
-ctypedef tpl.tpl_bin Buffer
 ctypedef struct Term:
     char type
     char *data

+ 1 - 0
lakesuperior/store/ldp_rs/term.pyx → lakesuperior/model/graph/term.pyx

@@ -5,6 +5,7 @@ from libc.stdint cimport uint64_t
 from libc.stdlib cimport free
 
 from lakesuperior.cy_include cimport cytpl as tpl
+from lakesuperior.model.base cimport Buffer
 
 
 DEF LSUP_TERM_TYPE_URIREF = 1

+ 3 - 2
lakesuperior/store/ldp_rs/triple.pxd → lakesuperior/model/graph/triple.pxd

@@ -1,5 +1,6 @@
-from lakesuperior.cy_include cimport cytpl as tpl
-from lakesuperior.store.ldp_rs.term cimport Buffer, Term
+#from lakesuperior.cy_include cimport cytpl as tpl
+from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.graph.term cimport Term
 
 # Triple of Term structs.
 ctypedef struct Triple:

+ 0 - 0
lakesuperior/store/ldp_rs/triple.pyx → lakesuperior/model/graph/triple.pyx


+ 0 - 0
lakesuperior/model/ldp_factory.py → lakesuperior/model/ldp/ldp_factory.py


+ 0 - 0
lakesuperior/model/ldp_nr.py → lakesuperior/model/ldp/ldp_nr.py


+ 0 - 0
lakesuperior/model/ldp_rs.py → lakesuperior/model/ldp/ldp_rs.py


+ 0 - 0
lakesuperior/model/ldpr.py → lakesuperior/model/ldp/ldpr.py


+ 0 - 0
lakesuperior/model/structures/__init__.pxd


+ 0 - 0
lakesuperior/model/structures/__init__.py


+ 1 - 1
lakesuperior/util/hash.pxd → lakesuperior/model/structures/hash.pxd

@@ -1,6 +1,6 @@
 from libc.stdint cimport uint32_t, uint64_t
 
-from lakesuperior.store.ldp_rs.term cimport Buffer
+from lakesuperior.model.base cimport Buffer
 
 
 # Seed for computing the term hash.

+ 1 - 1
lakesuperior/util/hash.pyx → lakesuperior/model/structures/hash.pyx

@@ -1,7 +1,7 @@
 from libc.stdint cimport uint64_t
 from libc.string cimport memcpy
 
-from lakesuperior.store.ldp_rs.term cimport Buffer
+from lakesuperior.model.base cimport Buffer
 
 
 memcpy(&term_hash_seed32, TERM_HASH_SEED, HLEN_32)

+ 1 - 0
lakesuperior/store/ldp_rs/keyset.pxd → lakesuperior/model/structures/keyset.pxd

@@ -8,4 +8,5 @@ cdef class Keyset:
         void resize(self, size_t ct) except *
         unsigned char *get_item(self, i)
         bint next(self, void *val)
+        bint contains(self, const void *val)
 

+ 4 - 4
lakesuperior/store/ldp_rs/keyset.pyx → lakesuperior/model/structures/keyset.pyx

@@ -163,11 +163,11 @@ cdef class Keyset:
         """
         Whether a value exists in the set.
         """
-        cdef void *cval
+        cdef void *stored_val
 
         self.reset()
-        while next(val):
-            if memcmp(val, cval, self.itemsize) == 0:
+        while self.next(stored_val):
+            if memcmp(val, stored_val, self.itemsize) == 0:
                 return True
-        retuern False
+        return False
 

+ 0 - 800
lakesuperior/store/ldp_rs/graph.pyx

@@ -1,800 +0,0 @@
-import logging
-
-from functools import wraps
-
-from rdflib import Graph
-from rdflib.term import Node
-
-from lakesuperior import env
-
-from libc.string cimport memcmp, memcpy
-from libc.stdlib cimport free
-
-from lakesuperior.cy_include cimport calg
-from lakesuperior.cy_include cimport cylmdb as lmdb
-from lakesuperior.store.ldp_rs cimport term
-from lakesuperior.store.ldp_rs.lmdb_triplestore cimport (
-        KLEN, DBL_KLEN, TRP_KLEN, TripleKey, LmdbTriplestore)
-from lakesuperior.store.ldp_rs.keyset cimport Keyset
-from lakesuperior.store.ldp_rs.triple cimport BufferTriple
-from lakesuperior.util.hash cimport Hash32, hash32
-
-#BUF_PTR_SZ = sizeof(Buffer *)
-
-logger = logging.getLogger(__name__)
-
-
-def use_data(fn):
-    """
-    Decorator to indicate that a set operation between two SimpleGraph
-    instances should use the ``data`` property of the second term. The second
-    term can also be a simple set.
-    """
-    @wraps(fn)
-    def _wrapper(self, other):
-        if isinstance(other, SimpleGraph):
-            other = other.data
-    return _wrapper
-
-
-cdef unsigned int term_hash_fn(const calg.SetValue data):
-    """
-    Hash function for sets of terms.
-
-    https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
-
-    :param SetValue *data: Pointer to a Buffer structure.
-    """
-    cdef:
-        Hash32 hash
-
-    hash32(<const Buffer *>&data, &hash)
-
-    return hash
-
-
-cdef unsigned int trp_hash_fn(calg.SetValue btrp):
-    """
-    Hash function for sets of (serialized) triples.
-
-    https://fragglet.github.io/c-algorithms/doc/set_8h.html#6c7986a2a80d7a3cb7b9d74e1c6fef97
-
-    This function computes the hash of the concatenated pointer values in the
-    s, p, o members of the triple. The triple structure is treated as a byte
-    string. This is safe in spite of byte-wise struct evaluation being a
-    frowned-upon practice (due to padding issues), because it is assumed that
-    the input value is always the same type of structure.
-
-    :param SetItem *data: Pointer to a BufferTriple structure.
-    """
-    cdef:
-        Buffer data
-        Hash32 hash
-
-    data.addr = &btrp
-    data.sz = sizeof(btrp)
-    hash32(&data, &hash)
-
-    return hash
-
-
-cdef bint buffer_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
-    """
-    Compare function for two Buffer objects.
-
-    https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
-    """
-    # No-cast option.
-    #if v1[0].sz != v2[0].sz:
-    #    return False
-    #return memcmp(v1[0].addr, v2[0].addr, v1[0].sz) == 0
-    cdef:
-        Buffer b1 = (<Buffer *>v1)[0]
-        Buffer b2 = (<Buffer *>v2)[0]
-
-    if b1.sz != b2.sz:
-        return False
-
-    return memcmp(b1.addr, b2.addr, b1.sz) == 0
-
-
-cdef bint triple_cmp_fn(const calg.SetValue v1, const calg.SetValue v2):
-    """
-    Compare function for two triples in a CAlg set.
-
-    Here, pointers to terms are compared for s, p, o. The pointers should be
-    guaranteed to point to unique values (i.e. no two pointers have the same
-    term value within a graph).
-
-    https://fragglet.github.io/c-algorithms/doc/set_8h.html#40fa2c86d5b003c1b0b0e8dd1e4df9f4
-    """
-    cdef:
-        BufferTriple t1 = (<BufferTriple *>v1)[0]
-        BufferTriple t2 = (<BufferTriple *>v2)[0]
-
-    return(
-            t1.s == t2.s and
-            t1.p == t2.p and
-            t1.o == t2.o)
-
-
-cdef inline bint lookup_none_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return True
-
-
-cdef inline bint lookup_s_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    """
-    Lookup callback compare function for a given s in a triple.
-
-    The function returns ``True`` if ``t1`` matches the first term.
-
-    ``t2`` is not used and is declared only for compatibility with the
-    other interchangeable functions.
-    """
-    return buffer_cmp_fn(t1, trp[0].s)
-
-
-cdef inline bint lookup_p_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return buffer_cmp_fn(t1, trp[0].p)
-
-
-cdef inline bint lookup_o_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return buffer_cmp_fn(t1, trp[0].o)
-
-
-cdef inline bint lookup_sp_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return (
-            buffer_cmp_fn(t1, trp[0].s)
-            and buffer_cmp_fn(t2, trp[0].p))
-
-
-cdef inline bint lookup_so_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return (
-            buffer_cmp_fn(t1, trp[0].s)
-            and buffer_cmp_fn(t2, trp[0].o))
-
-
-cdef inline bint lookup_po_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2):
-    return (
-            buffer_cmp_fn(t1, trp[0].p)
-            and buffer_cmp_fn(t2, trp[0].o))
-
-
-
-cdef class SimpleGraph:
-    """
-    Fast and simple implementation of a graph.
-
-    Most functions should mimic RDFLib's graph with less overhead. It uses
-    the same funny but functional slicing notation. No lookup functions within
-    the graph are available at this time.
-
-    Instances of this class hold a set of
-    :py:class:`~lakesuperior.store.ldp_rs.term.Term` structures that stores
-    unique terms within the graph, and a set of
-    :py:class:`~lakesuperior.store.ldp_rs.triple.Triple` structures referencing
-    those terms. Therefore, no data duplication occurs and the storage is quite
-    sparse.
-
-    A graph can be instantiated from a store lookup.
-
-    A SimpleGraph can also be obtained from a
-    :py:class:`lakesuperior.store.keyset.Keyset` which is convenient bacause
-    a Keyset can be obtained very efficiently from querying a store, then also
-    very efficiently filtered and eventually converted into a set of meaningful
-    terms.
-
-    An instance of this class can also be converted to and from a
-    ``rdflib.Graph`` instance. TODO verify that this frees Cython pointers.
-    """
-
-    def __cinit__(
-            self, Keyset keyset=None, store=None, set data=set()):
-        """
-        Initialize the graph with pre-existing data or by looking up a store.
-
-        One of ``keyset``, or ``data`` can be provided. If more than
-        one of these is provided, precedence is given in the mentioned order.
-        If none of them is specified, an empty graph is initialized.
-
-        :param rdflib.URIRef uri: The graph URI.
-            This will serve as the subject for some queries.
-        :param Keyset keyset: Keyset to create the graph from. Keys will be
-            converted to set elements.
-        :param lakesuperior.store.ldp_rs.LmdbTripleStore store: store to
-            look up the keyset. Only used if ``keyset`` is specified. If not
-            set, the environment store is used.
-        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
-        :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
-            E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
-            Any and all elements may be ``None``.
-        :param lmdbStore store: the store to look data up.
-        """
-        self.store = store or env.app_globals.rdf_store
-        self._terms = calg.set_new(term_hash_fn, buffer_cmp_fn)
-        self._triples = calg.set_new(trp_hash_fn, triple_cmp_fn)
-
-        cdef:
-            size_t i = 0
-            TripleKey spok
-            term.Buffer pk_t
-
-        # Initialize empty data set.
-        if keyset:
-            # Populate with triples extracted from provided key set.
-            self._data_from_keyset(keyset)
-        elif data is not None:
-            # Populate with provided Python set.
-            for s, p, o in data:
-                self._add_from_rdflib(s, p, o)
-
-
-    def __dealloc__(self):
-        """
-        Free the triple pointers. TODO use a Cymem pool
-        """
-        free(self._triples)
-        free(self._terms)
-
-
-    @property
-    def data(self):
-        """
-        Triple data as a Python set.
-
-        :rtype: set
-        """
-        return self._data_as_set()
-
-
-    cdef void _data_from_lookup(self, tuple trp_ptn, ctx=None) except *:
-        """
-        Look up triples in the triplestore and load them into ``data``.
-
-        :param tuple lookup: 3-tuple of RDFlib terms or ``None``.
-        :param LmdbTriplestore store: Reference to a LMDB triplestore. This
-            is normally set to ``lakesuperior.env.app_globals.rdf_store``.
-        """
-        cdef:
-            size_t i
-            unsigned char spok[TRP_KLEN]
-
-        with self.store.txn_ctx():
-            keyset = self.store.triple_keys(trp_ptn, ctx)
-            self.data_from_keyset(keyset)
-
-
-
-    cdef void _data_from_keyset(self, Keyset data) except *:
-        """Populate a graph from a Keyset."""
-        cdef TripleKey spok
-
-        while data.next(spok):
-            self._add_from_spok(spok)
-
-
-    cdef inline void _add_from_spok(self, const TripleKey spok) except *:
-        """
-        Add a triple from a TripleKey of term keys.
-        """
-        cdef:
-            Buffer *ss = NULL
-            Buffer *sp = NULL
-            Buffer *so = NULL
-            BufferTriple trp
-
-        self.store.lookup_term(spok, ss)
-        self.store.lookup_term(spok + KLEN, sp)
-        self.store.lookup_term(spok + DBL_KLEN, so)
-
-        self._add_triple(ss, sp, so, &trp)
-
-
-    cdef inline void _add_triple(
-        self, const Buffer *ss, const Buffer *sp, const Buffer *so,
-        BufferTriple *trp
-    ) except *:
-        """
-        Add a triple from 3 (TPL) serialized terms.
-
-        Each of the terms is added to the term set if not existing. The triple
-        also is only added if not existing.
-        """
-        print('Adding terms.')
-        print('ss: ')
-        print((<unsigned char *>ss[0].addr)[:ss[0].sz])
-        calg.set_insert(self._terms, ss)
-        print('sp: ')
-        print((<unsigned char *>sp[0].addr)[:sp[0].sz])
-        calg.set_insert(self._terms, sp)
-        print('so: ')
-        print((<unsigned char *>so[0].addr)[:so[0].sz])
-        calg.set_insert(self._terms, so)
-        print('Added terms.')
-
-        cdef calg.SetIterator ti
-        cdef Buffer *t
-        calg.set_iterate(self._terms, &ti)
-        while calg.set_iter_has_more(&ti):
-            t = <Buffer *>calg.set_iter_next(&ti)
-            print('term: ')
-            print((<unsigned char *>t.addr)[:t.sz])
-
-        trp[0].s = ss
-        trp[0].p = sp
-        trp[0].o = so
-
-        print('Adding triple.')
-        calg.set_insert(self._triples, trp)
-        print('Added triple.')
-
-        cdef BufferTriple *tt
-        calg.set_iterate(self._triples, &ti)
-        while calg.set_iter_has_more(&ti):
-            tt = <BufferTriple *>calg.set_iter_next(&ti)
-            print('Triple pointer address: {}'.format(<unsigned long>tt))
-            print('Triple s address: {}'.format(<unsigned long>tt[0].s))
-            print('triple s: ')
-            print((<unsigned char *>trp[0].s.addr)[:trp[0].s.sz])
-            print((<unsigned char *>tt[0].s.addr)[:tt[0].s.sz])
-
-
-    cdef set _data_as_set(self):
-        """
-        Convert triple data to a Python set.
-
-        :rtype: set
-        """
-        cdef:
-            calg.SetIterator ti
-            BufferTriple *trp
-            term.Term s, p, o
-
-        graph_set = set()
-
-        print('Initialize iterator.')
-        calg.set_iterate(self._triples, &ti)
-        print('start loop.')
-        while calg.set_iter_has_more(&ti):
-            print('Set up triple.')
-            trp = <BufferTriple *>calg.set_iter_next(&ti)
-            if trp == NULL:
-                print('Triple is NULL!')
-                return graph_set
-            print('Triple pointer address: {}'.format(<unsigned long>trp))
-            print('Triple s address: {}'.format(<unsigned long>trp[0].s))
-            print('Triple s:')
-            print((<unsigned char *>trp[0].s.addr)[trp[0].s.sz])
-
-            print('Add triple.')
-            graph_set.add((
-                    term.deserialize_to_rdflib(trp[0].s),
-                    term.deserialize_to_rdflib(trp[0].p),
-                    term.deserialize_to_rdflib(trp[0].o)))
-
-        return graph_set
-
-
-    # Basic set operations.
-
-    def add(self, triple):
-        """ Add one triple to the graph. """
-        cdef Buffer ss, sp, so
-
-        s, p, o = triple
-        #print('Serializing s.')
-        term.serialize_from_rdflib(s, &ss)
-        #print('Serializing p.')
-        term.serialize_from_rdflib(p, &sp)
-        #print('Serializing o.')
-        term.serialize_from_rdflib(o, &so)
-
-        print('Adding triple from rdflib.')
-        self._add_triple(&ss, &sp, &so)
-        print('Added triple from rdflib.')
-
-
-    def remove(self, item):
-        """
-        Remove one item from the graph.
-
-        :param tuple item: A 3-tuple of RDFlib terms. Only exact terms, i.e.
-            wildcards are not accepted.
-        """
-        self.data.remove(item)
-
-    def __len__(self):
-        """ Number of triples in the graph. """
-        return len(self.data)
-
-    @use_data
-    def __eq__(self, other):
-        """ Equality operator between ``SimpleGraph`` instances. """
-        return self.data == other
-
-    def __repr__(self):
-        """
-        String representation of the graph.
-
-        It provides the number of triples in the graph and memory address of
-            the instance.
-        """
-        return (f'<{self.__class__.__name__} @{hex(id(self))} '
-            f'length={len(self.data)}>')
-
-    def __str__(self):
-        """ String dump of the graph triples. """
-        return str(self.data)
-
-    @use_data
-    def __sub__(self, other):
-        """ Set subtraction. """
-        return self.data - other
-
-    @use_data
-    def __isub__(self, other):
-        """ In-place set subtraction. """
-        self.data -= other
-        return self
-
-    @use_data
-    def __and__(self, other):
-        """ Set intersection. """
-        return self.data & other
-
-    @use_data
-    def __iand__(self, other):
-        """ In-place set intersection. """
-        self.data &= other
-        return self
-
-    @use_data
-    def __or__(self, other):
-        """ Set union. """
-        return self.data | other
-
-    @use_data
-    def __ior__(self, other):
-        """ In-place set union. """
-        self.data |= other
-        return self
-
-    @use_data
-    def __xor__(self, other):
-        """ Set exclusive intersection (XOR). """
-        return self.data ^ other
-
-    @use_data
-    def __ixor__(self, other):
-        """ In-place set exclusive intersection (XOR). """
-        self.data ^= other
-        return self
-
-    def __contains__(self, item):
-        """
-        Whether the graph contains a triple.
-
-        :rtype: boolean
-        """
-        return item in self.data
-
-    def __iter__(self):
-        """ Graph iterator. It iterates over the set triples. """
-        return self.data.__iter__()
-
-
-    # Slicing.
-
-    def __getitem__(self, item):
-        """
-        Slicing function.
-
-        It behaves similarly to `RDFLib graph slicing
-        <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
-        """
-        if isinstance(item, slice):
-            s, p, o = item.start, item.stop, item.step
-            return self._slice(s, p, o)
-        else:
-            raise TypeError(f'Wrong slice format: {item}.')
-
-
-    cpdef void set(self, tuple trp) except *:
-        """
-        Set a single value for subject and predicate.
-
-        Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
-        """
-        if None in trp:
-            raise ValueError(f'Invalid triple: {trp}')
-        self.remove_triples((trp[0], trp[1], None))
-        self.add(trp)
-
-
-    cpdef void remove_triples(self, pattern) except *:
-        """
-        Remove triples by pattern.
-
-        The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
-        """
-        s, p, o = pattern
-        for match in self.lookup(s, p, o):
-            logger.debug(f'Removing from graph: {match}.')
-            self.data.remove(match)
-
-
-    cpdef object as_rdflib(self):
-        """
-        Return the data set as an RDFLib Graph.
-
-        :rtype: rdflib.Graph
-        """
-        gr = Graph()
-        for trp in self.data:
-            gr.add(trp)
-
-        return gr
-
-
-    def _slice(self, s, p, o):
-        """
-        Return terms filtered by other terms.
-
-        This behaves like the rdflib.Graph slicing policy.
-        """
-        _data = self.data
-
-        logger.debug(f'Slicing graph by: {s}, {p}, {o}.')
-        if s is None and p is None and o is None:
-            return _data
-        elif s is None and p is None:
-            return {(r[0], r[1]) for r in _data if r[2] == o}
-        elif s is None and o is None:
-            return {(r[0], r[2]) for r in _data if r[1] == p}
-        elif p is None and o is None:
-            return {(r[1], r[2]) for r in _data if r[0] == s}
-        elif s is None:
-            return {r[0] for r in _data if r[1] == p and r[2] == o}
-        elif p is None:
-            return {r[1] for r in _data if r[0] == s and r[2] == o}
-        elif o is None:
-            return {r[2] for r in _data if r[0] == s and r[1] == p}
-        else:
-            # all given
-            return (s,p,o) in _data
-
-
-    def lookup(self, s, p, o):
-        """
-        Look up triples by a pattern.
-
-        This function converts RDFLib terms into the serialized format stored
-        in the graph's internal structure and compares them bytewise.
-
-        Any and all of the lookup terms can be ``None``.
-        """
-        cdef:
-            BufferTriple trp
-            BufferTriple *trp_p
-            calg.SetIterator ti
-            const Buffer t1
-            const Buffer t2
-            lookup_fn_t fn
-
-        res = set()
-
-        # Decide comparison logic outside the loop.
-        if s is not None and p is not None and o is not None:
-            # Return immediately if 3-term match is requested.
-            term.serialize_from_rdflib(s, trp.s)
-            term.serialize_from_rdflib(p, trp.p)
-            term.serialize_from_rdflib(o, trp.o)
-
-            if calg.set_query(self._triples, &trp):
-                res.add((s, p, o))
-
-            return res
-
-        elif s is not None:
-            term.serialize_from_rdflib(s, &t1)
-            if p is not None:
-                fn = lookup_sp_cmp_fn
-                term.serialize_from_rdflib(p, &t2)
-            elif o is not None:
-                fn = lookup_so_cmp_fn
-                term.serialize_from_rdflib(o, &t2)
-            else:
-                fn = lookup_s_cmp_fn
-        elif p is not None:
-            term.serialize_from_rdflib(p, &t1)
-            if o is not None:
-                fn = lookup_po_cmp_fn
-                term.serialize_from_rdflib(o, &t2)
-            else:
-                fn = lookup_p_cmp_fn
-        elif o is not None:
-            fn = lookup_o_cmp_fn
-            term.serialize_from_rdflib(o, &t1)
-        else:
-            fn = lookup_none_cmp_fn
-
-        # Iterate over serialized triples.
-        calg.set_iterate(self._triples, &ti)
-        while calg.set_iter_has_more(&ti):
-            trp_p = <BufferTriple *>calg.set_iter_next(&ti)
-            if fn(trp_p, &t1, &t2):
-                res.add((
-                    term.deserialize_to_rdflib(trp_p[0].s),
-                    term.deserialize_to_rdflib(trp_p[0].p),
-                    term.deserialize_to_rdflib(trp_p[0].o),
-                ))
-
-        return res
-
-
-    cpdef set terms(self, str type):
-        """
-        Get all terms of a type: subject, predicate or object.
-
-        :param str type: One of ``s``, ``p`` or ``o``.
-        """
-        i = 'spo'.index(type)
-        return {r[i] for r in self.data}
-
-
-
-cdef class Imr(SimpleGraph):
-    """
-    In-memory resource data container.
-
-    This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
-    the data set and some convenience methods.
-
-    An instance of this class can be converted to a ``rdflib.Resource``
-    instance.
-
-    Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
-    will create a new ``Imr`` instance with the same subject URI.
-    """
-    def __init__(self, str uri, *args, **kwargs):
-        """
-        Initialize the graph with pre-existing data or by looking up a store.
-
-        Either ``data``, or ``lookup`` *and* ``store``, can be provide.
-        ``lookup`` and ``store`` have precedence. If none of them is specified,
-        an empty graph is initialized.
-
-        :param rdflib.URIRef uri: The graph URI.
-            This will serve as the subject for some queries.
-        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
-        :param tuple lookup: tuple of a 3-tuple of lookup terms, and a context.
-            E.g. ``((URIRef('urn:ns:a'), None, None), URIRef('urn:ns:ctx'))``.
-            Any and all elements may be ``None``.
-        :param lmdbStore store: the store to look data up.
-        """
-        super().__init__(*args, **kwargs)
-
-        self.uri = uri
-
-
-    @property
-    def identifier(self):
-        """
-        IMR URI. For compatibility with RDFLib Resource.
-
-        :rtype: string
-        """
-        return self.uri
-
-
-    @property
-    def graph(self):
-        """
-        Return a SimpleGraph with the same data.
-
-        :rtype: SimpleGraph
-        """
-        return SimpleGraph(self.data)
-
-
-    def __repr__(self):
-        """
-        String representation of an Imr.
-
-        This includes the subject URI, number of triples contained and the
-        memory address of the instance.
-        """
-        return (f'<{self.__class__.__name__} @{hex(id(self))} uri={self.uri}, '
-            f'length={len(self.data)}>')
-
-    @use_data
-    def __sub__(self, other):
-        """
-        Set difference. This creates a new Imr with the same subject URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data - other)
-
-    @use_data
-    def __and__(self, other):
-        """
-        Set intersection. This creates a new Imr with the same subject URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data & other)
-
-    @use_data
-    def __or__(self, other):
-        """
-        Set union. This creates a new Imr with the same subject URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data | other)
-
-    @use_data
-    def __xor__(self, other):
-        """
-        Set exclusive OR (XOR). This creates a new Imr with the same subject
-        URI.
-        """
-        return self.__class__(uri=self.uri, data=self.data ^ other)
-
-
-    def __getitem__(self, item):
-        """
-        Supports slicing notation.
-        """
-        if isinstance(item, slice):
-            s, p, o = item.start, item.stop, item.step
-            return self._slice(s, p, o)
-
-        elif isinstance(item, Node):
-            # If a Node is given, return all values for that predicate.
-            return {
-                    r[2] for r in self.data
-                    if r[0] == self.uri and r[1] == item}
-        else:
-            raise TypeError(f'Wrong slice format: {item}.')
-
-
-    def value(self, p, strict=False):
-        """
-        Get an individual value.
-
-        :param rdflib.termNode p: Predicate to search for.
-        :param bool strict: If set to ``True`` the method raises an error if
-            more than one value is found. If ``False`` (the default) only
-            the first found result is returned.
-        :rtype: rdflib.term.Node
-        """
-        values = self[p]
-
-        if strict and len(values) > 1:
-            raise RuntimeError('More than one value found for {}, {}.'.format(
-                    self.uri, p))
-
-        for ret in values:
-            return ret
-
-        return None
-
-
-    cpdef as_rdflib(self):
-        """
-        Return the IMR as a RDFLib Resource.
-
-        :rtype: rdflib.Resource
-        """
-        gr = Graph()
-        for trp in self.data:
-            gr.add(trp)
-
-        return gr.resource(identifier=self.uri)
-
-
-
-

+ 2 - 2
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -1,9 +1,9 @@
 cimport lakesuperior.cy_include.cylmdb as lmdb
 cimport lakesuperior.cy_include.cytpl as tpl
 
+from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
-from lakesuperior.store.ldp_rs.keyset cimport Keyset
-from lakesuperior.store.ldp_rs.term cimport Buffer
 
 #Fixed length for term keys.
 #

+ 7 - 5
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx

@@ -12,14 +12,16 @@ from libc.stdlib cimport free
 from libc.string cimport memcpy
 
 cimport lakesuperior.cy_include.cylmdb as lmdb
-from lakesuperior.store.ldp_rs.term cimport Term
+from lakesuperior.model.graph.term cimport Term
 
 from lakesuperior.store.base_lmdb_store cimport (
         BaseLmdbStore, data_v, dbi, key_v)
-from lakesuperior.store.ldp_rs.keyset cimport Keyset
-from lakesuperior.store.ldp_rs.term cimport (
-        Buffer, deserialize_to_rdflib, serialize_from_rdflib)
-from lakesuperior.util.hash cimport HLEN_128 as HLEN, Hash128, hash128
+from lakesuperior.model.base cimport Buffer
+from lakesuperior.model.graph.term cimport (
+        deserialize_to_rdflib, serialize_from_rdflib)
+from lakesuperior.model.structures.keyset cimport Keyset
+from lakesuperior.model.structures.hash cimport (
+        HLEN_128 as HLEN, Hash128, hash128)
 
 
 FIRST_KEY = <bytes>KEY_START * KLEN

+ 1 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -24,7 +24,7 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
 from lakesuperior.globals import ROOT_RSRC_URI
 from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError, PathSegmentError)
-from lakesuperior.store.ldp_rs.graph import SimpleGraph, Imr
+from lakesuperior.model.graph.graph import SimpleGraph, Imr
 
 
 META_GR_URI = nsc['fcsystem']['meta']

+ 43 - 63
setup.py

@@ -68,41 +68,47 @@ else:
 
 extensions = [
     Extension(
-        'lakesuperior.store.base_lmdb_store',
+        'lakesuperior.model.base',
         [
-            path.join(lmdb_src_dir, 'mdb.c'),
-            path.join(lmdb_src_dir, 'midl.c'),
-            path.join('lakesuperior', 'store', f'base_lmdb_store.{ext}'),
+            path.join(tpl_src_dir, 'tpl.c'),
+            path.join('lakesuperior', 'model', f'base.{ext}'),
         ],
         include_dirs=include_dirs,
+        #extra_compile_args=['-fopenmp'],
+        #extra_link_args=['-fopenmp']
     ),
     Extension(
-        'lakesuperior.store.ldp_rs.term',
+        'lakesuperior.model.structures.*',
         [
-            path.join(tpl_src_dir, 'tpl.c'),
-            path.join('lakesuperior', 'store', 'ldp_rs', f'term.{ext}'),
+            path.join(calg_src_dir, 'set.c'),
+            path.join(spookyhash_src_dir, 'spookyhash.c'),
+            path.join('lakesuperior', 'model', 'structures', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
-        extra_compile_args=['-fopenmp'],
-        extra_link_args=['-fopenmp']
+        #extra_compile_args=['-fopenmp'],
+        #extra_link_args=['-fopenmp']
     ),
     Extension(
-        'lakesuperior.store.ldp_rs.triple',
+        'lakesuperior.model.graph.*',
         [
-            path.join('lakesuperior', 'store', 'ldp_rs', f'triple.{ext}'),
+            path.join(calg_src_dir, 'set.c'),
+            path.join(tpl_src_dir, 'tpl.c'),
+            #path.join(spookyhash_src_dir, 'spookyhash.c'),
+            path.join('lakesuperior', 'model', 'graph', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
         extra_compile_args=['-fopenmp'],
         extra_link_args=['-fopenmp']
     ),
     Extension(
-        'lakesuperior.store.ldp_rs.keyset',
+        'lakesuperior.store.base_lmdb_store',
         [
-            path.join('lakesuperior', 'store', 'ldp_rs', f'keyset.{ext}'),
+            path.join(tpl_src_dir, 'tpl.c'),
+            path.join(lmdb_src_dir, 'mdb.c'),
+            path.join(lmdb_src_dir, 'midl.c'),
+            path.join('lakesuperior', 'store', f'base_lmdb_store.{ext}'),
         ],
         include_dirs=include_dirs,
-        #extra_compile_args=['-fopenmp'],
-        #extra_link_args=['-fopenmp']
     ),
     Extension(
         'lakesuperior.store.ldp_rs.lmdb_triplestore',
@@ -116,34 +122,27 @@ extensions = [
         extra_compile_args=['-fopenmp'],
         extra_link_args=['-fopenmp']
     ),
-    Extension(
-        'lakesuperior.util.hash',
-        [
-            path.join(spookyhash_src_dir, 'spookyhash.c'),
-            path.join('lakesuperior', 'util', f'hash.{ext}'),
-        ],
-        include_dirs=include_dirs,
-    ),
-    Extension(
-        'lakesuperior.store.ldp_rs.graph',
-        [
-            path.join(calg_src_dir, 'set.c'),
-            path.join('lakesuperior', 'store', 'ldp_rs', f'graph.{ext}'),
-        ],
-        include_dirs=include_dirs,
-        extra_compile_args=['-fopenmp'],
-        extra_link_args=['-fopenmp']
-    ),
-    # For testing.
-    #Extension(
-    #    '*',
-    #    [
-    #        #path.join(tpl_src_dir, 'tpl.c'),
-    #        path.join(
-    #            path.dirname(lakesuperior.basedir), 'sandbox', f'*.{ext}'),
-    #    ],
-    #    include_dirs=include_dirs,
-    #),
+]
+
+# Great reference read about dependency management:
+# https://caremad.io/posts/2013/07/setup-vs-requirement/
+install_requires = [
+    'CoilMQ',
+    'Flask',
+    'HiYaPyCo',
+    'PyYAML',
+    'arrow',
+    'click',
+    'click-log',
+    'cymem',
+    'gevent',
+    'gunicorn',
+    'rdflib',
+    'rdflib-jsonld',
+    'requests',
+    'requests-toolbelt',
+    'sphinx-rtd-theme',
+    'stomp.py',
 ]
 
 if USE_CYTHON:
@@ -209,26 +208,7 @@ setup(
 
     packages=find_packages(exclude=['contrib', 'docs', 'tests']),
 
-    # Great reference read about dependency management:
-    # https://caremad.io/posts/2013/07/setup-vs-requirement/
-    install_requires=[
-        'CoilMQ',
-        'Flask',
-        'HiYaPyCo',
-        'PyYAML',
-        'arrow',
-        'cchardet',
-        'click',
-        'click-log',
-        'gevent',
-        'gunicorn',
-        'rdflib',
-        'rdflib-jsonld',
-        'requests',
-        'requests-toolbelt',
-        'sphinx-rtd-theme',
-        'stomp.py',
-    ],
+    install_requires=install_requires,
 
     setup_requires=[
         'setuptools>=18.0',