浏览代码

Merge pull request #85 from scossu/keyset_graph

Keyset graph
Stefano Cossu 6 年之前
父节点
当前提交
db7e356af4
共有 43 个文件被更改,包括 2473 次插入2774 次删除
  1. 8 7
      lakesuperior/api/admin.py
  2. 31 32
      lakesuperior/api/resource.py
  3. 57 44
      lakesuperior/endpoints/ldp.py
  4. 0 0
      lakesuperior/model/__init__.pxd
  5. 10 0
      lakesuperior/model/base.pxd
  6. 30 0
      lakesuperior/model/callbacks.pxd
  7. 54 0
      lakesuperior/model/callbacks.pyx
  8. 0 44
      lakesuperior/model/graph/callbacks.pxd
  9. 0 250
      lakesuperior/model/graph/callbacks.pyx
  10. 0 63
      lakesuperior/model/graph/graph.pxd
  11. 0 922
      lakesuperior/model/graph/graph.pyx
  12. 8 8
      lakesuperior/model/ldp/ldp_factory.py
  13. 16 14
      lakesuperior/model/ldp/ldpr.py
  14. 0 0
      lakesuperior/model/rdf/__init__.pxd
  15. 0 0
      lakesuperior/model/rdf/__init__.py
  16. 37 0
      lakesuperior/model/rdf/graph.pxd
  17. 613 0
      lakesuperior/model/rdf/graph.pyx
  18. 0 0
      lakesuperior/model/rdf/term.pxd
  19. 8 6
      lakesuperior/model/rdf/term.pyx
  20. 1 1
      lakesuperior/model/rdf/triple.pxd
  21. 0 0
      lakesuperior/model/rdf/triple.pyx
  22. 0 21
      lakesuperior/model/structures/callbacks.pxd
  23. 0 33
      lakesuperior/model/structures/callbacks.pyx
  24. 21 10
      lakesuperior/model/structures/keyset.pxd
  25. 199 56
      lakesuperior/model/structures/keyset.pyx
  26. 50 5
      lakesuperior/store/base_lmdb_store.pyx
  27. 0 53
      lakesuperior/store/ldp_rs/lmdb_store.py
  28. 11 21
      lakesuperior/store/ldp_rs/lmdb_triplestore.pxd
  29. 150 315
      lakesuperior/store/ldp_rs/lmdb_triplestore.pyx
  30. 26 26
      lakesuperior/store/ldp_rs/rsrc_centric_layout.py
  31. 7 1
      lakesuperior/util/benchmark.py
  32. 15 0
      sandbox/NOTES
  33. 10 0
      sandbox/txn_openLogic.txt
  34. 31 17
      setup.py
  35. 850 0
      tests/0_data_structures/test_0_0_graph.py
  36. 0 688
      tests/0_data_structures/test_graph.py
  37. 78 23
      tests/1_store/test_1_0_lmdb_store.py
  38. 137 107
      tests/2_api/test_2_0_resource_api.py
  39. 11 6
      tests/2_api/test_2_1_admin_api.py
  40. 0 0
      tests/3_endpoints/test_3_0_ldp.py
  41. 4 1
      tests/3_endpoints/test_3_1_admin.py
  42. 0 0
      tests/3_endpoints/test_3_2_query.py
  43. 0 0
      tests/4_ancillary/test_4_0_toolbox.py

+ 8 - 7
lakesuperior/api/admin.py

@@ -80,15 +80,16 @@ def fixity_check(uid):
     from lakesuperior.model.ldp.ldp_factory import LDP_NR_TYPE
 
     rsrc = rsrc_api.get(uid)
-    if LDP_NR_TYPE not in rsrc.ldp_types:
-        raise IncompatibleLdpTypeError()
+    with env.app_globals.rdf_store.txn_ctx():
+        if LDP_NR_TYPE not in rsrc.ldp_types:
+            raise IncompatibleLdpTypeError()
 
-    ref_digest_term = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
-    ref_digest_parts = ref_digest_term.split(':')
-    ref_cksum = ref_digest_parts[-1]
-    ref_cksum_algo = ref_digest_parts[-2]
+        ref_digest_term = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
+        ref_digest_parts = ref_digest_term.split(':')
+        ref_cksum = ref_digest_parts[-1]
+        ref_cksum_algo = ref_digest_parts[-2]
 
-    calc_cksum = hashlib.new(ref_cksum_algo, rsrc.content.read()).hexdigest()
+        calc_cksum = hashlib.new(ref_cksum_algo, rsrc.content.read()).hexdigest()
 
     if calc_cksum != ref_cksum:
         raise ChecksumValidationError(uid, ref_cksum, calc_cksum)

+ 31 - 32
lakesuperior/api/resource.py

@@ -7,7 +7,7 @@ from threading import Lock, Thread
 
 import arrow
 
-from rdflib import Graph, Literal, URIRef
+from rdflib import Literal
 from rdflib.namespace import XSD
 
 from lakesuperior.config_parser import config
@@ -16,7 +16,6 @@ from lakesuperior.exceptions import (
 from lakesuperior import env, thread_env
 from lakesuperior.globals import RES_DELETED, RES_UPDATED
 from lakesuperior.model.ldp.ldp_factory import LDP_NR_TYPE, LdpFactory
-from lakesuperior.model.graph.graph import SimpleGraph
 
 
 logger = logging.getLogger(__name__)
@@ -24,36 +23,36 @@ logger = logging.getLogger(__name__)
 __doc__ = """
 Primary API for resource manipulation.
 
-Quickstart:
-
->>> # First import default configuration and globals—only done once.
->>> import lakesuperior.default_env
->>> from lakesuperior.api import resource
->>> # Get root resource.
->>> rsrc = resource.get('/')
->>> # Dump graph.
->>> set(rsrc.imr)
-{(rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://purl.org/dc/terms/title'),
-  rdflib.term.Literal('Repository Root')),
- (rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-  rdflib.term.URIRef('http://fedora.info/definitions/v4/repository#Container')),
- (rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-  rdflib.term.URIRef('http://fedora.info/definitions/v4/repository#RepositoryRoot')),
- (rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-  rdflib.term.URIRef('http://fedora.info/definitions/v4/repository#Resource')),
- (rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-  rdflib.term.URIRef('http://www.w3.org/ns/ldp#BasicContainer')),
- (rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-  rdflib.term.URIRef('http://www.w3.org/ns/ldp#Container')),
- (rdflib.term.URIRef('info:fcres/'),
-  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
-  rdflib.term.URIRef('http://www.w3.org/ns/ldp#RDFSource'))}
+Quickstart::
+
+    >>> # First import default configuration and globals—only done once.
+    >>> import lakesuperior.default_env
+    >>> from lakesuperior.api import resource
+    >>> # Get root resource.
+    >>> rsrc = resource.get('/')
+    >>> # Dump graph.
+    >>> set(rsrc.imr)
+    {(rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://purl.org/dc/terms/title'),
+      rdflib.term.Literal('Repository Root')),
+     (rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+      rdflib.term.URIRef('http://fedora.info/definitions/v4/repository#Container')),
+     (rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+      rdflib.term.URIRef('http://fedora.info/definitions/v4/repository#RepositoryRoot')),
+     (rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+      rdflib.term.URIRef('http://fedora.info/definitions/v4/repository#Resource')),
+     (rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+      rdflib.term.URIRef('http://www.w3.org/ns/ldp#BasicContainer')),
+     (rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+      rdflib.term.URIRef('http://www.w3.org/ns/ldp#Container')),
+     (rdflib.term.URIRef('info:fcres/'),
+      rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+      rdflib.term.URIRef('http://www.w3.org/ns/ldp#RDFSource'))}
 """
 
 def transaction(write=False):

+ 57 - 44
lakesuperior/endpoints/ldp.py

@@ -44,6 +44,8 @@ rdf_parsable_mimetypes = {
 }
 """MIMEtypes that can be parsed into RDF."""
 
+store = env.app_globals.rdf_store
+
 rdf_serializable_mimetypes = {
     #mt.name for mt in plugin.plugins()
     #if mt.kind is serializer.Serializer and '/' in mt.name
@@ -166,39 +168,42 @@ def get_resource(uid, out_fmt=None):
 
     rsrc = rsrc_api.get(uid, repr_options)
 
-    if out_fmt is None:
-        rdf_mimetype = _best_rdf_mimetype()
-        out_fmt = (
-                'rdf'
-                if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
-                else 'non_rdf')
-    out_headers.update(_headers_from_metadata(rsrc, out_fmt))
-    uri = g.tbox.uid_to_uri(uid)
-
-    # RDF output.
-    if out_fmt == 'rdf':
-        if locals().get('rdf_mimetype', None) is None:
-            rdf_mimetype = DEFAULT_RDF_MIMETYPE
-        ggr = g.tbox.globalize_imr(rsrc.out_graph)
-        ggr.namespace_manager = nsm
-        return _negotiate_content(
-                ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
-
-    # Datastream.
-    else:
-        if not getattr(rsrc, 'local_path', False):
-            return ('{} has no binary content.'.format(rsrc.uid), 404)
-
-        logger.debug('Streaming out binary content.')
-        if request.range and request.range.units == 'bytes':
-            # Stream partial response.
-            # This is only true if the header is well-formed. Thanks, Werkzeug.
-            rsp = _parse_range_header(request.range.ranges, rsrc, out_headers)
+    with store.txn_ctx():
+        if out_fmt is None:
+            rdf_mimetype = _best_rdf_mimetype()
+            out_fmt = (
+                    'rdf'
+                    if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
+                    else 'non_rdf')
+        out_headers.update(_headers_from_metadata(rsrc, out_fmt))
+        uri = g.tbox.uid_to_uri(uid)
+
+# RDF output.
+        if out_fmt == 'rdf':
+            if locals().get('rdf_mimetype', None) is None:
+                rdf_mimetype = DEFAULT_RDF_MIMETYPE
+            ggr = g.tbox.globalize_imr(rsrc.out_graph)
+            ggr.namespace_manager = nsm
+            return _negotiate_content(
+                    ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
+
+# Datastream.
         else:
-            rsp = make_response(send_file(
-                    rsrc.local_path, as_attachment=True,
-                    attachment_filename=rsrc.filename,
-                    mimetype=rsrc.mimetype), 200, out_headers)
+            if not getattr(rsrc, 'local_path', False):
+                return ('{} has no binary content.'.format(rsrc.uid), 404)
+
+            logger.debug('Streaming out binary content.')
+            if request.range and request.range.units == 'bytes':
+                # Stream partial response.
+                # This is only true if the header is well-formed. Thanks, Werkzeug.
+                rsp = _parse_range_header(
+                    request.range.ranges, rsrc, out_headers
+                )
+            else:
+                rsp = make_response(send_file(
+                        rsrc.local_path, as_attachment=True,
+                        attachment_filename=rsrc.filename,
+                        mimetype=rsrc.mimetype), 200, out_headers)
 
         # This seems necessary to prevent Flask from setting an
         # additional ETag.
@@ -225,7 +230,8 @@ def get_version_info(uid):
     except TombstoneError as e:
         return _tombstone_response(e, uid)
     else:
-        return _negotiate_content(g.tbox.globalize_imr(imr), rdf_mimetype)
+        with store.txn_ctx():
+            return _negotiate_content(g.tbox.globalize_imr(imr), rdf_mimetype)
 
 
 @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
@@ -246,7 +252,8 @@ def get_version(uid, ver_uid):
     except TombstoneError as e:
         return _tombstone_response(e, uid)
     else:
-        return _negotiate_content(g.tbox.globalize_imr(imr), rdf_mimetype)
+        with store.txn_ctx():
+            return _negotiate_content(g.tbox.globalize_imr(imr), rdf_mimetype)
 
 
 @ldp.route('/<path:parent_uid>', methods=['POST'], strict_slashes=False)
@@ -290,7 +297,8 @@ def post_resource(parent_uid):
         return str(e), 412
 
     uri = g.tbox.uid_to_uri(rsrc.uid)
-    rsp_headers.update(_headers_from_metadata(rsrc))
+    with store.txn_ctx():
+        rsp_headers.update(_headers_from_metadata(rsrc))
     rsp_headers['Location'] = uri
 
     if mimetype and kwargs.get('rdf_fmt') is None:
@@ -346,7 +354,8 @@ def put_resource(uid):
     except TombstoneError as e:
         return _tombstone_response(e, uid)
 
-    rsp_headers = _headers_from_metadata(rsrc)
+    with store.txn_ctx():
+        rsp_headers = _headers_from_metadata(rsrc)
     rsp_headers['Content-Type'] = 'text/plain; charset=utf-8'
 
     uri = g.tbox.uid_to_uri(uid)
@@ -397,7 +406,8 @@ def patch_resource(uid, is_metadata=False):
     except InvalidResourceError as e:
         return str(e), 415
     else:
-        rsp_headers.update(_headers_from_metadata(rsrc))
+        with store.txn_ctx():
+            rsp_headers.update(_headers_from_metadata(rsrc))
         return '', 204, rsp_headers
 
 
@@ -455,7 +465,7 @@ def tombstone(uid):
     405.
     """
     try:
-        rsrc = rsrc_api.get(uid)
+        rsrc_api.get(uid)
     except TombstoneError as e:
         if request.method == 'DELETE':
             if e.uid == uid:
@@ -764,12 +774,14 @@ def _condition_hdr_match(uid, headers, safe=True):
         req_etags = [
                 et.strip('\'" ') for et in headers.get(cond_hdr).split(',')]
 
-        try:
-            rsrc_meta = rsrc_api.get_metadata(uid)
-        except ResourceNotExistsError:
-            rsrc_meta = Imr(nsc['fcres'][uid])
+        with store.txn_ctx():
+            try:
+                rsrc_meta = rsrc_api.get_metadata(uid)
+            except ResourceNotExistsError:
+                rsrc_meta = Graph(uri=nsc['fcres'][uid])
+
+            digest_prop = rsrc_meta.value(nsc['premis'].hasMessageDigest)
 
-        digest_prop = rsrc_meta.value(nsc['premis'].hasMessageDigest)
         if digest_prop:
             etag, _ = _digest_headers(digest_prop)
             if cond_hdr == 'if-match':
@@ -793,7 +805,8 @@ def _condition_hdr_match(uid, headers, safe=True):
                 'if-unmodified-since': False
             }
 
-        lastmod_str = rsrc_meta.value(nsc['fcrepo'].lastModified)
+        with store.txn_ctx():
+            lastmod_str = rsrc_meta.value(nsc['fcrepo'].lastModified)
         lastmod_ts = arrow.get(lastmod_str)
 
         # If date is not in a RFC 5322 format

+ 0 - 0
lakesuperior/model/graph/__init__.pxd → lakesuperior/model/__init__.pxd


+ 10 - 0
lakesuperior/model/base.pxd

@@ -16,4 +16,14 @@ cdef enum:
     TRP_KLEN = 3 * sizeof(Key)
     QUAD_KLEN = 4 * sizeof(Key)
 
+    # "NULL" key, a value that is never user-provided. Used to mark special
+    # values (e.g. deleted records).
+    NULL_KEY = 0
+    # Value of first key inserted in an empty term database.
+    FIRST_KEY = 1
+
 cdef bytes buffer_dump(Buffer* buf)
+
+# "NULL" triple, a value that is never user-provided. Used to mark special
+# values (e.g. deleted records).
+cdef TripleKey NULL_TRP = [NULL_KEY, NULL_KEY, NULL_KEY]

+ 30 - 0
lakesuperior/model/callbacks.pxd

@@ -0,0 +1,30 @@
+from lakesuperior.model.base cimport Key, TripleKey
+
+cdef:
+    bint lookup_sk_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_pk_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_ok_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_skpk_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_skok_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_pkok_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )
+
+    bint lookup_none_cmp_fn(
+        const TripleKey* spok, const Key k1, const Key k2
+    )

+ 54 - 0
lakesuperior/model/callbacks.pyx

@@ -0,0 +1,54 @@
+from lakesuperior.model.base cimport Key, TripleKey
+
+cdef inline bint lookup_sk_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """ Keyset lookup for S key. """
+    return spok[0][0] == k1
+
+
+cdef inline bint lookup_pk_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """ Keyset lookup for P key. """
+    return spok[0][1] == k1
+
+
+cdef inline bint lookup_ok_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """ Keyset lookup for O key. """
+    return spok[0][2] == k1
+
+
+cdef inline bint lookup_skpk_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """ Keyset lookup for S and P keys. """
+    return spok[0][0] == k1 and spok[0][1] == k2
+
+
+cdef inline bint lookup_skok_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """ Keyset lookup for S and O keys. """
+    return spok[0][0] == k1 and spok[0][2] == k2
+
+
+cdef inline bint lookup_pkok_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """ Keyset lookup for P and O keys. """
+    return spok[0][1] == k1 and spok[0][2] == k2
+
+
+cdef inline bint lookup_none_cmp_fn(
+    const TripleKey* spok, const Key k1, const Key k2
+):
+    """
+    Dummy callback for queries with all parameters unbound.
+
+    This function always returns ``True`` 
+    """
+    return True
+

+ 0 - 44
lakesuperior/model/graph/callbacks.pxd

@@ -1,44 +0,0 @@
-from libc.stdint cimport uint32_t, uint64_t
-
-from lakesuperior.model.base cimport Buffer
-from lakesuperior.model.graph cimport graph
-from lakesuperior.model.graph.triple cimport BufferTriple
-
-cdef extern from 'spookyhash_api.h':
-    uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)
-
-cdef:
-    bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2)
-    int term_cmp_fn(const void* key1, const void* key2)
-    int trp_cmp_fn(const void* key1, const void* key2)
-    size_t term_hash_fn(const void* key, int l, uint32_t seed)
-    size_t trp_hash_fn(const void* key, int l, uint32_t seed)
-
-    bint lookup_none_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_s_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_p_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_o_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_sp_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_so_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    bint lookup_po_cmp_fn(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-    )
-    void add_trp_callback(
-        graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
-    )
-    void del_trp_callback(
-        graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
-    )
-

+ 0 - 250
lakesuperior/model/graph/callbacks.pyx

@@ -1,250 +0,0 @@
-import logging
-
-from libc.stdint cimport uint32_t, uint64_t
-from libc.string cimport memcmp
-
-cimport lakesuperior.cy_include.collections as cc
-cimport lakesuperior.cy_include.spookyhash as sph
-
-from lakesuperior.model.base cimport Buffer, buffer_dump
-from lakesuperior.model.graph cimport graph
-from lakesuperior.model.graph.triple cimport BufferTriple
-
-logger = logging.getLogger(__name__)
-
-
-cdef int term_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two Buffer objects.
-
-    :rtype: int
-    :return: 0 if the byte streams are the same, another integer otherwise.
-    """
-    b1 = <Buffer *>key1
-    b2 = <Buffer *>key2
-
-    if b1.sz != b2.sz:
-        #logger.info(f'Sizes differ: {b1.sz} != {b2.sz}. Return 1.')
-        return 1
-
-    cdef int cmp = memcmp(b1.addr, b2.addr, b1.sz)
-    #logger.info(f'term memcmp: {cmp}')
-    return cmp
-
-
-cdef int trp_cmp_fn(const void* key1, const void* key2):
-    """
-    Compare function for two triples in a set.
-
-    s, p, o byte data are compared literally.
-
-    :rtype: int
-    :return: 0 if all three terms point to byte-wise identical data in both
-        triples.
-    """
-    t1 = <BufferTriple *>key1
-    t2 = <BufferTriple *>key2
-
-    diff = (
-        term_cmp_fn(t1.o, t2.o) or
-        term_cmp_fn(t1.s, t2.s) or
-        term_cmp_fn(t1.p, t2.p)
-    )
-
-    #logger.info(f'Triples match: {not(diff)}')
-    return diff
-
-
-#cdef int trp_cmp_fn(const void* key1, const void* key2):
-#    """
-#    Compare function for two triples in a set.
-#
-#    Here, pointers to terms are compared for s, p, o. The pointers should be
-#    guaranteed to point to unique values (i.e. no two pointers have the same
-#    term value within a graph).
-#
-#    :rtype: int
-#    :return: 0 if the addresses of all terms are the same, 1 otherwise.
-#    """
-#    t1 = <BufferTriple *>key1
-#    t2 = <BufferTriple *>key2
-#
-#    cdef int is_not_equal = (
-#        t1.s.addr != t2.s.addr or
-#        t1.p.addr != t2.p.addr or
-#        t1.o.addr != t2.o.addr
-#    )
-#
-#    logger.info(f'Triples match: {not(is_not_equal)}')
-#    return is_not_equal
-
-
-cdef bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2):
-    """
-    Compare 2 graphs for equality.
-
-    Note that this returns the opposite value than the triple and term
-    compare functions: 1 (True) if equal, 0 (False) if not.
-    """
-    cdef:
-        void* el
-        cc.HashSetIter it
-
-    cc.hashset_iter_init(&it, g1._triples)
-    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
-        if cc.hashset_contains(g2._triples, el):
-            return False
-
-    return True
-
-
-cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for serialized terms (:py:class:`Buffer` objects)
-    """
-    return <size_t>spookyhash_64((<Buffer*>key).addr, (<Buffer*>key).sz, seed)
-
-
-cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
-    """
-    Hash function for sets of (serialized) triples.
-
-    This function concatenates the literal terms of the triple as bytes
-    and computes their hash.
-    """
-    trp = <BufferTriple*>key
-    seed64 = <uint64_t>seed
-    seed_dummy = seed64
-
-    cdef sph.spookyhash_context ctx
-
-    sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.p.sz)
-    sph.spookyhash_update(&ctx, trp.s.addr, trp.o.sz)
-    sph.spookyhash_final(&ctx, &seed64, &seed_dummy)
-
-    return <size_t>seed64
-
-
-#cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
-#    """
-#    Hash function for sets of (serialized) triples.
-#
-#    This function computes the hash of the concatenated pointer values in the
-#    s, p, o members of the triple. The triple structure is treated as a byte
-#    string. This is safe in spite of byte-wise struct evaluation being a
-#    frowned-upon practice (due to padding issues), because it is assumed that
-#    the input value is always the same type of structure.
-#    """
-#    return <size_t>spookyhash_64(key, l, seed)
-
-
-#cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
-#    """
-#    No-op function that takes a pointer and does *not* hash it.
-#
-#    The pointer value is used as the "hash".
-#    """
-#    return <size_t>key
-
-
-cdef inline bint lookup_none_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Dummy callback for queries with all parameters unbound.
-
-    This function always returns ``True`` 
-    """
-    return True
-
-
-cdef inline bint lookup_s_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``s`` in a triple.
-
-    The function returns ``True`` if ``t1`` matches the first term.
-
-    ``t2`` is not used and is declared only for compatibility with the
-    other interchangeable functions.
-    """
-    return not term_cmp_fn(t1, trp[0].s)
-
-
-cdef inline bint lookup_p_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``p`` in a triple.
-    """
-    return not term_cmp_fn(t1, trp[0].p)
-
-
-cdef inline bint lookup_o_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``o`` in a triple.
-    """
-    return not term_cmp_fn(t1, trp[0].o)
-
-
-cdef inline bint lookup_sp_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``s`` and ``p`` pair.
-    """
-    return (
-            not term_cmp_fn(t1, trp[0].s)
-            and not term_cmp_fn(t2, trp[0].p))
-
-
-cdef inline bint lookup_so_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``s`` and ``o`` pair.
-    """
-    return (
-            not term_cmp_fn(t1, trp[0].s)
-            and not term_cmp_fn(t2, trp[0].o))
-
-
-cdef inline bint lookup_po_cmp_fn(
-    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
-):
-    """
-    Lookup callback compare function for a given ``p`` and ``o`` pair.
-    """
-    return (
-            not term_cmp_fn(t1, trp[0].p)
-            and not term_cmp_fn(t2, trp[0].o))
-
-
-## LOOKUP CALLBACK FUNCTIONS
-
-cdef inline void add_trp_callback(
-    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
-):
-    """
-    Add a triple to a graph as a result of a lookup callback.
-    """
-    gr.add_triple(trp, True)
-
-
-cdef inline void del_trp_callback(
-    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
-):
-    """
-    Remove a triple from a graph as a result of a lookup callback.
-    """
-    #logger.info('removing triple: {} {} {}'.format(
-        #buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
-    #))
-    gr.remove_triple(trp)
-
-

+ 0 - 63
lakesuperior/model/graph/graph.pxd

@@ -1,63 +0,0 @@
-from libc.stdint cimport uint32_t, uint64_t
-
-from cymem.cymem cimport Pool
-
-cimport lakesuperior.cy_include.collections as cc
-
-from lakesuperior.model.base cimport Buffer
-from lakesuperior.model.graph.triple cimport BufferTriple
-
-# Lookup function that returns whether a triple contains a match pattern.
-# Return True if the triple exists, False otherwise.
-ctypedef bint (*lookup_fn_t)(
-        const BufferTriple *trp, const Buffer *t1, const Buffer *t2)
-
-# Callback for an iterator.
-ctypedef void (*lookup_callback_fn_t)(
-    SimpleGraph gr, const BufferTriple* trp, void* ctx
-)
-
-ctypedef Buffer SPOBuffer[3]
-ctypedef Buffer *BufferPtr
-
-cdef class SimpleGraph:
-    cdef:
-        cc.HashSet *_terms # Set of unique serialized terms.
-        cc.HashSet *_triples # Set of unique triples.
-        # Temp data pool. It gets managed with the object lifecycle via cymem.
-        Pool pool
-
-        cc.key_compare_ft term_cmp_fn
-        cc.key_compare_ft trp_cmp_fn
-
-        BufferTriple* store_triple(self, const BufferTriple* strp)
-        void add_triple(
-            self, const BufferTriple *trp, bint copy=*
-        ) except *
-        int remove_triple(self, const BufferTriple* trp_buf) except -1
-        bint trp_contains(self, const BufferTriple* btrp)
-
-        # Basic graph operations.
-        void ip_union(self, SimpleGraph other) except *
-        void ip_subtraction(self, SimpleGraph other) except *
-        void ip_intersection(self, SimpleGraph other) except *
-        void ip_xor(self, SimpleGraph other) except *
-        SimpleGraph empty_copy(self)
-        void _match_ptn_callback(
-            self, pattern, SimpleGraph gr,
-            lookup_callback_fn_t callback_fn, void* ctx=*
-        ) except *
-
-    cpdef union_(self, SimpleGraph other)
-    cpdef subtraction(self, SimpleGraph other)
-    cpdef intersection(self, SimpleGraph other)
-    cpdef xor(self, SimpleGraph other)
-    cpdef void set(self, tuple trp) except *
-
-
-cdef class Imr(SimpleGraph):
-    cdef:
-        readonly str id
-        Imr empty_copy(self)
-
-    cpdef as_rdflib(self)

+ 0 - 922
lakesuperior/model/graph/graph.pyx

@@ -1,922 +0,0 @@
-import logging
-
-from functools import wraps
-
-from rdflib import Graph, URIRef
-from rdflib.term import Node
-
-from lakesuperior import env
-
-from libc.string cimport memcpy
-from libc.stdlib cimport free
-
-from cymem.cymem cimport Pool
-
-cimport lakesuperior.cy_include.collections as cc
-cimport lakesuperior.model.graph.callbacks as cb
-
-from lakesuperior.model.base cimport Buffer, buffer_dump
-from lakesuperior.model.graph cimport term
-from lakesuperior.model.graph.triple cimport BufferTriple
-from lakesuperior.model.structures.hash cimport term_hash_seed32
-
-logger = logging.getLogger(__name__)
-
-
-cdef class SimpleGraph:
-    """
-    Fast and simple implementation of a graph.
-
-    Most functions should mimic RDFLib's graph with less overhead. It uses
-    the same funny but functional slicing notation.
-
-    A SimpleGraph can be instantiated from a store lookup. This makes it
-    possible to use a Keyset to perform initial filtering via identity by key,
-    then the filtered Keyset can be converted into a set of meaningful terms.
-
-    An instance of this class can also be converted to and from a
-    ``rdflib.Graph`` instance.
-    """
-
-    def __cinit__(self, set data=set(), *args, **kwargs):
-        """
-        Initialize the graph, optionally with Python data.
-
-        :param set data: Initial data as a set of 3-tuples of RDFLib terms.
-        """
-        cdef:
-            cc.HashSetConf terms_conf, trp_conf
-
-        self.term_cmp_fn = cb.term_cmp_fn
-        self.trp_cmp_fn = cb.trp_cmp_fn
-
-        cc.hashset_conf_init(&terms_conf)
-        terms_conf.load_factor = 0.85
-        terms_conf.hash = cb.term_hash_fn
-        terms_conf.hash_seed = term_hash_seed32
-        terms_conf.key_compare = self.term_cmp_fn
-        terms_conf.key_length = sizeof(Buffer*)
-
-        cc.hashset_conf_init(&trp_conf)
-        trp_conf.load_factor = 0.75
-        trp_conf.hash = cb.trp_hash_fn
-        trp_conf.hash_seed = term_hash_seed32
-        trp_conf.key_compare = self.trp_cmp_fn
-        trp_conf.key_length = sizeof(BufferTriple)
-
-        cc.hashset_new_conf(&terms_conf, &self._terms)
-        cc.hashset_new_conf(&trp_conf, &self._triples)
-
-        self.pool = Pool()
-
-        # Initialize empty data set.
-        if data:
-            # Populate with provided Python set.
-            self.add(data)
-
-
-    def __dealloc__(self):
-        """
-        Free the triple pointers.
-        """
-        free(self._triples)
-        free(self._terms)
-
-
-    ## PROPERTIES ##
-
-    @property
-    def data(self):
-        """
-        Triple data as a Python generator.
-
-        :rtype: generator
-        """
-        cdef:
-            void *void_p
-            cc.HashSetIter ti
-            Buffer* ss
-            Buffer* sp
-            Buffer* so
-
-        cc.hashset_iter_init(&ti, self._triples)
-        while cc.hashset_iter_next(&ti, &void_p) != cc.CC_ITER_END:
-            #logger.info(f'Data loop.')
-            if void_p == NULL:
-                #logger.warn('Triple is NULL!')
-                break
-
-            trp = <BufferTriple *>void_p
-            #print(f'trp.s: {buffer_dump(trp.s)}')
-            #print(f'trp.p: {buffer_dump(trp.p)}')
-            #print(f'trp.o: {buffer_dump(trp.o)}')
-            yield (
-                term.deserialize_to_rdflib(trp.s),
-                term.deserialize_to_rdflib(trp.p),
-                term.deserialize_to_rdflib(trp.o),
-            )
-
-    @property
-    def stored_terms(self):
-        """
-        All terms in the graph with their memory address.
-
-        For debugging purposes.
-        """
-        cdef:
-            cc.HashSetIter it
-            void *cur
-
-        terms = set()
-
-        cc.hashset_iter_init(&it, self._terms)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            s_term = <Buffer*>cur
-            terms.add((f'0x{<size_t>cur:02x}', term.deserialize_to_rdflib(s_term)))
-
-        return terms
-
-
-    ## MAGIC METHODS ##
-
-    def __len__(self):
-        """ Number of triples in the graph. """
-        return cc.hashset_size(self._triples)
-
-
-    def __eq__(self, other):
-        """ Equality operator between ``SimpleGraph`` instances. """
-        return len(self ^ other) == 0
-
-
-    def __repr__(self):
-        """
-        String representation of the graph.
-
-        It provides the number of triples in the graph and memory address of
-            the instance.
-        """
-        return (
-            f'<{self.__class__.__name__} @{hex(id(self))} '
-            f'length={len(self)}>'
-        )
-
-
-    def __str__(self):
-        """ String dump of the graph triples. """
-        return str(self.data)
-
-
-    def __add__(self, other):
-        """ Alias for set-theoretical union. """
-        return self.union_(other)
-
-
-    def __iadd__(self, other):
-        """ Alias for in-place set-theoretical union. """
-        self.ip_union(other)
-        return self
-
-
-    def __sub__(self, other):
-        """ Set-theoretical subtraction. """
-        return self.subtraction(other)
-
-
-    def __isub__(self, other):
-        """ In-place set-theoretical subtraction. """
-        self.ip_subtraction(other)
-        return self
-
-    def __and__(self, other):
-        """ Set-theoretical intersection. """
-        return self.intersection(other)
-
-
-    def __iand__(self, other):
-        """ In-place set-theoretical intersection. """
-        self.ip_intersection(other)
-        return self
-
-
-    def __or__(self, other):
-        """ Set-theoretical union. """
-        return self.union_(other)
-
-
-    def __ior__(self, other):
-        """ In-place set-theoretical union. """
-        self.ip_union(other)
-        return self
-
-
-    def __xor__(self, other):
-        """ Set-theoretical exclusive disjunction (XOR). """
-        return self.xor(other)
-
-
-    def __ixor__(self, other):
-        """ In-place set-theoretical exclusive disjunction (XOR). """
-        self.ip_xor(other)
-        return self
-
-
-    def __contains__(self, trp):
-        """
-        Whether the graph contains a triple.
-
-        :rtype: boolean
-        """
-        cdef:
-            Buffer ss, sp, so
-            BufferTriple btrp
-
-        btrp.s = &ss
-        btrp.p = &sp
-        btrp.o = &so
-
-        s, p, o = trp
-        term.serialize_from_rdflib(s, &ss)
-        term.serialize_from_rdflib(p, &sp)
-        term.serialize_from_rdflib(o, &so)
-
-        return self.trp_contains(&btrp)
-
-
-    def __iter__(self):
-        """ Graph iterator. It iterates over the set triples. """
-        yield from self.data
-
-
-    #def __next__(self):
-    #    """ Graph iterator. It iterates over the set triples. """
-    #    return self.data.__next__()
-
-
-    # Slicing.
-
-    def __getitem__(self, item):
-        """
-        Slicing function.
-
-        It behaves similarly to `RDFLib graph slicing
-        <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
-        """
-        if isinstance(item, slice):
-            s, p, o = item.start, item.stop, item.step
-            return self._slice(s, p, o)
-        else:
-            raise TypeError(f'Wrong slice format: {item}.')
-
-
-    def __hash__(self):
-        return 23465
-
-
-    ## BASIC PYTHON-ACCESSIBLE SET OPERATIONS ##
-
-    def terms_by_type(self, type):
-        """
-        Get all terms of a type: subject, predicate or object.
-
-        :param str type: One of ``s``, ``p`` or ``o``.
-        """
-        i = 'spo'.index(type)
-        return {r[i] for r in self.data}
-
-
-    def add(self, trp):
-        """
-        Add triples to the graph.
-
-        :param iterable triples: iterable of 3-tuple triples.
-        """
-        cdef size_t cur = 0, trp_cur = 0
-
-        trp_ct = len(trp)
-        term_buf = <Buffer*>self.pool.alloc(3 * trp_ct, sizeof(Buffer))
-        trp_buf = <BufferTriple*>self.pool.alloc(trp_ct, sizeof(BufferTriple))
-
-        for s, p, o in trp:
-            term.serialize_from_rdflib(s, term_buf + cur, self.pool)
-            term.serialize_from_rdflib(p, term_buf + cur + 1, self.pool)
-            term.serialize_from_rdflib(o, term_buf + cur + 2, self.pool)
-
-            (trp_buf + trp_cur).s = term_buf + cur
-            (trp_buf + trp_cur).p = term_buf + cur + 1
-            (trp_buf + trp_cur).o = term_buf + cur + 2
-
-            self.add_triple(trp_buf + trp_cur)
-
-            trp_cur += 1
-            cur += 3
-
-
-    def len_terms(self):
-        """ Number of terms in the graph. """
-        return cc.hashset_size(self._terms)
-
-
-    def remove(self, pattern):
-        """
-        Remove triples by pattern.
-
-        The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
-        """
-        self._match_ptn_callback(
-            pattern, self, cb.del_trp_callback, NULL
-        )
-
-
-    ## CYTHON-ACCESSIBLE BASIC METHODS ##
-
-    cdef SimpleGraph empty_copy(self):
-        """
-        Create an empty copy carrying over some key properties.
-
-        Override in subclasses to accommodate for different init properties.
-        """
-        return self.__class__()
-
-
-    cpdef union_(self, SimpleGraph other):
-        """
-        Perform set union resulting in a new SimpleGraph instance.
-
-        TODO Allow union of multiple graphs at a time.
-
-        :param SimpleGraph other: The other graph to merge.
-
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-            BufferTriple *trp
-
-        new_gr = self.empty_copy()
-
-        for gr in (self, other):
-            cc.hashset_iter_init(&it, gr._triples)
-            while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-                bt = <BufferTriple*>cur
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_union(self, SimpleGraph other) except *:
-        """
-        Perform an in-place set union that adds triples to this instance
-
-        TODO Allow union of multiple graphs at a time.
-
-        :param SimpleGraph other: The other graph to merge.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        cc.hashset_iter_init(&it, other._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            self.add_triple(bt, True)
-
-
-    cpdef intersection(self, SimpleGraph other):
-        """
-        Graph intersection.
-
-        :param SimpleGraph other: The other graph to intersect.
-
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        new_gr = self.empty_copy()
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            #print('Checking: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
-            #    <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
-            if other.trp_contains(bt):
-                #print('Adding.')
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_intersection(self, SimpleGraph other) except *:
-        """
-        In-place graph intersection.
-
-        Triples not in common with another graph are removed from the current
-        one.
-
-        :param SimpleGraph other: The other graph to intersect.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not other.trp_contains(bt):
-                self.remove_triple(bt)
-
-
-    cpdef subtraction(self, SimpleGraph other):
-        """
-        Graph set-theoretical subtraction.
-
-        Create a new graph with the triples of this graph minus the ones in
-        common with the other graph.
-
-        :param SimpleGraph other: The other graph to subtract to this.
-
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        new_gr = self.empty_copy()
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            #print('Checking: <0x{:02x}> <0x{:02x}> <0x{:02x}>'.format(
-            #    <size_t>bt.s, <size_t>bt.p, <size_t>bt.o))
-            if not other.trp_contains(bt):
-                #print('Adding.')
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_subtraction(self, SimpleGraph other) except *:
-        """
-        In-place graph subtraction.
-
-        Triples in common with another graph are removed from the current one.
-
-        :param SimpleGraph other: The other graph to intersect.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if other.trp_contains(bt):
-                self.remove_triple(bt)
-
-
-    cpdef xor(self, SimpleGraph other):
-        """
-        Graph Exclusive disjunction (XOR).
-
-        :param SimpleGraph other: The other graph to perform XOR with.
-
-        :rtype: SimpleGraph
-        :return: A new SimpleGraph instance.
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-            BufferTriple* bt
-
-        new_gr = self.empty_copy()
-
-        # Add triples in this and not in other.
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not other.trp_contains(bt):
-                new_gr.add_triple(bt, True)
-
-        # Other way around.
-        cc.hashset_iter_init(&it, other._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not self.trp_contains(bt):
-                new_gr.add_triple(bt, True)
-
-        return new_gr
-
-
-    cdef void ip_xor(self, SimpleGraph other) except *:
-        """
-        In-place graph XOR.
-
-        Triples in common with another graph are removed from the current one,
-        and triples not in common will be added from the other one.
-
-        :param SimpleGraph other: The other graph to perform XOR with.
-
-        :rtype: void
-        """
-        cdef:
-            void *cur
-            cc.HashSetIter it
-            # TODO This could be more efficient to stash values in a simple
-            # array, but how urgent is it to improve an in-place XOR?
-            SimpleGraph tmp = SimpleGraph()
-
-        # Add *to the tmp graph* triples in other graph and not in this graph.
-        cc.hashset_iter_init(&it, other._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if not self.trp_contains(bt):
-                tmp.add_triple(bt)
-
-        # Remove triples in common.
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            bt = <BufferTriple*>cur
-            if other.trp_contains(bt):
-                self.remove_triple(bt)
-
-        self |= tmp
-
-
-    cdef inline BufferTriple* store_triple(self, const BufferTriple* strp):
-        """
-        Store triple data in the graph.
-
-        Normally, raw data underlying the triple and terms are only referenced
-        by pointers. If the destination data are garbage collected before the
-        graph is, segfaults are bound to happen.
-
-        This method copies the data to the graph's memory pool, so they are
-        managed with the lifecycle of the graph.
-
-        Note that this method stores items regardless of whether thwy are
-        duplicate or not, so there may be some duplication.
-        """
-        cdef:
-            BufferTriple* dtrp = <BufferTriple*>self.pool.alloc(
-                1, sizeof(BufferTriple)
-            )
-            Buffer* spo = <Buffer*>self.pool.alloc(3, sizeof(Buffer))
-
-        if not dtrp:
-            raise MemoryError()
-        if not spo:
-            raise MemoryError()
-
-        dtrp.s = spo
-        dtrp.p = spo + 1
-        dtrp.o = spo + 2
-
-        spo[0].addr = self.pool.alloc(strp.s.sz, 1)
-        spo[0].sz = strp.s.sz
-        spo[1].addr = self.pool.alloc(strp.p.sz, 1)
-        spo[1].sz = strp.p.sz
-        spo[2].addr = self.pool.alloc(strp.o.sz, 1)
-        spo[2].sz = strp.o.sz
-
-        if not spo[0].addr or not spo[1].addr or not spo[2].addr:
-            raise MemoryError()
-
-        memcpy(dtrp.s.addr, strp.s.addr, strp.s.sz)
-        memcpy(dtrp.p.addr, strp.p.addr, strp.p.sz)
-        memcpy(dtrp.o.addr, strp.o.addr, strp.o.sz)
-
-        return dtrp
-
-
-    cdef inline void add_triple(
-        self, const BufferTriple* trp, bint copy=False
-    ) except *:
-        """
-        Add a triple from 3 (TPL) serialized terms.
-
-        Each of the terms is added to the term set if not existing. The triple
-        also is only added if not existing.
-
-        :param BufferTriple* trp: The triple to add.
-        :param bint copy: if ``True``, the triple and term data will be
-            allocated and copied into the graph memory pool.
-        """
-        if copy:
-            trp = self.store_triple(trp)
-
-        #logger.info('Inserting terms.')
-        cc.hashset_add(self._terms, trp.s)
-        cc.hashset_add(self._terms, trp.p)
-        cc.hashset_add(self._terms, trp.o)
-        #logger.info('inserted terms.')
-        #logger.info(f'Terms set size: {cc.hashset_size(self._terms)}')
-
-        cdef size_t trp_sz = cc.hashset_size(self._triples)
-        #logger.info(f'Triples set size before adding: {trp_sz}')
-
-        r = cc.hashset_add(self._triples, trp)
-
-        trp_sz = cc.hashset_size(self._triples)
-        #logger.info(f'Triples set size after adding: {trp_sz}')
-
-        cdef:
-            cc.HashSetIter ti
-            void *cur
-
-
-    cdef int remove_triple(self, const BufferTriple* btrp) except -1:
-        """
-        Remove one triple from the graph.
-        """
-        return cc.hashset_remove(self._triples, btrp, NULL)
-
-
-    cdef bint trp_contains(self, const BufferTriple* btrp):
-        cdef:
-            cc.HashSetIter it
-            void* cur
-
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            if self.trp_cmp_fn(cur, btrp) == 0:
-                return True
-        return False
-
-
-    cpdef void set(self, tuple trp) except *:
-        """
-        Set a single value for subject and predicate.
-
-        Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
-        """
-        if None in trp:
-            raise ValueError(f'Invalid triple: {trp}')
-        self.remove((trp[0], trp[1], None))
-        self.add((trp,))
-
-
-    def as_rdflib(self):
-        """
-        Return the data set as an RDFLib Graph.
-
-        :rtype: rdflib.Graph
-        """
-        gr = Graph()
-        for trp in self.data:
-            gr.add(trp)
-
-        return gr
-
-
-    def _slice(self, s, p, o):
-        """
-        Return terms filtered by other terms.
-
-        This behaves like the rdflib.Graph slicing policy.
-        """
-        #logger.info(f'Slicing graph by: {s}, {p}, {o}.')
-        # If no terms are unbound, check for containment.
-        if s is not None and p is not None and o is not None: # s p o
-            return (s, p, o) in self
-
-        # If some terms are unbound, do a lookup.
-        res = self.lookup((s, p, o))
-        if s is not None:
-            if p is not None: # s p ?
-                return {r[2] for r in res}
-
-            if o is not None: # s ? o
-                return {r[1] for r in res}
-
-            # s ? ?
-            return {(r[1], r[2]) for r in res}
-
-        if p is not None:
-            if o is not None: # ? p o
-                return {r[0] for r in res}
-
-            # ? p ?
-            return {(r[0], r[2]) for r in res}
-
-        if o is not None: # ? ? o
-            return {(r[0], r[1]) for r in res}
-
-        # ? ? ?
-        return res
-
-
-    def lookup(self, pattern):
-        """
-        Look up triples by a pattern.
-
-        This function converts RDFLib terms into the serialized format stored
-        in the graph's internal structure and compares them bytewise.
-
-        Any and all of the lookup terms msy be ``None``.
-
-        :rtype: SimpleGraph
-        "return: New SimpleGraph instance with matching triples.
-        """
-        cdef:
-            void* cur
-            BufferTriple trp
-            SimpleGraph res_gr = SimpleGraph()
-
-        self._match_ptn_callback(pattern, res_gr, cb.add_trp_callback, NULL)
-
-        return res_gr
-
-
-    cdef void _match_ptn_callback(
-        self, pattern, SimpleGraph gr,
-        lookup_callback_fn_t callback_fn, void* ctx=NULL
-    ) except *:
-        """
-        Execute an arbitrary function on a list of triples matching a pattern.
-
-        The arbitrary function is appied to each triple found in the current
-        graph, and to a discrete graph that can be the current graph itself
-        or a different one.
-        """
-        cdef:
-            void* cur
-            Buffer t1, t2
-            Buffer ss, sp, so
-            BufferTriple trp
-            BufferTriple* trp_p
-            lookup_fn_t cmp_fn
-            cc.HashSetIter it
-
-        s, p, o = pattern
-
-        # Decide comparison logic outside the loop.
-        if s is not None and p is not None and o is not None:
-            #logger.info('Looping over one triple only.')
-            # Shortcut for 3-term match.
-            trp.s = &ss
-            trp.p = &sp
-            trp.o = &so
-            term.serialize_from_rdflib(s, trp.s, self.pool)
-            term.serialize_from_rdflib(p, trp.p, self.pool)
-            term.serialize_from_rdflib(o, trp.o, self.pool)
-
-            if cc.hashset_contains(self._triples, &trp):
-                callback_fn(gr, &trp, ctx)
-                return
-
-        if s is not None:
-            term.serialize_from_rdflib(s, &t1)
-            if p is not None:
-                cmp_fn = cb.lookup_sp_cmp_fn
-                term.serialize_from_rdflib(p, &t2)
-            elif o is not None:
-                cmp_fn = cb.lookup_so_cmp_fn
-                term.serialize_from_rdflib(o, &t2)
-            else:
-                cmp_fn = cb.lookup_s_cmp_fn
-        elif p is not None:
-            term.serialize_from_rdflib(p, &t1)
-            if o is not None:
-                cmp_fn = cb.lookup_po_cmp_fn
-                term.serialize_from_rdflib(o, &t2)
-            else:
-                cmp_fn = cb.lookup_p_cmp_fn
-        elif o is not None:
-            cmp_fn = cb.lookup_o_cmp_fn
-            term.serialize_from_rdflib(o, &t1)
-        else:
-            cmp_fn = cb.lookup_none_cmp_fn
-
-        # Iterate over serialized triples.
-        cc.hashset_iter_init(&it, self._triples)
-        while cc.hashset_iter_next(&it, &cur) != cc.CC_ITER_END:
-            trp_p = <BufferTriple*>cur
-            if cmp_fn(trp_p, &t1, &t2):
-                callback_fn(gr, trp_p, ctx)
-
-
-
-cdef class Imr(SimpleGraph):
-    """
-    In-memory resource data container.
-
-    This is an extension of :py:class:`~SimpleGraph` that adds a subject URI to
-    the data set and some convenience methods.
-
-    An instance of this class can be converted to a ``rdflib.Resource``
-    instance.
-
-    Some set operations that produce a new object (``-``, ``|``, ``&``, ``^``)
-    will create a new ``Imr`` instance with the same subject URI.
-    """
-    def __init__(self, uri, *args, **kwargs):
-        """
-        Initialize the graph with pre-existing data or by looking up a store.
-
-        Either ``data``, or ``lookup`` *and* ``store``, can be provide.
-        ``lookup`` and ``store`` have precedence. If none of them is specified,
-        an empty graph is initialized.
-
-        :param rdflib.URIRef uri: The graph URI.
-            This will serve as the subject for some queries.
-        :param args: Positional arguments inherited from
-            ``SimpleGraph.__init__``.
-        :param kwargs: Keyword arguments inherited from
-            ``SimpleGraph.__init__``.
-        """
-        self.id = str(uri)
-        #super().__init(*args, **kwargs)
-
-
-    def __repr__(self):
-        """
-        String representation of an Imr.
-
-        This includes the subject URI, number of triples contained and the
-        memory address of the instance.
-        """
-        return (f'<{self.__class__.__name__} @{hex(id(self))} id={self.id}, '
-            f'length={len(self)}>')
-
-
-    def __getitem__(self, item):
-        """
-        Supports slicing notation.
-        """
-        if isinstance(item, slice):
-            s, p, o = item.start, item.stop, item.step
-            return self._slice(s, p, o)
-
-        elif isinstance(item, Node):
-            # If a Node is given, return all values for that predicate.
-            return self._slice(self.uri, item, None)
-        else:
-            raise TypeError(f'Wrong slice format: {item}.')
-
-
-    @property
-    def uri(self):
-        """
-        Get resource identifier as a RDFLib URIRef.
-
-        :rtype: rdflib.URIRef.
-        """
-        return URIRef(self.id)
-
-
-    cdef Imr empty_copy(self):
-        """
-        Create an empty instance carrying over some key properties.
-        """
-        return self.__class__(uri=self.id)
-
-
-    def value(self, p, strict=False):
-        """
-        Get an individual value.
-
-        :param rdflib.termNode p: Predicate to search for.
-        :param bool strict: If set to ``True`` the method raises an error if
-            more than one value is found. If ``False`` (the default) only
-            the first found result is returned.
-        :rtype: rdflib.term.Node
-        """
-        # TODO use slice.
-        values = {trp[2] for trp in self.lookup((self.uri, p, None))}
-        #logger.info(f'Values found: {values}')
-
-        if strict and len(values) > 1:
-            raise RuntimeError('More than one value found for {}, {}.'.format(
-                    self.id, p))
-
-        for ret in values:
-            return ret
-
-        return None
-
-
-    cpdef as_rdflib(self):
-        """
-        Return the IMR as a RDFLib Resource.
-
-        :rtype: rdflib.Resource
-        """
-        gr = Graph()
-        for trp in self.data:
-            gr.add(trp)
-
-        return gr.resource(identifier=self.uri)
-
-

+ 8 - 8
lakesuperior/model/ldp/ldp_factory.py

@@ -3,7 +3,6 @@ import logging
 from pprint import pformat
 from uuid import uuid4
 
-from rdflib import Graph, parser
 from rdflib.resource import Resource
 from rdflib.namespace import RDF
 
@@ -16,7 +15,7 @@ from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import (
         IncompatibleLdpTypeError, InvalidResourceError, ResourceExistsError,
         ResourceNotExistsError, TombstoneError)
-from lakesuperior.model.graph.graph import Imr
+from lakesuperior.model.rdf.graph import Graph, from_rdf
 
 
 LDP_NR_TYPE = nsc['ldp'].NonRDFSource
@@ -37,7 +36,7 @@ class LdpFactory:
             raise InvalidResourceError(uid)
         if rdfly.ask_rsrc_exists(uid):
             raise ResourceExistsError(uid)
-        rsrc = Ldpc(uid, provided_imr=Imr(uri=nsc['fcres'][uid]))
+        rsrc = Ldpc(uid, provided_imr=Graph(uri=nsc['fcres'][uid]))
 
         return rsrc
 
@@ -100,14 +99,15 @@ class LdpFactory:
         """
         uri = nsc['fcres'][uid]
         if rdf_data:
-            data = set(Graph().parse(
-                data=rdf_data, format=rdf_fmt, publicID=nsc['fcres'][uid]))
+            provided_imr = from_rdf(
+                uri=uri, data=rdf_data, format=rdf_fmt,
+                publicID=nsc['fcres'][uid]
+            )
         elif graph:
-            data = set(graph)
+            provided_imr = Graph(uri=uri, data={*graph})
         else:
-            data = set()
+            provided_imr = Graph(uri=uri)
 
-        provided_imr = Imr(uri=uri, data=data)
         #logger.debug('Provided graph: {}'.format(
         #        pformat(set(provided_imr))))
 

+ 16 - 14
lakesuperior/model/ldp/ldpr.py

@@ -12,8 +12,9 @@ from urllib.parse import urldefrag
 from uuid import uuid4
 
 import arrow
+import rdflib
 
-from rdflib import Graph, URIRef, Literal
+from rdflib import URIRef, Literal
 from rdflib.compare import to_isomorphic
 from rdflib.namespace import RDF
 
@@ -26,7 +27,7 @@ from lakesuperior.dictionaries.srv_mgd_terms import (
 from lakesuperior.exceptions import (
     InvalidResourceError, RefIntViolationError, ResourceNotExistsError,
     ServerManagedTermError, TombstoneError)
-from lakesuperior.model.graph.graph import SimpleGraph, Imr
+from lakesuperior.model.rdf.graph import Graph
 from lakesuperior.store.ldp_rs.rsrc_centric_layout import VERS_CONT_LABEL
 from lakesuperior.toolbox import Toolbox
 
@@ -233,7 +234,7 @@ class Ldpr(metaclass=ABCMeta):
         :param v: New set of triples to populate the IMR with.
         :type v: set or rdflib.Graph
         """
-        self._imr = Imr(self.uri, data=set(data))
+        self._imr = Graph(uri=self.uri, data=set(data))
 
 
     @imr.deleter
@@ -266,8 +267,8 @@ class Ldpr(metaclass=ABCMeta):
         """
         Set resource metadata.
         """
-        if not isinstance(rsrc, Imr):
-            raise TypeError('Provided metadata is not an Imr object.')
+        if not isinstance(rsrc, Graph):
+            raise TypeError('Provided metadata is not a Graph object.')
         self._metadata = rsrc
 
 
@@ -292,7 +293,7 @@ class Ldpr(metaclass=ABCMeta):
             ):
                 out_trp.add(t)
 
-        return Imr(uri = self.uri, data=out_trp)
+        return Graph(uri=self.uri, data=out_trp)
 
 
     @property
@@ -304,7 +305,7 @@ class Ldpr(metaclass=ABCMeta):
             try:
                 self._version_info = rdfly.get_version_info(self.uid)
             except ResourceNotExistsError as e:
-                self._version_info = Imr(uri=self.uri)
+                self._version_info = Graph(uri=self.uri)
 
         return self._version_info
 
@@ -582,7 +583,7 @@ class Ldpr(metaclass=ABCMeta):
 
         ver_gr = rdfly.get_imr(
             self.uid, ver_uid=ver_uid, incl_children=False)
-        self.provided_imr = Imr(uri=self.uri)
+        self.provided_imr = Graph(uri=self.uri)
 
         for t in ver_gr:
             if not self._is_trp_managed(t):
@@ -675,15 +676,15 @@ class Ldpr(metaclass=ABCMeta):
         qry_str = (
                 re.sub('<#([^>]+)>', '<{}#\\1>'.format(self.uri), qry_str)
                 .replace('<>', '<{}>'.format(self.uri)))
-        pre_gr = self.imr.as_rdflib().graph
-        post_gr = Graph(identifier=self.uri)
+        pre_gr = self.imr.as_rdflib()
+        post_gr = rdflib.Graph(identifier=self.uri)
         post_gr |= pre_gr
 
         post_gr.update(qry_str)
 
         # FIXME Fix and  use SimpleGraph's native subtraction operation.
-        remove_gr = self.check_mgd_terms(SimpleGraph(set(pre_gr - post_gr)))
-        add_gr = self.check_mgd_terms(SimpleGraph(set(post_gr - pre_gr)))
+        remove_gr = self.check_mgd_terms(Graph(data=set(pre_gr - post_gr)))
+        add_gr = self.check_mgd_terms(Graph(data=set(post_gr - pre_gr)))
 
         return remove_gr, add_gr
 
@@ -895,8 +896,9 @@ class Ldpr(metaclass=ABCMeta):
         # Only update parent if the resource is new.
         if create:
             add_gr = Graph()
-            add_gr.add(
-                (nsc['fcres'][parent_uid], nsc['ldp'].contains, self.uri))
+            add_gr.add({
+                (nsc['fcres'][parent_uid], nsc['ldp'].contains, self.uri)
+            })
             parent_rsrc.modify(RES_UPDATED, add_trp=add_gr)
 
         # Direct or indirect container relationship.

+ 0 - 0
lakesuperior/model/graph/__init__.py → lakesuperior/model/rdf/__init__.pxd


+ 0 - 0
lakesuperior/model/rdf/__init__.py


+ 37 - 0
lakesuperior/model/rdf/graph.pxd

@@ -0,0 +1,37 @@
+from libc.stdint cimport uint32_t, uint64_t
+
+from cymem.cymem cimport Pool
+
+cimport lakesuperior.cy_include.collections as cc
+
+from lakesuperior.model.base cimport Key, TripleKey
+from lakesuperior.model.rdf.triple cimport BufferTriple
+from lakesuperior.model.structures.keyset cimport Keyset
+from lakesuperior.store.ldp_rs cimport lmdb_triplestore
+
+# Callback for an iterator.
+ctypedef void (*lookup_callback_fn_t)(
+    Graph gr, const TripleKey* spok_p, void* ctx
+)
+
+cdef class Graph:
+    cdef:
+        readonly lmdb_triplestore.LmdbTriplestore store
+        public Keyset keys
+        public object uri
+
+        cc.key_compare_ft term_cmp_fn
+        cc.key_compare_ft trp_cmp_fn
+
+        void _match_ptn_callback(
+            self, pattern, Graph gr, lookup_callback_fn_t callback_fn,
+            bint callback_cond=*, void* ctx=*
+        ) except *
+
+    cpdef Graph copy(self, str uri=*)
+    cpdef Graph empty_copy(self, str uri=*)
+    cpdef void set(self, tuple trp) except *
+
+
+cdef:
+    void add_trp_callback(Graph gr, const TripleKey* spok_p, void* ctx)

+ 613 - 0
lakesuperior/model/rdf/graph.pyx

@@ -0,0 +1,613 @@
+import logging
+
+import rdflib
+
+from lakesuperior import env
+
+from cpython.object cimport Py_LT, Py_EQ, Py_GT, Py_LE, Py_NE, Py_GE
+from libc.string cimport memcpy
+from libc.stdlib cimport free
+
+cimport lakesuperior.cy_include.collections as cc
+cimport lakesuperior.model.callbacks as cb
+cimport lakesuperior.model.structures.keyset as kset
+
+from lakesuperior.model.base cimport Key, TripleKey
+from lakesuperior.model.rdf cimport term
+from lakesuperior.model.rdf.triple cimport BufferTriple
+from lakesuperior.model.structures.hash cimport term_hash_seed32
+from lakesuperior.model.structures.keyset cimport Keyset
+
+logger = logging.getLogger(__name__)
+
+
+cdef class Graph:
+    """
+    Fast and simple implementation of a graph.
+
+    Most functions should mimic RDFLib's graph with less overhead. It uses
+    the same funny but functional slicing notation.
+
+    A Graph contains a :py:class:`lakesuperior.model.structures.keyset.Keyset`
+    at its core and is bound to a
+    :py:class:`~lakesuperior.store.ldp_rs.lmdb_triplestore.LmdbTriplestore`.
+    This makes lookups and boolean operations very efficient because all these
+    operations are performed on an array of integers.
+
+    In order to retrieve RDF values from a ``Graph``, the underlying store
+    must be looked up. This can be done in a different transaction than the
+    one used to create or otherwise manipulate the graph.
+
+    Every time a term is looked up or added to even a temporary graph, that
+    term is added to the store and creates a key. This is because in the
+    majority of cases that term is likely to be stored permanently anyway, and
+    it's more efficient to hash it and allocate it immediately. A cleanup
+    function to remove all orphaned terms (not in any triple or context index)
+    can be later devised to compact the database.
+
+    An instance of this class can also be converted to a ``rdflib.Graph``
+    instance.
+    """
+
+    def __cinit__(
+        self, store=None, size_t capacity=0, uri=None, set data=set()
+    ):
+        """
+        Initialize the graph, optionally from Python/RDFlib data.
+
+        When initializing a non-empty Graph, a store transaction must be
+        opened::
+
+            >>> from rdflib import URIRef
+            >>> from lakesuperior import env_setup, env
+            >>> store = env.app_globals.rdf_store
+            >>> # Or alternatively:
+            >>> # from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore
+            >>> # store = LmdbStore('/tmp/test')
+            >>> trp = {(URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0'))}
+            >>> with store.txn_ctx():
+            >>>     gr = Graph(store, data=trp)
+
+        Similarly, any operation such as adding, changing or looking up triples
+        needs a store transaction.
+
+        Note that, even though any operation may involve adding new terms to
+        the store, a read-only transaction is sufficient. Lakesuperior will
+        open a write transaction automatically only if necessary and only for
+        the time needed to enter the new terms.
+
+        :type store: lakesuperior.store.ldp_rs.lmdb_triplestore.LmdbTriplestore
+        :param store: Triplestore where keys are mapped to terms. By default
+            this is the default application store
+            (``env.app_globals.rdf_store``).
+
+        :param size_t capacity: Initial number of allocated triples.
+
+        :param str uri: If specified, the graph becomes a named graph and can
+            utilize the :py:meth:`value()` method and special slicing notation.
+
+        :param set data: If specified, ``capacity`` is ignored and an initial key
+            set is created from a set of 3-tuples of :py:class:`rdflib.Term`
+            instances.
+        """
+        self.uri = rdflib.URIRef(uri) if uri else None
+
+        self.store = store if store is not None else env.app_globals.rdf_store
+        #logger.debug(f'Assigned store at {self.store.env_path}')
+
+        # Initialize empty data set.
+        if data:
+            # Populate with provided Python set.
+            self.keys = Keyset(len(data))
+            self.add(data)
+        else:
+            self.keys = Keyset(capacity)
+
+
+    ## PROPERTIES ##
+
+    property data:
+        def __get__(self):
+            """
+            Triple data as a Python/RDFlib set.
+
+            :rtype: set
+            """
+            cdef TripleKey spok
+
+            ret = set()
+
+            self.keys.seek()
+            while self.keys.get_next(&spok):
+                ret.add((
+                    self.store.from_key(spok[0]),
+                    self.store.from_key(spok[1]),
+                    self.store.from_key(spok[2])
+                ))
+
+            return ret
+
+
+    property capacity:
+        def __get__(self):
+            """
+            Total capacity of the underlying Keyset, in number of triples.
+            """
+            return self.keys.capacity
+
+
+    property txn_ctx:
+        def __get__(self):
+            """ Expose underlying store's ``txn_ctx``. """
+            return self.store.txn_ctx
+
+
+    ## MAGIC METHODS ##
+
+    def __len__(self):
+        """ Number of triples in the graph. """
+        return self.keys.size()
+
+
+    def __richcmp__(self, other, int op):
+        """ Comparators between ``Graph`` instances. """
+        if op == Py_LT:
+            raise NotImplementedError()
+        elif op == Py_EQ:
+            return len(self ^ other) == 0
+        elif op == Py_GT:
+            raise NotImplementedError()
+        elif op == Py_LE:
+            raise NotImplementedError()
+        elif op == Py_NE:
+            return len(self ^ other) != 0
+        elif op == Py_GE:
+            raise NotImplementedError()
+
+
+    def __repr__(self):
+        """
+        String representation of the graph.
+
+        This includes the subject URI, number of triples contained and the
+        memory address of the instance.
+        """
+        uri_repr = f', uri={self.uri}' if self.uri else ''
+        return (
+            f'<{self.__class__.__module__}.{self.__class__.__qualname__} '
+            f'@0x{id(self):02x} length={len(self)}{uri_repr}>'
+        )
+
+
+    def __str__(self):
+        """ String dump of the graph triples. """
+        return str(self.data)
+
+
+    def __add__(self, other):
+        """ Alias for set-theoretical union. """
+        return self.__or__(other)
+
+
+    def __iadd__(self, other):
+        """ Alias for in-place set-theoretical union. """
+        return self.__ior__(other)
+
+
+    def __sub__(self, other):
+        """ Set-theoretical subtraction. """
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.subtract(self.keys, other.keys)
+
+        return gr3
+
+
+    def __isub__(self, other):
+        """ In-place set-theoretical subtraction. """
+        self.keys = kset.subtract(self.keys, other.keys)
+
+        return self
+
+    def __and__(self, other):
+        """ Set-theoretical intersection. """
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.intersect(self.keys, other.keys)
+
+        return gr3
+
+
+    def __iand__(self, other):
+        """ In-place set-theoretical intersection. """
+        self.keys = kset.intersect(self.keys, other.keys)
+
+        return self
+
+
+    def __or__(self, other):
+        """ Set-theoretical union. """
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.merge(self.keys, other.keys)
+
+        return gr3
+
+
+    def __ior__(self, other):
+        """ In-place set-theoretical union. """
+        self.keys = kset.merge(self.keys, other.keys)
+
+        return self
+
+
+    def __xor__(self, other):
+        """ Set-theoretical exclusive disjunction (XOR). """
+        cdef Graph gr3 = self.empty_copy()
+
+        gr3.keys = kset.xor(self.keys, other.keys)
+
+        return gr3
+
+
+    def __ixor__(self, other):
+        """ In-place set-theoretical exclusive disjunction (XOR). """
+        self.keys = kset.xor(self.keys, other.keys)
+
+        return self
+
+
+    def __contains__(self, trp):
+        """
+        Whether the graph contains a triple.
+
+        :rtype: boolean
+        """
+        cdef TripleKey spok
+
+        spok = [
+            self.store.to_key(trp[0]),
+            self.store.to_key(trp[1]),
+            self.store.to_key(trp[2]),
+        ]
+
+        return self.keys.contains(&spok)
+
+
+    def __iter__(self):
+        """ Graph iterator. It iterates over the set triples. """
+        yield from self.data
+
+
+    # Slicing.
+
+    def __getitem__(self, item):
+        """
+        Slicing function.
+
+        It behaves similarly to `RDFLib graph slicing
+        <https://rdflib.readthedocs.io/en/stable/utilities.html#slicing-graphs>`__
+        """
+        if isinstance(item, slice):
+            s, p, o = item.start, item.stop, item.step
+            return self._slice(s, p, o)
+        elif self.uri and isinstance(item, rdflib.term.Identifier):
+            # If a Node is given, return all values for that predicate.
+            return self._slice(self.uri, item, None)
+        else:
+            raise TypeError(f'Wrong slice format: {item}.')
+
+
+    def __hash__(self):
+        """ TODO Not that great of a hash. """
+        return id(self)
+
+
+    ## BASIC PYTHON-ACCESSIBLE SET OPERATIONS ##
+
+    def value(self, p, strict=False):
+        """
+        Get an individual value.
+
+        :param rdflib.term.Node p: Predicate to search for.
+        :param bool strict: If set to ``True`` the method raises an error if
+            more than one value is found. If ``False`` (the default) only
+            the first found result is returned.
+        :rtype: rdflib.term.Node
+        """
+        if not self.uri:
+            raise ValueError('Cannot use `value` on a non-named graph.')
+
+        # TODO use slice.
+        values = {trp[2] for trp in self.lookup((self.uri, p, None))}
+
+        if strict and len(values) > 1:
+            raise RuntimeError('More than one value found for {}, {}.'.format(
+                    self.uri, p))
+
+        for ret in values:
+            return ret
+
+        return None
+
+
+    def terms_by_type(self, type):
+        """
+        Get all terms of a type: subject, predicate or object.
+
+        :param str type: One of ``s``, ``p`` or ``o``.
+        """
+        i = 'spo'.index(type)
+        return {r[i] for r in self.data}
+
+
+    def add(self, triples):
+        """
+        Add triples to the graph.
+
+        This method checks for duplicates.
+
+        :param iterable triples: iterable of 3-tuple triples.
+        """
+        cdef:
+            TripleKey spok
+
+        for s, p, o in triples:
+            #logger.info(f'Adding {s} {p} {o} to store: {self.store}')
+            spok = [
+                self.store.to_key(s),
+                self.store.to_key(p),
+                self.store.to_key(o),
+            ]
+
+            self.keys.add(&spok, True)
+
+
+    def remove(self, pattern):
+        """
+        Remove triples by pattern.
+
+        The pattern used is similar to :py:meth:`LmdbTripleStore.delete`.
+        """
+        # create an empty copy of the current object.
+        new_gr = self.empty_copy()
+
+        # Reverse lookup: only triples not matching the pattern are added to
+        # the new set.
+        self._match_ptn_callback(
+            pattern, new_gr, add_trp_callback, False
+        )
+
+        # Replace the keyset.
+        self.keys = new_gr.keys
+
+
+    ## CYTHON-ACCESSIBLE BASIC METHODS ##
+
+    cpdef Graph copy(self, str uri=None):
+        """
+        Create copy of the graph with a different (or no) URI.
+
+        :param str uri: URI of the new graph. This should be different from
+            the original.
+        """
+        cdef Graph new_gr = Graph(self.store, self.capacity, uri=uri)
+
+        new_gr.keys = self.keys.copy()
+
+        return new_gr
+
+
+    cpdef Graph empty_copy(self, str uri=None):
+        """
+        Create an empty copy with same capacity and store binding.
+
+        :param str uri: URI of the new graph. This should be different from
+            the original.
+        """
+        return Graph(self.store, self.capacity, uri=uri)
+
+
+    cpdef void set(self, tuple trp) except *:
+        """
+        Set a single value for subject and predicate.
+
+        Remove all triples matching ``s`` and ``p`` before adding ``s p o``.
+        """
+        if None in trp:
+            raise ValueError(f'Invalid triple: {trp}')
+        self.remove((trp[0], trp[1], None))
+        self.add((trp,))
+
+
+    def as_rdflib(self):
+        """
+        Return the data set as an RDFLib Graph.
+
+        :rtype: rdflib.Graph
+        """
+        gr = rdflib.Graph(identifier=self.uri)
+        for trp in self.data:
+            gr.add(trp)
+
+        return gr
+
+
+    def _slice(self, s, p, o):
+        """
+        Return terms filtered by other terms.
+
+        This behaves like the rdflib.Graph slicing policy.
+        """
+        #logger.info(f'Slicing: {s} {p} {o}')
+        # If no terms are unbound, check for containment.
+        if s is not None and p is not None and o is not None: # s p o
+            return (s, p, o) in self
+
+        # If some terms are unbound, do a lookup.
+        res = self.lookup((s, p, o))
+        #logger.info(f'Slicing results: {res}')
+        if s is not None:
+            if p is not None: # s p ?
+                return {r[2] for r in res}
+
+            if o is not None: # s ? o
+                return {r[1] for r in res}
+
+            # s ? ?
+            return {(r[1], r[2]) for r in res}
+
+        if p is not None:
+            if o is not None: # ? p o
+                return {r[0] for r in res}
+
+            # ? p ?
+            return {(r[0], r[2]) for r in res}
+
+        if o is not None: # ? ? o
+            return {(r[0], r[1]) for r in res}
+
+        # ? ? ?
+        return res
+
+
+    def lookup(self, pattern):
+        """
+        Look up triples by a pattern.
+
+        This function converts RDFLib terms into the serialized format stored
+        in the graph's internal structure and compares them bytewise.
+
+        Any and all of the lookup terms may be ``None``.
+
+        :rtype: Graph
+        :return: New Graph instance with matching triples.
+        """
+        cdef:
+            Graph res_gr = self.empty_copy()
+
+        self._match_ptn_callback(pattern, res_gr, add_trp_callback)
+        res_gr.keys.resize()
+
+        return res_gr
+
+
+    cdef void _match_ptn_callback(
+        self, pattern, Graph gr, lookup_callback_fn_t callback_fn,
+        bint callback_cond=True, void* ctx=NULL
+    ) except *:
+        """
+        Execute an arbitrary function on a list of triples matching a pattern.
+
+        The arbitrary function is applied to each triple found in the current
+        graph, and to a discrete graph that can be the current graph itself
+        or a different one.
+
+        :param tuple pattern: A 3-tuple of rdflib terms or None.
+        :param Graph gr: The graph instance to apply the callback function to.
+        :param lookup_callback_fn_t callback_fn: A callback function to be
+            applied to the target graph using the matching triples.
+        :param bint callback_cond: Whether to apply the callback function if
+            a match is found (``True``) or if it is not found (``False``).
+        :param void* ctx: Pointer to an arbitrary object that can be used by
+            the callback function.
+        """
+        cdef:
+            kset.key_cmp_fn_t cmp_fn
+            Key k1, k2, k3
+            TripleKey spok
+
+        s, p, o = pattern
+
+        #logger.info(f'Match Callback pattern: {pattern}')
+
+        self.keys.seek()
+        # Decide comparison logic outside the loop.
+        if all(pattern):
+            if callback_cond:
+                # Shortcut for 3-term match—only if callback_cond is True.
+                spok = [
+                    self.store.to_key(s),
+                    self.store.to_key(p),
+                    self.store.to_key(o),
+                ]
+                if self.keys.contains(&spok):
+                    callback_fn(gr, &spok, ctx)
+            else:
+                # For negative condition (i.e. "apply this function to all keys
+                # except the matching one"), the whole set must be scanned.
+                #logger.info('All terms bound and negative condition.')
+                k1 = self.store.to_key(s)
+                k2 = self.store.to_key(p)
+                k3 = self.store.to_key(o)
+                #logger.info(f'Keys to match: {k1} {k2} {k3}')
+                while self.keys.get_next(&spok):
+                    #logger.info(f'Verifying spok: {spok}')
+                    if k1 != spok[0] or k2 != spok[1] or k3 != spok[2]:
+                        #logger.info(f'Calling function for spok: {spok}')
+                        callback_fn(gr, &spok, ctx)
+            return
+
+        if s is not None:
+            k1 = self.store.to_key(s)
+            if p is not None:
+                k2 = self.store.to_key(p)
+                cmp_fn = cb.lookup_skpk_cmp_fn
+            elif o is not None:
+                k2 = self.store.to_key(o)
+                cmp_fn = cb.lookup_skok_cmp_fn
+            else:
+                cmp_fn = cb.lookup_sk_cmp_fn
+        elif p is not None:
+            k1 = self.store.to_key(p)
+            if o is not None:
+                k2 = self.store.to_key(o)
+                cmp_fn = cb.lookup_pkok_cmp_fn
+            else:
+                cmp_fn = cb.lookup_pk_cmp_fn
+        elif o is not None:
+            k1 = self.store.to_key(o)
+            cmp_fn = cb.lookup_ok_cmp_fn
+        else:
+            cmp_fn = cb.lookup_none_cmp_fn
+
+        # Iterate over serialized triples.
+        while self.keys.get_next(&spok):
+            if cmp_fn(&spok, k1, k2) == callback_cond:
+                callback_fn(gr, &spok, ctx)
+
+
+
+## FACTORY METHODS
+
+def from_rdf(store=None, uri=None, *args, **kwargs):
+    """
+    Create a Graph from a serialized RDF string.
+
+    This factory function takes the same arguments as
+    :py:meth:`rdflib.Graph.parse`.
+
+    :param store: see :py:meth:`Graph.__cinit__`.
+
+    :param uri: see :py:meth:`Graph.__cinit__`.
+
+    :param *args: Positional arguments passed to RDFlib's ``parse``.
+
+    :param *kwargs: Keyword arguments passed to RDFlib's ``parse``.
+
+    :rtype: Graph
+    """
+    gr = rdflib.Graph().parse(*args, **kwargs)
+
+    return Graph(store=store, uri=uri, data={*gr})
+
+
+## LOOKUP CALLBACK FUNCTIONS
+
+cdef inline void add_trp_callback(
+    Graph gr, const TripleKey* spok_p, void* ctx
+):
+    """
+    Add a triple to a graph as a result of a lookup callback.
+    """
+    gr.keys.add(spok_p)

+ 0 - 0
lakesuperior/model/graph/term.pxd → lakesuperior/model/rdf/term.pxd


+ 8 - 6
lakesuperior/model/graph/term.pyx → lakesuperior/model/rdf/term.pyx

@@ -139,7 +139,9 @@ cdef int serialize_from_rdflib(
         elif isinstance(term_obj, BNode):
             _term.type = LSUP_TERM_TYPE_BNODE
         else:
-            raise ValueError(f'Unsupported term type: {type(term_obj)}')
+            raise ValueError(
+                f'Unsupported term type: {term_obj} {type(term_obj)}'
+            )
 
     serialize(&_term, data, pool)
 
@@ -148,17 +150,17 @@ cdef object to_rdflib(const Term *term):
     """
     Return an RDFLib term.
     """
-    cdef str data = (<bytes>term[0].data).decode()
+    cdef str data = (<bytes>term.data).decode()
     if term[0].type == LSUP_TERM_TYPE_LITERAL:
         return Literal(
             data,
-            datatype=term[0].datatype if not term[0].lang else None,
-            lang=term[0].lang or None
+            datatype=term.datatype if not term.lang else None,
+            lang=term.lang or None
         )
     else:
-        if term[0].type == LSUP_TERM_TYPE_URIREF:
+        if term.type == LSUP_TERM_TYPE_URIREF:
             return URIRef(data)
-        elif term[0].type == LSUP_TERM_TYPE_BNODE:
+        elif term.type == LSUP_TERM_TYPE_BNODE:
             return BNode(data)
         else:
             raise IOError(f'Unknown term type code: {term[0].type}')

+ 1 - 1
lakesuperior/model/graph/triple.pxd → lakesuperior/model/rdf/triple.pxd

@@ -1,6 +1,6 @@
 #from lakesuperior.cy_include cimport cytpl as tpl
 from lakesuperior.model.base cimport Buffer
-from lakesuperior.model.graph.term cimport Term
+from lakesuperior.model.rdf.term cimport Term
 
 # Triple of Term structs.
 ctypedef struct Triple:

+ 0 - 0
lakesuperior/model/graph/triple.pyx → lakesuperior/model/rdf/triple.pyx


+ 0 - 21
lakesuperior/model/structures/callbacks.pxd

@@ -1,21 +0,0 @@
-from lakesuperior.model.base cimport Key, TripleKey
-
-cdef:
-    bint lookup_sk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    )
-    bint lookup_pk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    )
-    bint lookup_ok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    )
-    bint lookup_skpk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    )
-    bint lookup_skok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    )
-    bint lookup_pkok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    )

+ 0 - 33
lakesuperior/model/structures/callbacks.pyx

@@ -1,33 +0,0 @@
-from lakesuperior.model.base cimport Key, TripleKey
-
-cdef bint lookup_sk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
-    return spok[0] == k1
-
-cdef bint lookup_pk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
-    return spok[1] == k1
-
-cdef bint lookup_ok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
-    return spok[2] == k1
-
-cdef bint lookup_skpk_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
-    return spok[0] == k1 and spok[1] == k2
-
-cdef bint lookup_skok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
-    return spok[0] == k1 and spok[2] == k2
-
-cdef bint lookup_pkok_cmp_fn(
-        const TripleKey* spok, const Key* k1, const Key* k2
-    ):
-    return spok[1] == k1 and spok[2] == k2
-
-

+ 21 - 10
lakesuperior/model/structures/keyset.pxd

@@ -3,24 +3,35 @@ from lakesuperior.model.base cimport (
 )
 
 ctypedef bint (*key_cmp_fn_t)(
-    const TripleKey* spok, const Key* k1, const Key* k2
+    const TripleKey* spok, const Key k1, const Key k2
 )
 
 cdef class Keyset:
     cdef:
         TripleKey* data
-        size_t ct
-        size_t _cur # Index cursor used to look up values.
-        size_t _free_i # Index of next free slot.
+        size_t capacity
+        size_t cur # Index cursor used to look up values.
+        size_t free_i # Index of next free slot.
+        float expand_ratio # By how much storage is automatically expanded when
+                           # full. 1 means the size doubles, 0.5 a 50%
+                           # increase. 0 means that storage won't be
+                           # automatically expanded and adding above capacity
+                           # will raise an error.
 
         void seek(self, size_t idx=*)
+        size_t size(self)
         size_t tell(self)
-        bint get_at(self, size_t i, TripleKey* item)
         bint get_next(self, TripleKey* item)
-        void add(self, const TripleKey* val) except *
-        bint contains(self, const TripleKey* val)
+        void add(self, const TripleKey* val, bint check_dup=*) except *
+        void remove(self, const TripleKey* val) except *
+        bint contains(self, const TripleKey* val) nogil
         Keyset copy(self)
+        Keyset sparse_copy(self)
         void resize(self, size_t size=*) except *
-        Keyset lookup(
-            self, const Key* sk, const Key* pk, const Key* ok
-        )
+        Keyset lookup(self, const Key sk, const Key pk, const Key ok)
+
+cdef:
+    Keyset merge(Keyset ks1, Keyset ks2)
+    Keyset subtract(Keyset ks1, Keyset ks2)
+    Keyset intersect(Keyset ks1, Keyset ks2)
+    Keyset xor(Keyset ks1, Keyset ks2)

+ 199 - 56
lakesuperior/model/structures/keyset.pyx

@@ -2,10 +2,11 @@ import logging
 
 from libc.string cimport memcmp, memcpy
 from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
+from cython.parallel import prange
 
-cimport lakesuperior.model.structures.callbacks as cb
+cimport lakesuperior.model.callbacks as cb
 
-from lakesuperior.model.base cimport TripleKey, TRP_KLEN
+from lakesuperior.model.base cimport NULL_TRP, TRP_KLEN, TripleKey
 
 
 logger = logging.getLogger(__name__)
@@ -13,22 +14,32 @@ logger = logging.getLogger(__name__)
 
 cdef class Keyset:
     """
-    Pre-allocated array (not set, as the name may suggest) of ``TripleKey``s.
+    Pre-allocated set of ``TripleKey``s.
+
+    The set is not checked for duplicates all the time: e.g., when creating
+    from a single set of triples coming from the store, the duplicate check
+    is turned off for efficiency. When merging with other sets, duplicate
+    checking should be turned on.
+
+    Since this class is based on a contiguous block of memory, it is best to
+    do very little manipulation. Several operations involve copying the whole
+    data block, so e.g. bulk removal and intersection are much more efficient
+    than individual record operations.
     """
-    def __cinit__(self, size_t ct=0):
+    def __cinit__(self, size_t capacity=0, expand_ratio=.5):
         """
         Initialize and allocate memory for the data set.
 
-        :param size_t ct: Number of elements to be accounted for.
+        :param size_t capacity: Number of elements to be accounted for.
         """
-        self.ct = ct
-        self.data = <TripleKey*>PyMem_Malloc(self.ct * TRP_KLEN)
-        logger.info(f'data address: 0x{<size_t>self.data:02x}')
-        if ct and not self.data:
+        self.capacity = capacity
+        self.expand_ratio = expand_ratio
+        self.data = <TripleKey*>PyMem_Malloc(self.capacity * TRP_KLEN)
+        if capacity and not self.data:
             raise MemoryError('Error allocating Keyset data.')
 
-        self._cur = 0
-        self._free_i = 0
+        self.cur = 0
+        self.free_i = 0
 
 
     def __dealloc__(self):
@@ -38,12 +49,7 @@ cdef class Keyset:
         This is called when the Python instance is garbage collected, which
         makes it handy to safely pass a Keyset instance across functions.
         """
-        #logger.debug(
-        #    'Releasing {0} ({1}x{2}) bytes of Keyset @ {3:x}...'.format(
-        #        self.size, self.conf.capacity, self.itemsize,
-        #        <unsigned long>self.data))
         PyMem_Free(self.data)
-        #logger.debug('...done releasing.')
 
 
     # Access methods.
@@ -52,34 +58,29 @@ cdef class Keyset:
         """
         Place the cursor at a certain index, 0 by default.
         """
-        self._cur = idx
+        self.cur = idx
 
 
-    cdef size_t tell(self):
+    cdef size_t size(self):
         """
-        Tell the position of the cursor in the keyset.
+        Size of the object as the number of occupied data slots.
+
+        Note that this is different from :py:data:`capacity`, which indicates
+        the number of allocated items in memory.
         """
-        return self._cur
+        return self.free_i
 
 
-    cdef bint get_at(self, size_t i, TripleKey* item):
+    cdef size_t tell(self):
         """
-        Get an item at a given index position. Cython-level method.
-
-        :rtype: TripleKey
+        Tell the position of the cursor in the keyset.
         """
-        if i >= self._free_i:
-            return False
+        return self.cur
 
-        self._cur = i
-        item[0] = self.data[i]
 
-        return True
-
-
-    cdef bint get_next(self, TripleKey* item):
+    cdef inline bint get_next(self, TripleKey* val):
         """
-        Populate the current value and advance the cursor by 1.
+        Get the current value and advance the cursor by 1.
 
         :param void *val: Addres of value returned. It is NULL if
             the end of the buffer was reached.
@@ -88,36 +89,68 @@ cdef class Keyset:
         :return: True if a value was found, False if the end of the buffer
             has been reached.
         """
-        if self._cur >= self._free_i:
+        if self.cur >= self.free_i:
             return False
 
-        item[0] = self.data[self._cur]
-        self._cur += 1
+        val[0] = self.data[self.cur]
+        self.cur += 1
 
         return True
 
 
-    cdef void add(self, const TripleKey* val) except *:
+    cdef void add(self, const TripleKey* val, bint check_dup=False) except *:
         """
         Add a triple key to the array.
         """
-        if self._free_i >= self.ct:
-            raise MemoryError('No slots left in key set.')
+        # Check for deleted triples and optionally duplicates.
+        if val[0] == NULL_TRP or (check_dup and self.contains(val)):
+            return
 
-        self.data[self._free_i] = val[0]
+        if self.free_i >= self.capacity:
+            if self.expand_ratio > 0:
+                # In some edge cases, a very small ratio may round down to a
+                # zero increase, so the baseline increase is 1 element.
+                self.resize(1 + <size_t>(self.capacity * (1 + self.expand_ratio)))
+            else:
+                raise MemoryError('No space left in key set.')
 
-        self._free_i += 1
+        self.data[self.free_i] = val[0]
 
+        self.free_i += 1
 
-    cdef bint contains(self, const TripleKey* val):
+
+    cdef void remove(self, const TripleKey* val) except *:
         """
-        Whether a value exists in the set.
+        Remove a triple key.
+
+        This method replaces a triple with NULL_TRP if found. It
+        does not reclaim space. Therefore, if many removal operations are
+        foreseen, using :py:meth:`subtract` is advised.
         """
-        cdef TripleKey stored_val
+        cdef:
+            TripleKey stored_val
 
         self.seek()
         while self.get_next(&stored_val):
+            #logger.info(f'Looking up for removal: {stored_val}')
             if memcmp(val, stored_val, TRP_KLEN) == 0:
+                memcpy(&stored_val, NULL_TRP, TRP_KLEN)
+                return
+
+
+    cdef bint contains(self, const TripleKey* val) nogil:
+        """
+        Whether a value exists in the set.
+        """
+        cdef size_t i
+
+        for i in range(self.free_i):
+            # o is least likely to match.
+            if (
+                val[0][2] == self.data[i][2] and
+                val[0][0] == self.data[i][0] and
+                val[0][1] == self.data[i][1]
+            ):
                 return True
         return False
 
@@ -126,9 +159,33 @@ cdef class Keyset:
         """
         Copy a Keyset.
         """
-        cdef Keyset new_ks = Keyset(self.ct)
-        memcpy(new_ks.data, self.data, self.ct * TRP_KLEN)
+        cdef Keyset new_ks = Keyset(
+            self.capacity, expand_ratio=self.expand_ratio
+        )
+        memcpy(new_ks.data, self.data, self.capacity * TRP_KLEN)
         new_ks.seek()
+        new_ks.free_i = self.free_i
+
+        return new_ks
+
+
+    cdef Keyset sparse_copy(self):
+        """
+        Copy a Keyset and plug holes.
+
+        ``NULL_TRP`` values left from removing triple keys are skipped in the
+        copy and the set is shrunk to its used size.
+        """
+        cdef:
+            TripleKey val
+            Keyset new_ks = Keyset(self.capacity, self.expand_ratio)
+
+        self.seek()
+        while self.get_next(&val):
+            if val != NULL_TRP:
+                new_ks.add(&val)
+
+        new_ks.resize()
 
         return new_ks
 
@@ -145,7 +202,7 @@ cdef class Keyset:
             to 0.
         """
         if not size:
-            size = self._free_i
+            size = self.free_i
 
         tmp = <TripleKey*>PyMem_Realloc(self.data, size * TRP_KLEN)
 
@@ -153,17 +210,15 @@ cdef class Keyset:
             raise MemoryError('Could not reallocate Keyset data.')
 
         self.data = tmp
-        self.ct = size
+        self.capacity = size
         self.seek()
 
 
-    cdef Keyset lookup(
-            self, const Key* sk, const Key* pk, const Key* ok
-    ):
+    cdef Keyset lookup(self, const Key sk, const Key pk, const Key ok):
         """
         Look up triple keys.
 
-        This works in a similar way that the ``SimpleGraph`` and ``LmdbStore``
+        This works in a similar way to how the ``Graph`` and ``LmdbStore``
         methods work.
 
         Any and all the terms may be NULL. A NULL term is treated as unbound.
@@ -174,9 +229,8 @@ cdef class Keyset:
         """
         cdef:
             TripleKey spok
-            Keyset ret = Keyset(self.ct)
-            Key* k1 = NULL
-            Key* k2 = NULL
+            Keyset ret = Keyset(self.capacity)
+            Key k1, k2
             key_cmp_fn_t cmp_fn
 
         if sk and pk and ok: # s p o
@@ -213,9 +267,98 @@ cdef class Keyset:
 
         self.seek()
         while self.get_next(&spok):
-            if cmp_fn(<TripleKey*>spok, k1, k2):
+            if cmp_fn(&spok, k1, k2):
                 ret.add(&spok)
 
         ret.resize()
 
         return ret
+
+
+
+## Boolean operations.
+
+cdef Keyset merge(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by merging the ``ks2`` Keyset with ``ks1``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = ks1.copy()
+
+    ks2.seek()
+    while ks2.get_next(&val):
+        ks3.add(&val, True)
+
+    ks3.resize()
+
+    return ks3
+
+
+cdef Keyset subtract(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by subtracting the ``ks2`` Keyset from ``ks1``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = Keyset(ks1.capacity)
+
+    ks1.seek()
+    while ks1.get_next(&val):
+        if val != NULL_TRP and not ks2.contains(&val):
+            ks3.add(&val)
+
+    ks3.resize()
+
+    return ks3
+
+
+cdef Keyset intersect(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by intersecting ``ks1`` with the ``ks2`` Keyset.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = Keyset(ks1.capacity)
+
+    ks1.seek()
+    while ks1.get_next(&val):
+        if val != NULL_TRP and ks2.contains(&val):
+            ks3.add(&val)
+
+    ks3.resize()
+
+    return ks3
+
+
+cdef Keyset xor(Keyset ks1, Keyset ks2):
+    """
+    Create a Keyset by symmetric difference (XOR) of ``ks1`` and ``ks2``.
+
+    :rtype: Keyset
+    """
+    cdef:
+        TripleKey val
+        Keyset ks3 = Keyset(ks1.capacity + ks2.capacity)
+
+    ks1.seek()
+    while ks1.get_next(&val):
+        if val != NULL_TRP and not ks2.contains(&val):
+            ks3.add(&val)
+
+    ks2.seek()
+    while ks2.get_next(&val):
+        if val != NULL_TRP and not ks1.contains(&val):
+            ks3.add(&val)
+
+    ks3.resize()
+
+    return ks3
+
+

+ 50 - 5
lakesuperior/store/base_lmdb_store.pyx

@@ -27,6 +27,12 @@ cdef void _check(int rc, str message='') except *:
         raise KeyNotFoundError()
     if rc == lmdb.MDB_KEYEXIST:
         raise KeyExistsError()
+    if rc == errno.EINVAL:
+        raise InvalidParamError(
+            'Invalid LMDB parameter error.\n'
+            'Please verify that a transaction is open and valid for the '
+            'current operation.'
+        )
     if rc != lmdb.MDB_SUCCESS:
         out_msg = (
                 message + '\nInternal error ({}): '.format(rc)
@@ -44,6 +50,9 @@ class KeyNotFoundError(LmdbError):
 class KeyExistsError(LmdbError):
     pass
 
+class InvalidParamError(LmdbError):
+    pass
+
 
 
 cdef class BaseLmdbStore:
@@ -335,22 +344,46 @@ cdef class BaseLmdbStore:
         """
         Transaction context manager.
 
+        Open and close a transaction for the duration of the functions in the
+        context. If a transaction has already been opened in the store, a new
+        one is opened only if the current transaction is read-only and the new
+        requested transaction is read-write.
+
+        If a new write transaction is opened, the old one is kept on hold until
+        the new transaction is closed, then restored. All cursors are
+        invalidated and must be restored as well if one needs to reuse them.
+
         :param bool write: Whether a write transaction is to be opened.
 
         :rtype: lmdb.Transaction
         """
+        cdef lmdb.MDB_txn* hold_txn
+
+        will_open = False
+
         if not self.is_open:
             raise LmdbError('Store is not open.')
 
+        # If another transaction is open, only open the new transaction if
+        # the current one is RO and the new one RW.
         if self.is_txn_open:
-            logger.debug(
-                    'Transaction is already active. Not opening another one.')
-            #logger.debug('before yield')
-            yield
-            #logger.debug('after yield')
+            if write:
+                will_open = not self.is_txn_rw
         else:
+            will_open = True
+
+        # If a new transaction needs to be opened and replace the old one,
+        # the old one must be put on hold and swapped out when the new txn
+        # is closed.
+        if will_open:
+            will_reset = self.is_txn_open
+
+        if will_open:
             #logger.debug('Beginning {} transaction.'.format(
             #    'RW' if write else 'RO'))
+            if will_reset:
+                hold_txn = self.txn
+
             try:
                 self._txn_begin(write=write)
                 self.is_txn_rw = write
@@ -359,9 +392,21 @@ cdef class BaseLmdbStore:
                 #logger.debug('In txn_ctx, after yield')
                 self._txn_commit()
                 #logger.debug('after _txn_commit')
+                if will_reset:
+                    lmdb.mdb_txn_reset(hold_txn)
+                    self.txn = hold_txn
+                    _check(lmdb.mdb_txn_renew(self.txn))
+                    self.is_txn_rw = False
             except:
                 self._txn_abort()
                 raise
+        else:
+            logger.info(
+                'Transaction is already active. Not opening another one.'
+            )
+            #logger.debug('before yield')
+            yield
+            #logger.debug('after yield')
 
 
     def begin(self, write=False):

+ 0 - 53
lakesuperior/store/ldp_rs/lmdb_store.py

@@ -199,56 +199,3 @@ class LmdbStore(LmdbTriplestore, Store):
 
 
     ## PRIVATE METHODS ##
-
-    def _normalize_context(self, context):
-        """
-        Normalize a context parameter to conform to the model expectations.
-
-        :param context: Context URI or graph.
-        :type context: URIRef or Graph or None
-        """
-        if isinstance(context, Graph):
-            if context == self or isinstance(context.identifier, Variable):
-                context = None
-            else:
-                context = context.identifier
-                #logger.debug('Converted graph into URI: {}'.format(context))
-
-        return context
-
-
-    ## Convenience methods—not necessary for functioning but useful for
-    ## debugging.
-
-    #def _keys_in_ctx(self, pk_ctx):
-    #    """
-    #    Convenience method to list all keys in a context.
-
-    #    :param bytes pk_ctx: Pickled context URI.
-
-    #    :rtype: Iterator(tuple)
-    #    :return: Generator of triples.
-    #    """
-    #    with self.cur('c:spo') as cur:
-    #        if cur.set_key(pk_ctx):
-    #            tkeys = cur.iternext_dup()
-    #            return {self._key_to_triple(tk) for tk in tkeys}
-    #        else:
-    #            return set()
-
-
-    #def _ctx_for_key(self, tkey):
-    #    """
-    #    Convenience method to list all contexts that a key is in.
-
-    #    :param bytes tkey: Triple key.
-
-    #    :rtype: Iterator(rdflib.URIRef)
-    #    :return: Generator of context URIs.
-    #    """
-    #    with self.cur('spo:c') as cur:
-    #        if cur.set_key(tkey):
-    #            ctx = cur.iternext_dup()
-    #            return {self._unpickle(c) for c in ctx}
-    #        else:
-    #            return set()

+ 11 - 21
lakesuperior/store/ldp_rs/lmdb_triplestore.pxd

@@ -1,11 +1,8 @@
 cimport lakesuperior.cy_include.collections as cc
 cimport lakesuperior.cy_include.cylmdb as lmdb
-cimport lakesuperior.cy_include.cytpl as tpl
 
-from lakesuperior.model.base cimport (
-    Key, DoubleKey, TripleKey, Buffer
-)
-from lakesuperior.model.graph.graph cimport SimpleGraph
+from lakesuperior.model.base cimport Key, DoubleKey, TripleKey, Buffer
+from lakesuperior.model.rdf.graph cimport Graph
 from lakesuperior.model.structures.keyset cimport Keyset
 from lakesuperior.store.base_lmdb_store cimport BaseLmdbStore
 
@@ -23,33 +20,26 @@ cdef:
 cdef class LmdbTriplestore(BaseLmdbStore):
     cpdef dict stats(self)
     cpdef size_t _len(self, context=*) except -1
-    cpdef add(self, triple, context=*, quoted=*)
-    cpdef add_graph(self, graph)
+    cpdef void add(self, triple, context=*, quoted=*) except *
+    cpdef void add_graph(self, graph) except *
     cpdef void _remove(self, tuple triple_pattern, context=*) except *
     cpdef void _remove_graph(self, object gr_uri) except *
     cpdef tuple all_namespaces(self)
-    cpdef SimpleGraph graph_lookup(
-        self, triple_pattern, context=*, uri=*, copy=*
-    )
+    cpdef Graph triple_keys(self, tuple triple_pattern, context=*, uri=*)
 
     cdef:
-        void _add_graph(self, Buffer* pk_gr) except *
         void _index_triple(self, int op, TripleKey spok) except *
-        Keyset triple_keys(self, tuple triple_pattern, context=*)
         void _all_term_keys(self, term_type, cc.HashSet** tkeys) except *
-        void lookup_term(self, const Key* tk, Buffer* data) except *
-        Keyset _lookup(self, tuple triple_pattern)
-        Keyset _lookup_1bound(self, unsigned char idx, Key luk)
-        Keyset _lookup_2bound(
+        void lookup_term(self, const Key tk, Buffer* data) except *
+        Graph _lookup(self, tuple triple_pattern)
+        Graph _lookup_1bound(self, unsigned char idx, Key luk)
+        Graph _lookup_2bound(
             self, unsigned char idx1, unsigned char idx2, DoubleKey tks
         )
         object from_key(self, const Key tk)
-        Key _to_key_idx(self, term) except -1
+        Key to_key(self, term) except? 0
         void all_contexts(self, Key** ctx, size_t* sz, triple=*) except *
         Key _append(
                 self, Buffer *value,
                 unsigned char *dblabel=*, lmdb.MDB_txn *txn=*,
-                unsigned int flags=*)
-
-        #Key bytes_to_idx(self, const unsigned char* bs)
-        #unsigned char* idx_to_bytes(Key idx)
+                unsigned int flags=*) except? 0

文件差异内容过多而无法显示
+ 150 - 315
lakesuperior/store/ldp_rs/lmdb_triplestore.pyx


+ 26 - 26
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -9,7 +9,7 @@ from urllib.parse import urldefrag
 
 import arrow
 
-from rdflib import Dataset, Graph, Literal, URIRef, plugin
+from rdflib import Dataset, Literal, URIRef, plugin
 from rdflib.compare import to_isomorphic
 from rdflib.namespace import RDF
 from rdflib.query import ResultException
@@ -24,7 +24,7 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
 from lakesuperior.globals import ROOT_RSRC_URI
 from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError, PathSegmentError)
-from lakesuperior.model.graph.graph import SimpleGraph, Imr
+from lakesuperior.model.rdf.graph import Graph
 
 
 META_GR_URI = nsc['fcsystem']['meta']
@@ -217,14 +217,15 @@ class RsrcCentricLayout:
         fname = path.join(
                 basedir, 'data', 'bootstrap', 'rsrc_centric_layout.sparql')
         with store.txn_ctx(True):
+            #import pdb; pdb.set_trace()
             with open(fname, 'r') as f:
                 data = Template(f.read())
                 self.ds.update(data.substitute(timestamp=arrow.utcnow()))
-            #import pdb; pdb.set_trace()
+        with store.txn_ctx():
             imr = self.get_imr('/', incl_inbound=False, incl_children=True)
 
-        gr = Graph(identifier=imr.uri)
-        gr += imr.data
+        #gr = Graph(identifier=imr.uri)
+        #gr += imr.data
         #checksum = to_isomorphic(gr).graph_digest()
         #digest = sha256(str(checksum).encode('ascii')).digest()
 
@@ -250,9 +251,9 @@ class RsrcCentricLayout:
         :param rdflib.term.URIRef ctx: URI of the optional context. If None,
             all named graphs are queried.
 
-        :rtype: SimpleGraph
+        :rtype: Graph
         """
-        return self.store.graph_lookup((subject, None, None), ctx, copy=True)
+        return self.store.triple_keys((subject, None, None), ctx)
 
 
     def count_rsrc(self):
@@ -291,15 +292,17 @@ class RsrcCentricLayout:
         if not incl_children:
             contexts.remove(nsc['fcstruct'][uid])
 
-        imr = Imr(uri=nsc['fcres'][uid])
+        imr = Graph(self.store, uri=nsc['fcres'][uid])
 
         for ctx in contexts:
-            gr = self.store.graph_lookup((None, None, None), ctx, copy=True)
+            gr = self.store.triple_keys((None, None, None), ctx)
             imr |= gr
 
         # Include inbound relationships.
         if incl_inbound and len(imr):
-            gr = SimpleGraph({*self.get_inbound_rel(nsc['fcres'][uid])})
+            gr = Graph(
+                self.store, data={*self.get_inbound_rel(nsc['fcres'][uid])}
+            )
             imr |= gr
 
         if strict:
@@ -332,11 +335,10 @@ class RsrcCentricLayout:
         logger.debug('Getting metadata for: {}'.format(uid))
         if ver_uid:
             uid = self.snapshot_uid(uid, ver_uid)
-        imr = self.store.graph_lookup(
+        imr = self.store.triple_keys(
             (None, None, None),
             context=nsc['fcadmin'][uid],
-            uri=nsc['fcres'][uid],
-            copy=True
+            uri=nsc['fcres'][uid]
         )
 
         if strict:
@@ -356,11 +358,10 @@ class RsrcCentricLayout:
         # graph. If multiple user-provided graphs will be supported, this
         # should use another query to get all of them.
         uri = nsc['fcres'][uid]
-        userdata = self.store.graph_lookup(
+        userdata = self.store.triple_keys(
             (None, None, None),
             context=nsc['fcmain'][uid],
-            uri=uri,
-            copy=True
+            uri=uri
         )
 
         return userdata
@@ -371,27 +372,27 @@ class RsrcCentricLayout:
         Get all metadata about a resource's versions.
 
         :param string uid: Resource UID.
-        :rtype: SimpleGraph
+        :rtype: Graph
         """
         # **Note:** This pretty much bends the ontology—it replaces the graph
         # URI with the subject URI. But the concepts of data and metadata in
         # Fedora are quite fluid anyways...
 
-        vmeta = Imr(uri=nsc['fcres'][uid])
+        vmeta = Graph(self.store, uri=nsc['fcres'][uid])
 
         #Get version graphs proper.
-        for vtrp in self.store.graph_lookup(
+        for vtrp in self.store.triple_keys(
             (nsc['fcres'][uid], nsc['fcrepo'].hasVersion, None),
             nsc['fcadmin'][uid]
         ):
             # Add the hasVersion triple to the result graph.
             vmeta.add((vtrp,))
-            vmeta_gr = self.store.graph_lookup(
+            vmeta_gr = self.store.triple_keys(
                 (None, nsc['foaf'].primaryTopic, vtrp[2]), HIST_GR_URI
             )
             # Get triples in the meta graph filtering out undesired triples.
             for vmtrp in vmeta_gr:
-                for trp in self.store.graph_lookup(
+                for trp in self.store.triple_keys(
                     (vmtrp[0], None, None), HIST_GR_URI
                 ):
                     if (
@@ -418,7 +419,7 @@ class RsrcCentricLayout:
         :return: Inbound triples or subjects.
         """
         # Only return non-historic graphs.
-        # TODO self.store.graph_lookup?
+        # TODO self.store.triple_keys?
         meta_gr = self.ds.graph(META_GR_URI)
         ptopic_uri = nsc['foaf'].primaryTopic
 
@@ -444,7 +445,7 @@ class RsrcCentricLayout:
         ctx_uri = nsc['fcstruct'][uid]
         cont_p = nsc['ldp'].contains
         def _recurse(dset, s, c):
-            new_dset = self.store.graph_lookup(
+            new_dset = self.store.triple_keys(
                 (s, cont_p, None), c
             )[s : cont_p]
             #new_dset = set(ds.graph(c)[s : cont_p])
@@ -465,9 +466,8 @@ class RsrcCentricLayout:
             return _recurse(set(), subj_uri, ctx_uri)
         else:
             #return ds.graph(ctx_uri)[subj_uri : cont_p : ])
-            return self.store.graph_lookup(
-                (subj_uri, cont_p, None), ctx_uri,
-                copy=True
+            return self.store.triple_keys(
+                (subj_uri, cont_p, None), ctx_uri
             )[subj_uri : cont_p]
 
 

+ 7 - 1
lakesuperior/util/benchmark.py

@@ -15,6 +15,7 @@ from matplotlib import pyplot as plt
 
 from lakesuperior.util.generators import (
         random_image, random_graph, random_utf8_string)
+from lakesuperior.exceptions import ResourceNotExistsError
 
 __doc__ = '''
 Benchmark script to measure write performance.
@@ -95,6 +96,7 @@ def run(
         parent = '{}/{}'.format(endpoint.strip('/'), parent.strip('/'))
 
         if delete_container:
+            print('Removing previously existing container.')
             requests.delete(parent, headers={'prefer': 'no-tombstone'})
         requests.put(parent)
 
@@ -103,7 +105,11 @@ def run(
         from lakesuperior.api import resource as rsrc_api
 
         if delete_container:
-            rsrc_api.delete(parent, soft=False)
+            try:
+                print('Removing previously existing container.')
+                rsrc_api.delete(parent, soft=False)
+            except ResourceNotExistsError:
+                pass
         rsrc_api.create_or_replace(parent)
     else:
         raise ValueError(f'Mode not supported: {mode}')

+ 15 - 0
sandbox/NOTES

@@ -0,0 +1,15 @@
+Uses for a graph:
+
+1. Create a graph from RDF input, manipulate or evaluate it, and output it as
+  serialized RDF (always detached) [NO USE CASE]
+2. Create a graph from RDF input, optionally manipulate it with other data from
+  the store or external RDF and store it (start detached, then convert keys;
+  or, start attached)
+3. Retrieve a graph from the store, optionally manipulate it, and output it as
+  serialized RDF (start attached, then detach)
+4. Retrieve a graph from the store, manipulate it, and put the changed graph
+  back in the store (always attached)
+
+Initially we might try to render the graph read-only when detached; this
+avoids implementing more complex operations such as add, remove and booleans.
+

+ 10 - 0
sandbox/txn_openLogic.txt

@@ -0,0 +1,10 @@
+txn_open    write       txn_rw      Open?
+n           -           -           y
+y           n           -           n
+y           y           y           n
+y           y           n           y
+
+txn_open    Open    Reset?
+n           y       n
+y           y       y
+

+ 31 - 17
setup.py

@@ -82,6 +82,17 @@ extensions = [
             path.join('lakesuperior', 'model', f'base.{ext}'),
         ],
         include_dirs=include_dirs,
+        extra_compile_args=['-fopenmp', '-g'],
+        extra_link_args=['-fopenmp', '-g']
+    ),
+    Extension(
+        'lakesuperior.model.callbacks',
+        [
+            path.join('lakesuperior', 'model', f'callbacks.{ext}'),
+        ],
+        include_dirs=include_dirs,
+        extra_compile_args=['-g'],
+        extra_link_args=['-g'],
         #extra_compile_args=['-fopenmp'],
         #extra_link_args=['-fopenmp']
     ),
@@ -96,39 +107,41 @@ extensions = [
             path.join('lakesuperior', 'model', 'structures', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
-        #extra_compile_args=['-fopenmp'],
-        #extra_link_args=['-fopenmp']
+        extra_compile_args=['-fopenmp', '-g'],
+        extra_link_args=['-fopenmp', '-g']
     ),
     Extension(
-        'lakesuperior.model.graph.*',
+        'lakesuperior.store.base_lmdb_store',
         [
-            path.join(tpl_src_dir, 'tpl.c'),
-            path.join(spookyhash_src_dir, 'context.c'),
-            path.join(spookyhash_src_dir, 'globals.c'),
-            path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
             path.join(coll_src_dir, 'hashtable.c'),
             path.join(coll_src_dir, 'hashset.c'),
-            path.join('lakesuperior', 'model', 'graph', f'*.{ext}'),
+            path.join(tpl_src_dir, 'tpl.c'),
+            path.join(lmdb_src_dir, 'mdb.c'),
+            path.join(lmdb_src_dir, 'midl.c'),
+            path.join('lakesuperior', 'store', f'base_lmdb_store.{ext}'),
         ],
         include_dirs=include_dirs,
-        extra_compile_args=['-fopenmp'],
-        extra_link_args=['-fopenmp']
+        extra_compile_args=['-g'],
+        extra_link_args=['-g'],
     ),
     Extension(
-        'lakesuperior.store.base_lmdb_store',
+        'lakesuperior.model.rdf.*',
         [
+            path.join(tpl_src_dir, 'tpl.c'),
+            path.join(spookyhash_src_dir, 'context.c'),
+            path.join(spookyhash_src_dir, 'globals.c'),
+            path.join(spookyhash_src_dir, 'spookyhash.c'),
             path.join(coll_src_dir, 'common.c'),
             path.join(coll_src_dir, 'array.c'),
             path.join(coll_src_dir, 'hashtable.c'),
             path.join(coll_src_dir, 'hashset.c'),
-            path.join(tpl_src_dir, 'tpl.c'),
-            path.join(lmdb_src_dir, 'mdb.c'),
-            path.join(lmdb_src_dir, 'midl.c'),
-            path.join('lakesuperior', 'store', f'base_lmdb_store.{ext}'),
+            path.join('lakesuperior', 'model', 'rdf', f'*.{ext}'),
         ],
         include_dirs=include_dirs,
+        #extra_compile_args=['-fopenmp'],
+        #extra_link_args=['-fopenmp']
     ),
     Extension(
         'lakesuperior.store.ldp_rs.lmdb_triplestore',
@@ -143,8 +156,8 @@ extensions = [
                 'lakesuperior', 'store', 'ldp_rs', f'lmdb_triplestore.{ext}'),
         ],
         include_dirs=include_dirs,
-        extra_compile_args=['-fopenmp'],
-        extra_link_args=['-fopenmp']
+        extra_compile_args=['-g', '-fopenmp'],
+        extra_link_args=['-g', '-fopenmp']
     ),
 ]
 
@@ -179,6 +192,7 @@ if USE_CYTHON:
             'boundscheck': False,
             'wraparound': False,
             'profile': True,
+            'embedsignature': True
         }
     )
 

+ 850 - 0
tests/0_data_structures/test_0_0_graph.py

@@ -0,0 +1,850 @@
+import pdb
+import pytest
+
+from shutil import rmtree
+
+from rdflib import Graph, Namespace, URIRef
+
+from lakesuperior.model.rdf.graph import Graph
+from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore
+
+
+@pytest.fixture(scope='class')
+def store():
+    """
+    Test LMDB store.
+
+    This store has a different life cycle than the one used for tests in higher
+    levels of the stack and is not bootstrapped (i.e. starts completely empty).
+    """
+    env_path = '/tmp/test_lmdbstore'
+    # Remove previous test DBs
+    rmtree(env_path, ignore_errors=True)
+    store = LmdbStore(env_path)
+    yield store
+    store.close()
+    store.destroy()
+
+
+@pytest.fixture(scope='class')
+def trp():
+    return (
+        (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0')),
+        # Exact same as [0].
+        (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0')),
+        # NOTE: s and o are in reversed order.
+        (URIRef('urn:o:0'), URIRef('urn:p:0'), URIRef('urn:s:0')),
+        (URIRef('urn:s:0'), URIRef('urn:p:1'), URIRef('urn:o:0')),
+        (URIRef('urn:s:0'), URIRef('urn:p:1'), URIRef('urn:o:1')),
+        (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:1')),
+        (URIRef('urn:s:1'), URIRef('urn:p:2'), URIRef('urn:o:2')),
+    )
+
+@pytest.mark.usefixtures('trp')
+@pytest.mark.usefixtures('store')
+class TestGraphInit:
+    """
+    Test initialization of graphs with different base data sets.
+    """
+    def test_empty(self, store):
+        """
+        Test creation of an empty graph.
+        """
+        # No transaction needed to init an empty graph.
+        gr = Graph(store)
+
+        # len() should not need a DB transaction open.
+        assert len(gr) == 0
+
+
+    def test_init_triples(self, trp, store):
+        """
+        Test creation using a Python set.
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            assert len(gr) == 6
+
+            for t in trp:
+                assert t in gr
+
+
+@pytest.mark.usefixtures('trp')
+@pytest.mark.usefixtures('store')
+class TestGraphLookup:
+    """
+    Test triple lookup.
+    """
+
+    def test_lookup_all_unbound(self, trp, store):
+        """
+        Test lookup ? ? ? (all unbound)
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((None, None, None))
+
+            assert len(flt_gr) == 6
+
+            assert trp[0] in flt_gr
+            assert trp[2] in flt_gr
+            assert trp[3] in flt_gr
+            assert trp[4] in flt_gr
+            assert trp[5] in flt_gr
+            assert trp[6] in flt_gr
+
+
+    def test_lookup_s(self, trp, store):
+        """
+        Test lookup s ? ?
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((URIRef('urn:s:0'), None, None))
+
+            assert len(flt_gr) == 3
+
+            assert trp[0] in flt_gr
+            assert trp[3] in flt_gr
+            assert trp[4] in flt_gr
+
+            assert trp[2] not in flt_gr
+            assert trp[5] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup((URIRef('urn:s:8'), None, None))
+
+            assert len(empty_flt_gr) == 0
+
+
+    def test_lookup_p(self, trp, store):
+        """
+        Test lookup ? p ?
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((None, URIRef('urn:p:0'), None))
+
+            assert len(flt_gr) == 2
+
+            assert trp[0] in flt_gr
+            assert trp[2] in flt_gr
+
+            assert trp[3] not in flt_gr
+            assert trp[4] not in flt_gr
+            assert trp[5] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup((None, URIRef('urn:p:8'), None))
+
+            assert len(empty_flt_gr) == 0
+
+
+    def test_lookup_o(self, trp, store):
+        """
+        Test lookup ? ? o
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((None, None, URIRef('urn:o:1')))
+
+            assert len(flt_gr) == 2
+
+            assert trp[4] in flt_gr
+            assert trp[5] in flt_gr
+
+            assert trp[0] not in flt_gr
+            assert trp[2] not in flt_gr
+            assert trp[3] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup((None, None, URIRef('urn:o:8')))
+
+            assert len(empty_flt_gr) == 0
+
+
+    def test_lookup_sp(self, trp, store):
+        """
+        Test lookup s p ?
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((URIRef('urn:s:0'), URIRef('urn:p:1'), None))
+
+            assert len(flt_gr) == 2
+
+            assert trp[3] in flt_gr
+            assert trp[4] in flt_gr
+
+            assert trp[0] not in flt_gr
+            assert trp[2] not in flt_gr
+            assert trp[5] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup((URIRef('urn:s:0'), URIRef('urn:p:2'), None))
+
+            assert len(empty_flt_gr) == 0
+
+
+    def test_lookup_so(self, trp, store):
+        """
+        Test lookup s ? o
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((URIRef('urn:s:0'), None, URIRef('urn:o:0')))
+
+            assert len(flt_gr) == 2
+
+            assert trp[0] in flt_gr
+            assert trp[3] in flt_gr
+
+            assert trp[2] not in flt_gr
+            assert trp[4] not in flt_gr
+            assert trp[5] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup((URIRef('urn:s:0'), None, URIRef('urn:o:2')))
+
+            assert len(empty_flt_gr) == 0
+
+
+    def test_lookup_po(self, trp, store):
+        """
+        Test lookup ? p o
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup((None, URIRef('urn:p:1'), URIRef('urn:o:1')))
+
+            assert len(flt_gr) == 2
+
+            assert trp[4] in flt_gr
+            assert trp[5] in flt_gr
+
+            assert trp[0] not in flt_gr
+            assert trp[2] not in flt_gr
+            assert trp[3] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup((None, URIRef('urn:p:1'), URIRef('urn:o:2')))
+
+            assert len(empty_flt_gr) == 0
+
+
+    def test_lookup_spo(self, trp, store):
+        """
+        Test lookup s p o
+        """
+        with store.txn_ctx():
+            gr = Graph(store, data=set(trp))
+
+            flt_gr = gr.lookup(
+                (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:1'))
+            )
+
+            assert len(flt_gr) == 1
+
+            assert trp[5] in flt_gr
+
+            assert trp[0] not in flt_gr
+            assert trp[2] not in flt_gr
+            assert trp[3] not in flt_gr
+            assert trp[4] not in flt_gr
+            assert trp[6] not in flt_gr
+
+            # Test for empty results.
+            empty_flt_gr = gr.lookup(
+                (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:2'))
+            )
+
+            assert len(empty_flt_gr) == 0
+
+
+@pytest.mark.usefixtures('trp')
+@pytest.mark.usefixtures('store')
+class TestGraphSlicing:
+    """
+    Test triple lookup.
+    """
+    # TODO
+    pass
+
+
+
+@pytest.mark.usefixtures('trp')
+@pytest.mark.usefixtures('store')
+class TestGraphOps:
+    """
+    Test various graph operations.
+    """
+    def test_len(self, trp, store):
+        """
+        Test the length of a graph with and without duplicates.
+        """
+        with store.txn_ctx():
+            gr = Graph(store)
+            assert len(gr) == 0
+
+            gr.add((trp[0],))
+            assert len(gr) == 1
+
+            gr.add((trp[1],)) # Same values
+            assert len(gr) == 1
+
+            gr.add((trp[2],))
+            assert len(gr) == 2
+
+            gr.add(trp)
+            assert len(gr) == 6
+
+
+    def test_dup(self, trp, store):
+        """
+        Test operations with duplicate triples.
+        """
+        with store.txn_ctx():
+            gr = Graph(store)
+
+            gr.add((trp[0],))
+            assert trp[1] in gr
+            assert trp[2] not in gr
+
+
+    def test_remove(self, trp, store):
+        """
+        Test adding and removing triples.
+        """
+        with store.txn_ctx():
+            gr = Graph(store)
+
+            gr.add(trp)
+            gr.remove(trp[0])
+            assert len(gr) == 5
+            assert trp[0] not in gr
+            assert trp[1] not in gr
+
+            # This is the duplicate triple.
+            gr.remove(trp[1])
+            assert len(gr) == 5
+
+            # This is the triple in reverse order.
+            gr.remove(trp[2])
+            assert len(gr) == 4
+
+            gr.remove(trp[4])
+            assert len(gr) == 3
+
+
+    def test_union(self, trp, store):
+        """
+        Test graph union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:3]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr3 = gr1 | gr2
+
+            assert len(gr3) == 5
+            assert trp[0] in gr3
+            assert trp[4] in gr3
+
+
+    def test_ip_union(self, trp, store):
+        """
+        Test graph in-place union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:3]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr1 |= gr2
+
+            assert len(gr1) == 5
+            assert trp[0] in gr1
+            assert trp[4] in gr1
+
+
+    def test_addition(self, trp, store):
+        """
+        Test graph addition.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:3]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr3 = gr1 + gr2
+
+            assert len(gr3) == 5
+            assert trp[0] in gr3
+            assert trp[4] in gr3
+
+
+    def test_ip_addition(self, trp, store):
+        """
+        Test graph in-place addition.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:3]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr1 += gr2
+
+            assert len(gr1) == 5
+            assert trp[0] in gr1
+            assert trp[4] in gr1
+
+
+    def test_subtraction(self, trp, store):
+        """
+        Test graph subtraction.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:4]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr3 = gr1 - gr2
+
+            assert len(gr3) == 1
+            assert trp[0] in gr3
+            assert trp[1] in gr3
+            assert trp[2] not in gr3
+            assert trp[3] not in gr3
+            assert trp[4] not in gr3
+
+            gr3 = gr2 - gr1
+
+            assert len(gr3) == 2
+            assert trp[0] not in gr3
+            assert trp[1] not in gr3
+            assert trp[2] not in gr3
+            assert trp[3] not in gr3
+            assert trp[4] in gr3
+            assert trp[5] in gr3
+
+
+    def test_ip_subtraction(self, trp, store):
+        """
+        Test graph in-place subtraction.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:4]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr1 -= gr2
+
+            assert len(gr1) == 1
+            assert trp[0] in gr1
+            assert trp[1] in gr1
+            assert trp[2] not in gr1
+            assert trp[3] not in gr1
+            assert trp[4] not in gr1
+
+
+
+    def test_intersect(self, trp, store):
+        """
+        Test graph intersection.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:4]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr3 = gr1 & gr2
+
+            assert len(gr3) == 2
+            assert trp[2] in gr3
+            assert trp[3] in gr3
+            assert trp[0] not in gr3
+            assert trp[5] not in gr3
+
+
+    def test_ip_intersect(self, trp, store):
+        """
+        Test graph in-place intersection.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:4]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr1 &= gr2
+
+            assert len(gr1) == 2
+            assert trp[2] in gr1
+            assert trp[3] in gr1
+            assert trp[0] not in gr1
+            assert trp[5] not in gr1
+
+
+    def test_xor(self, trp, store):
+        """
+        Test graph XOR (symmetric difference).
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:4]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr3 = gr1 ^ gr2
+
+            assert len(gr3) == 3
+            assert trp[2] not in gr3
+            assert trp[3] not in gr3
+            assert trp[0] in gr3
+            assert trp[5] in gr3
+
+
+    def test_ip_xor(self, trp, store):
+        """
+        Test graph in-place XOR (symmetric difference).
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:4]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr1 ^= gr2
+
+            assert len(gr1) == 3
+            assert trp[2] not in gr1
+            assert trp[3] not in gr1
+            assert trp[0] in gr1
+            assert trp[5] in gr1
+
+
+
+@pytest.mark.usefixtures('trp')
+@pytest.mark.usefixtures('store')
+class TestNamedGraphOps:
+    """
+    Test various operations on a named graph.
+    """
+    def test_len(self, trp, store):
+        """
+        Test the length of a graph with and without duplicates.
+        """
+        imr = Graph(store, uri='http://example.edu/imr01')
+        assert len(imr) == 0
+
+        with store.txn_ctx():
+            imr.add((trp[0],))
+            assert len(imr) == 1
+
+            imr.add((trp[1],)) # Same values
+            assert len(imr) == 1
+
+            imr.add((trp[2],))
+            assert len(imr) == 2
+
+            imr.add(trp)
+            assert len(imr) == 6
+
+
+    def test_dup(self, trp, store):
+        """
+        Test operations with duplicate triples.
+        """
+        imr = Graph(store, uri='http://example.edu/imr01')
+
+        with store.txn_ctx():
+            imr.add((trp[0],))
+            assert trp[1] in imr
+            assert trp[2] not in imr
+
+
+    def test_remove(self, trp, store):
+        """
+        Test adding and removing triples.
+        """
+        with store.txn_ctx():
+            imr = Graph(store, uri='http://example.edu/imr01', data={*trp})
+
+            imr.remove(trp[0])
+            assert len(imr) == 5
+            assert trp[0] not in imr
+            assert trp[1] not in imr
+
+            # This is the duplicate triple.
+            imr.remove(trp[1])
+            assert len(imr) == 5
+
+            # This is the triple in reverse order.
+            imr.remove(trp[2])
+            assert len(imr) == 4
+
+            imr.remove(trp[4])
+            assert len(imr) == 3
+
+
+    def test_union(self, trp, store):
+        """
+        Test graph union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:3]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr3 = gr1 | gr2
+
+            assert len(gr3) == 5
+            assert trp[0] in gr3
+            assert trp[4] in gr3
+
+            assert gr3.uri == None
+
+
+    def test_ip_union(self, trp, store):
+        """
+        Test graph in-place union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:3]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr1 |= gr2
+
+            assert len(gr1) == 5
+            assert trp[0] in gr1
+            assert trp[4] in gr1
+
+            assert gr1.uri == URIRef('http://example.edu/imr01')
+
+
+    def test_addition(self, trp, store):
+        """
+        Test graph addition.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:3]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr3 = gr1 + gr2
+
+            assert len(gr3) == 5
+            assert trp[0] in gr3
+            assert trp[4] in gr3
+
+            assert gr3.uri == None
+
+
+    def test_ip_addition(self, trp, store):
+        """
+        Test graph in-place addition.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:3]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr1 += gr2
+
+            assert len(gr1) == 5
+            assert trp[0] in gr1
+            assert trp[4] in gr1
+
+            assert gr1.uri == URIRef('http://example.edu/imr01')
+
+
+    def test_subtraction(self, trp, store):
+        """
+        Test graph subtraction.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:4]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr3 = gr1 - gr2
+
+            assert len(gr3) == 1
+            assert trp[0] in gr3
+            assert trp[1] in gr3
+            assert trp[2] not in gr3
+            assert trp[3] not in gr3
+            assert trp[4] not in gr3
+
+            assert gr3.uri == None
+
+            gr3 = gr2 - gr1
+
+            assert len(gr3) == 2
+            assert trp[0] not in gr3
+            assert trp[1] not in gr3
+            assert trp[2] not in gr3
+            assert trp[3] not in gr3
+            assert trp[4] in gr3
+            assert trp[5] in gr3
+
+            assert gr3.uri == None
+
+
+    def test_ip_subtraction(self, trp, store):
+        """
+        Test graph in-place subtraction.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:4]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr1 -= gr2
+
+            assert len(gr1) == 1
+            assert trp[0] in gr1
+            assert trp[1] in gr1
+            assert trp[2] not in gr1
+            assert trp[3] not in gr1
+            assert trp[4] not in gr1
+
+            assert gr1.uri == URIRef('http://example.edu/imr01')
+
+
+
+    def test_intersect(self, trp, store):
+        """
+        Test graph intersection.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:4]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr3 = gr1 & gr2
+
+            assert len(gr3) == 2
+            assert trp[2] in gr3
+            assert trp[3] in gr3
+            assert trp[0] not in gr3
+            assert trp[5] not in gr3
+
+            assert gr3.uri == None
+
+
+    def test_ip_intersect(self, trp, store):
+        """
+        Test graph in-place intersection.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:4]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr1 &= gr2
+
+            assert len(gr1) == 2
+            assert trp[2] in gr1
+            assert trp[3] in gr1
+            assert trp[0] not in gr1
+            assert trp[5] not in gr1
+
+            assert gr1.uri == URIRef('http://example.edu/imr01')
+
+
+    def test_xor(self, trp, store):
+        """
+        Test graph XOR (symmetric difference).
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:4]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr3 = gr1 ^ gr2
+
+            assert len(gr3) == 3
+            assert trp[2] not in gr3
+            assert trp[3] not in gr3
+            assert trp[0] in gr3
+            assert trp[5] in gr3
+
+            assert gr3.uri == None
+
+
+    def test_ip_xor(self, trp, store):
+        """
+        Test graph in-place XOR (symmetric difference).
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:4]})
+            gr2 = Graph(store, uri='http://example.edu/imr02', data={*trp[2:6]})
+
+            gr1 ^= gr2
+
+            assert len(gr1) == 3
+            assert trp[2] not in gr1
+            assert trp[3] not in gr1
+            assert trp[0] in gr1
+            assert trp[5] in gr1
+
+            assert gr1.uri == URIRef('http://example.edu/imr01')
+
+
+@pytest.mark.usefixtures('trp')
+@pytest.mark.usefixtures('store')
+class TestHybridOps:
+    """
+    Test operations between IMR and graph.
+    """
+    def test_hybrid_union(self, trp, store):
+        """
+        Test hybrid IMR + graph union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:3]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr3 = gr1 | gr2
+
+            assert len(gr3) == 5
+            assert trp[0] in gr3
+            assert trp[4] in gr3
+
+            assert isinstance(gr3, Graph)
+            assert gr3.uri == None
+
+            gr4 = gr2 | gr1
+
+            assert isinstance(gr4, Graph)
+
+            assert gr3 == gr4
+
+
+    def test_ip_union_imr(self, trp, store):
+        """
+        Test IMR + graph in-place union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, uri='http://example.edu/imr01', data={*trp[:3]})
+            gr2 = Graph(store, data={*trp[2:6]})
+
+            gr1 |= gr2
+
+            assert len(gr1) == 5
+            assert trp[0] in gr1
+            assert trp[4] in gr1
+
+            assert gr1.uri == URIRef('http://example.edu/imr01')
+
+
+    def test_ip_union_gr(self, trp, store):
+        """
+        Test graph + IMR in-place union.
+        """
+        with store.txn_ctx():
+            gr1 = Graph(store, data={*trp[:3]})
+            gr2 = Graph(store, uri='http://example.edu/imr01', data={*trp[2:6]})
+
+            gr1 |= gr2
+
+            assert len(gr1) == 5
+            assert trp[0] in gr1
+            assert trp[4] in gr1
+
+            assert isinstance(gr1, Graph)

+ 0 - 688
tests/0_data_structures/test_graph.py

@@ -1,688 +0,0 @@
-import pytest
-
-from shutil import rmtree
-
-from rdflib import Graph, Namespace, URIRef
-
-from lakesuperior.model.graph.graph import SimpleGraph, Imr
-from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore
-
-
-@pytest.fixture(scope='class')
-def store():
-    """
-    Test LMDB store.
-
-    This store has a different life cycle than the one used for tests in higher
-    levels of the stack and is not bootstrapped (i.e. starts completely empty).
-    """
-    env_path = '/tmp/test_lmdbstore'
-    # Remove previous test DBs
-    rmtree(env_path, ignore_errors=True)
-    store = LmdbStore(env_path)
-    yield store
-    store.close()
-    store.destroy()
-
-
-@pytest.fixture(scope='class')
-def trp():
-    return (
-        (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0')),
-        # Exact same as [0].
-        (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0')),
-        # NOTE: s and o are in reversed order.
-        (URIRef('urn:o:0'), URIRef('urn:p:0'), URIRef('urn:s:0')),
-        (URIRef('urn:s:0'), URIRef('urn:p:1'), URIRef('urn:o:0')),
-        (URIRef('urn:s:0'), URIRef('urn:p:1'), URIRef('urn:o:1')),
-        (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:1')),
-        (URIRef('urn:s:1'), URIRef('urn:p:2'), URIRef('urn:o:2')),
-    )
-
-@pytest.mark.usefixtures('trp')
-@pytest.mark.usefixtures('store')
-class TestGraphInit:
-    """
-    Test initialization of graphs with different base data sets.
-    """
-    def test_empty(self):
-        """
-        Test creation of an empty graph.
-        """
-        gr = SimpleGraph()
-
-        assert len(gr) == 0
-
-
-    def test_init_triples(self, trp):
-        """
-        Test creation using a Python set.
-        """
-        gr = SimpleGraph(data=set(trp))
-
-        assert len(gr) == 6
-
-        for t in trp:
-            assert t in gr
-
-
-@pytest.mark.usefixtures('trp')
-class TestGraphLookup:
-    """
-    Test triple lookup.
-
-    TODO
-    """
-
-    @pytest.mark.skip(reason='TODO')
-    def test_lookup_pattern(self, trp):
-        """
-        Test lookup by basic pattern.
-        """
-        pass
-
-
-@pytest.mark.usefixtures('trp')
-class TestGraphOps:
-    """
-    Test various graph operations.
-    """
-    def test_len(self, trp):
-        """
-        Test the length of a graph with and without duplicates.
-        """
-        gr = SimpleGraph()
-        assert len(gr) == 0
-
-        gr.add((trp[0],))
-        assert len(gr) == 1
-
-        gr.add((trp[1],)) # Same values
-        assert len(gr) == 1
-
-        gr.add((trp[2],))
-        assert len(gr) == 2
-
-        gr.add(trp)
-        assert len(gr) == 6
-
-
-    def test_dup(self, trp):
-        """
-        Test operations with duplicate triples.
-        """
-        gr = SimpleGraph()
-        #import pdb; pdb.set_trace()
-
-        gr.add((trp[0],))
-        assert trp[1] in gr
-        assert trp[2] not in gr
-
-
-    def test_remove(self, trp):
-        """
-        Test adding and removing triples.
-        """
-        gr = SimpleGraph()
-
-        gr.add(trp)
-        gr.remove(trp[0])
-        assert len(gr) == 5
-        assert trp[0] not in gr
-        assert trp[1] not in gr
-
-        # This is the duplicate triple.
-        gr.remove(trp[1])
-        assert len(gr) == 5
-
-        # This is the triple in reverse order.
-        gr.remove(trp[2])
-        assert len(gr) == 4
-
-        gr.remove(trp[4])
-        assert len(gr) == 3
-
-
-    def test_union(self, trp):
-        """
-        Test graph union.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 | gr2
-
-        assert len(gr3) == 5
-        assert trp[0] in gr3
-        assert trp[4] in gr3
-
-
-    def test_ip_union(self, trp):
-        """
-        Test graph in-place union.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr1 |= gr2
-
-        assert len(gr1) == 5
-        assert trp[0] in gr1
-        assert trp[4] in gr1
-
-
-    def test_addition(self, trp):
-        """
-        Test graph addition.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 + gr2
-
-        assert len(gr3) == 5
-        assert trp[0] in gr3
-        assert trp[4] in gr3
-
-
-    def test_ip_addition(self, trp):
-        """
-        Test graph in-place addition.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr1 += gr2
-
-        assert len(gr1) == 5
-        assert trp[0] in gr1
-        assert trp[4] in gr1
-
-
-    def test_subtraction(self, trp):
-        """
-        Test graph addition.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 - gr2
-
-        assert len(gr3) == 1
-        assert trp[0] in gr3
-        assert trp[1] in gr3
-        assert trp[2] not in gr3
-        assert trp[3] not in gr3
-        assert trp[4] not in gr3
-
-        gr3 = gr2 - gr1
-
-        assert len(gr3) == 2
-        assert trp[0] not in gr3
-        assert trp[1] not in gr3
-        assert trp[2] not in gr3
-        assert trp[3] not in gr3
-        assert trp[4] in gr3
-        assert trp[5] in gr3
-
-
-    def test_ip_subtraction(self, trp):
-        """
-        Test graph in-place addition.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr1 -= gr2
-
-        assert len(gr1) == 1
-        assert trp[0] in gr1
-        assert trp[1] in gr1
-        assert trp[2] not in gr1
-        assert trp[3] not in gr1
-        assert trp[4] not in gr1
-
-
-
-    def test_intersect(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 & gr2
-
-        assert len(gr3) == 2
-        assert trp[2] in gr3
-        assert trp[3] in gr3
-        assert trp[0] not in gr3
-        assert trp[5] not in gr3
-
-
-    def test_ip_intersect(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr1 &= gr2
-
-        assert len(gr1) == 2
-        assert trp[2] in gr1
-        assert trp[3] in gr1
-        assert trp[0] not in gr1
-        assert trp[5] not in gr1
-
-
-    def test_xor(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 ^ gr2
-
-        assert len(gr3) == 3
-        assert trp[2] not in gr3
-        assert trp[3] not in gr3
-        assert trp[0] in gr3
-        assert trp[5] in gr3
-
-
-    def test_ip_xor(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = SimpleGraph()
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr1 ^= gr2
-
-        assert len(gr1) == 3
-        assert trp[2] not in gr1
-        assert trp[3] not in gr1
-        assert trp[0] in gr1
-        assert trp[5] in gr1
-
-
-
-@pytest.mark.usefixtures('trp')
-class TestImrOps:
-    """
-    Test various graph operations.
-    """
-    def test_len(self, trp):
-        """
-        Test the length of a graph with and without duplicates.
-        """
-        imr = Imr(uri='http://example.edu/imr01')
-        assert len(imr) == 0
-
-        imr.add((trp[0],))
-        assert len(imr) == 1
-
-        imr.add((trp[1],)) # Same values
-        assert len(imr) == 1
-
-        imr.add((trp[2],))
-        assert len(imr) == 2
-
-        imr.add(trp)
-        assert len(imr) == 6
-
-
-    def test_dup(self, trp):
-        """
-        Test operations with duplicate triples.
-        """
-        imr = Imr(uri='http://example.edu/imr01')
-        #import pdb; pdb.set_trace()
-
-        imr.add((trp[0],))
-        assert trp[1] in imr
-        assert trp[2] not in imr
-
-
-    def test_remove(self, trp):
-        """
-        Test adding and removing triples.
-        """
-        imr = Imr(uri='http://example.edu/imr01')
-
-        imr.add(trp)
-        imr.remove(trp[0])
-        assert len(imr) == 5
-        assert trp[0] not in imr
-        assert trp[1] not in imr
-
-        # This is the duplicate triple.
-        imr.remove(trp[1])
-        assert len(imr) == 5
-
-        # This is the triple in reverse order.
-        imr.remove(trp[2])
-        assert len(imr) == 4
-
-        imr.remove(trp[4])
-        assert len(imr) == 3
-
-
-    def test_union(self, trp):
-        """
-        Test graph union.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 | gr2
-
-        assert len(gr3) == 5
-        assert trp[0] in gr3
-        assert trp[4] in gr3
-
-        assert gr3.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_ip_union(self, trp):
-        """
-        Test graph in-place union.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr1 |= gr2
-
-        assert len(gr1) == 5
-        assert trp[0] in gr1
-        assert trp[4] in gr1
-
-        assert gr1.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_addition(self, trp):
-        """
-        Test graph addition.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 + gr2
-
-        assert len(gr3) == 5
-        assert trp[0] in gr3
-        assert trp[4] in gr3
-
-        assert gr3.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_ip_addition(self, trp):
-        """
-        Test graph in-place addition.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr1 += gr2
-
-        assert len(gr1) == 5
-        assert trp[0] in gr1
-        assert trp[4] in gr1
-
-        assert gr1.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_subtraction(self, trp):
-        """
-        Test graph addition.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 - gr2
-
-        assert len(gr3) == 1
-        assert trp[0] in gr3
-        assert trp[1] in gr3
-        assert trp[2] not in gr3
-        assert trp[3] not in gr3
-        assert trp[4] not in gr3
-
-        assert gr3.uri == URIRef('http://example.edu/imr01')
-
-        gr3 = gr2 - gr1
-
-        assert len(gr3) == 2
-        assert trp[0] not in gr3
-        assert trp[1] not in gr3
-        assert trp[2] not in gr3
-        assert trp[3] not in gr3
-        assert trp[4] in gr3
-        assert trp[5] in gr3
-
-        assert gr3.uri == URIRef('http://example.edu/imr02')
-
-
-    def test_ip_subtraction(self, trp):
-        """
-        Test graph in-place addition.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr1 -= gr2
-
-        assert len(gr1) == 1
-        assert trp[0] in gr1
-        assert trp[1] in gr1
-        assert trp[2] not in gr1
-        assert trp[3] not in gr1
-        assert trp[4] not in gr1
-
-        assert gr1.uri == URIRef('http://example.edu/imr01')
-
-
-
-    def test_intersect(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 & gr2
-
-        assert len(gr3) == 2
-        assert trp[2] in gr3
-        assert trp[3] in gr3
-        assert trp[0] not in gr3
-        assert trp[5] not in gr3
-
-        assert gr3.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_ip_intersect(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr1 &= gr2
-
-        assert len(gr1) == 2
-        assert trp[2] in gr1
-        assert trp[3] in gr1
-        assert trp[0] not in gr1
-        assert trp[5] not in gr1
-
-        assert gr1.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_xor(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 ^ gr2
-
-        assert len(gr3) == 3
-        assert trp[2] not in gr3
-        assert trp[3] not in gr3
-        assert trp[0] in gr3
-        assert trp[5] in gr3
-
-        assert gr3.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_ip_xor(self, trp):
-        """
-        Test graph intersextion.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = Imr(uri='http://example.edu/imr02')
-
-        gr1.add(trp[0:4])
-        gr2.add(trp[2:6])
-
-        gr1 ^= gr2
-
-        assert len(gr1) == 3
-        assert trp[2] not in gr1
-        assert trp[3] not in gr1
-        assert trp[0] in gr1
-        assert trp[5] in gr1
-
-        assert gr1.uri == URIRef('http://example.edu/imr01')
-
-
-@pytest.mark.usefixtures('trp')
-class TestHybridOps:
-    """
-    Test operations between IMR and graph.
-    """
-
-
-    def test_union(self, trp):
-        """
-        Test hybrid IMR + graph union.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr3 = gr1 | gr2
-
-        assert len(gr3) == 5
-        assert trp[0] in gr3
-        assert trp[4] in gr3
-
-        assert isinstance(gr3, Imr)
-        assert gr3.uri == URIRef('http://example.edu/imr01')
-
-        gr4 = gr2 | gr1
-
-        assert isinstance(gr4, SimpleGraph)
-
-        assert gr3 == gr4
-
-
-    def test_ip_union_imr(self, trp):
-        """
-        Test IMR + graph in-place union.
-        """
-        gr1 = Imr(uri='http://example.edu/imr01')
-        gr2 = SimpleGraph()
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr1 |= gr2
-
-        assert len(gr1) == 5
-        assert trp[0] in gr1
-        assert trp[4] in gr1
-
-        assert gr1.uri == URIRef('http://example.edu/imr01')
-
-
-    def test_ip_union_gr(self, trp):
-        """
-        Test graph + IMR in-place union.
-        """
-        gr1 = SimpleGraph()
-        gr2 = Imr(uri='http://example.edu/imr01')
-
-        gr1.add(trp[0:3])
-        gr2.add(trp[2:6])
-
-        gr1 |= gr2
-
-        assert len(gr1) == 5
-        assert trp[0] in gr1
-        assert trp[4] in gr1
-
-        assert isinstance(gr1, SimpleGraph)

+ 78 - 23
tests/1_store/test_lmdb_store.py → tests/1_store/test_1_0_lmdb_store.py

@@ -1,14 +1,16 @@
+import pdb
 import pytest
 
 from os import path
 from shutil import rmtree
 
-from rdflib import Graph, Namespace, URIRef
+from rdflib import Namespace, URIRef
 from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
 from rdflib.namespace import RDF, RDFS
 
+from lakesuperior.model.rdf.graph import Graph
+from lakesuperior.store.base_lmdb_store import LmdbError
 from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore
-from lakesuperior.model.graph.graph import Imr
 
 
 @pytest.fixture(scope='class')
@@ -68,6 +70,12 @@ class TestStoreInit:
         assert not path.exists(env_path + '-lock')
 
 
+
+@pytest.mark.usefixtures('store')
+class TestTransactionContext:
+    '''
+    Tests for initializing and shutting down store and transactions.
+    '''
     def test_txn(self, store):
         '''
         Test opening and closing the main transaction.
@@ -107,20 +115,80 @@ class TestStoreInit:
         '''
         Test rolling back a transaction.
         '''
+        trp = (
+            URIRef('urn:nogo:s'), URIRef('urn:nogo:p'), URIRef('urn:nogo:o')
+        )
         try:
             with store.txn_ctx(True):
-                store.add((
-                    URIRef('urn:nogo:s'), URIRef('urn:nogo:p'),
-                    URIRef('urn:nogo:o')))
+                store.add(trp)
                 raise RuntimeError() # This should roll back the transaction.
         except RuntimeError:
             pass
 
         with store.txn_ctx():
-            res = set(store.triples((None, None, None)))
+            res = set(store.triples(trp))
         assert len(res) == 0
 
 
+    def test_nested_ro_txn(self, store):
+        """
+        Test two nested RO transactions.
+        """
+        trp = (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0'))
+        with store.txn_ctx(True):
+            store.add(trp)
+        with store.txn_ctx():
+            with store.txn_ctx():
+                res = {*store.triples(trp)}
+                assert trp in {q[0] for q in res}
+            assert trp in {q[0] for q in res}
+
+
+    def test_nested_ro_txn_nowrite(self, store):
+        """
+        Test two nested RO transactions.
+        """
+        trp = (URIRef('urn:s:0'), URIRef('urn:p:0'), URIRef('urn:o:0'))
+        with pytest.raises(LmdbError):
+            with store.txn_ctx():
+                with store.txn_ctx():
+                    store.add(trp)
+
+
+    def test_nested_ro_rw_txn(self, store):
+        """
+        Test a RO transaction nested into a RW one.
+        """
+        trp = (URIRef('urn:s:1'), URIRef('urn:p:1'), URIRef('urn:o:1'))
+        with store.txn_ctx():
+            with store.txn_ctx(True):
+                store.add(trp)
+            # Outer txn should now see the new triple.
+            assert trp in {q[0] for q in store.triples(trp)}
+
+
+    def test_nested_rw_ro_txn(self, store):
+        """
+        Test that a RO transaction nested in a RW transaction can write.
+        """
+        trp = (URIRef('urn:s:2'), URIRef('urn:p:2'), URIRef('urn:o:2'))
+        with store.txn_ctx(True):
+            with store.txn_ctx():
+                store.add(trp)
+            assert trp in {q[0] for q in store.triples(trp)}
+
+
+    def test_nested_rw_rw_txn(self, store):
+        """
+        Test that a RW transaction nested in a RW transaction can write.
+        """
+        trp = (URIRef('urn:s:3'), URIRef('urn:p:3'), URIRef('urn:o:3'))
+        with store.txn_ctx(True):
+            with store.txn_ctx():
+                store.add(trp)
+            assert trp in {q[0] for q in store.triples(trp)}
+
+
 @pytest.mark.usefixtures('store')
 class TestBasicOps:
     '''
@@ -784,11 +852,11 @@ class TestContext:
             res_no_ctx = store.triples(trp3)
             res_ctx = store.triples(trp3, gr2_uri)
             for res in res_no_ctx:
-                assert Imr(uri=gr_uri) in res[1]
-                assert Imr(uri=gr2_uri) in res[1]
+                assert Graph(uri=gr_uri) in res[1]
+                assert Graph(uri=gr2_uri) in res[1]
             for res in res_ctx:
-                assert Imr(uri=gr_uri) in res[1]
-                assert Imr(uri=gr2_uri) in res[1]
+                assert Graph(uri=gr_uri) in res[1]
+                assert Graph(uri=gr2_uri) in res[1]
 
 
     def test_delete_from_ctx(self, store):
@@ -862,19 +930,6 @@ class TestContext:
             assert len(set(store.triples(trp3))) == 1
 
 
-
-
-
-
-@pytest.mark.usefixtures('store')
-class TestTransactions:
-    '''
-    Tests for transaction handling.
-    '''
-    # @TODO Test concurrent reads and writes.
-    pass
-
-
 #@pytest.mark.usefixtures('store')
 #class TestRdflib:
 #    '''

+ 137 - 107
tests/2_api/test_resource_api.py → tests/2_api/test_2_0_resource_api.py

@@ -4,7 +4,7 @@ import pytest
 from io import BytesIO
 from uuid import uuid4
 
-from rdflib import Graph, Literal, URIRef
+from rdflib import Literal, URIRef
 
 from lakesuperior import env
 from lakesuperior.api import resource as rsrc_api
@@ -14,7 +14,7 @@ from lakesuperior.exceptions import (
         TombstoneError)
 from lakesuperior.globals import RES_CREATED, RES_UPDATED
 from lakesuperior.model.ldp.ldpr import Ldpr
-from lakesuperior.model.graph.graph import SimpleGraph, Imr
+from lakesuperior.model.rdf.graph import Graph, from_rdf
 
 
 @pytest.fixture(scope='module')
@@ -67,12 +67,13 @@ class TestResourceCRUD:
         The ``dcterms:title`` property should NOT be included.
         """
         gr = rsrc_api.get_metadata('/')
-        assert isinstance(gr, SimpleGraph)
+        assert isinstance(gr, Graph)
         assert len(gr) == 9
-        assert gr[gr.uri : nsc['rdf'].type : nsc['ldp'].Resource ]
-        assert not gr[
-            gr.uri : nsc['dcterms'].title : Literal("Repository Root")
-        ]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert gr[gr.uri : nsc['rdf'].type : nsc['ldp'].Resource ]
+            assert not gr[
+                gr.uri : nsc['dcterms'].title : Literal("Repository Root")
+            ]
 
 
     def test_get_root_node(self):
@@ -85,9 +86,10 @@ class TestResourceCRUD:
         assert isinstance(rsrc, Ldpr)
         gr = rsrc.imr
         assert len(gr) == 10
-        assert gr[gr.uri : nsc['rdf'].type : nsc['ldp'].Resource ]
-        assert gr[
-            gr.uri : nsc['dcterms'].title : Literal('Repository Root')]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert gr[gr.uri : nsc['rdf'].type : nsc['ldp'].Resource ]
+            assert gr[
+                gr.uri : nsc['dcterms'].title : Literal('Repository Root')]
 
 
     def test_get_nonexisting_node(self):
@@ -104,16 +106,18 @@ class TestResourceCRUD:
         """
         uid = '/rsrc_from_graph'
         uri = nsc['fcres'][uid]
-        gr = Graph().parse(
-            data='<> a <http://ex.org/type#A> .', format='turtle',
-            publicID=uri)
+        with env.app_globals.rdf_store.txn_ctx():
+            gr = from_rdf(
+                data='<> a <http://ex.org/type#A> .', format='turtle',
+                publicID=uri)
         evt, _ = rsrc_api.create_or_replace(uid, graph=gr)
 
         rsrc = rsrc_api.get(uid)
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#A')]
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : nsc['ldp'].RDFSource]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#A')]
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : nsc['ldp'].RDFSource]
 
 
     def test_create_ldp_nr(self):
@@ -126,38 +130,45 @@ class TestResourceCRUD:
                 uid, stream=BytesIO(data), mimetype='text/plain')
 
         rsrc = rsrc_api.get(uid)
-        assert rsrc.content.read() == data
+        with rsrc.imr.store.txn_ctx():
+            assert rsrc.content.read() == data
 
 
     def test_replace_rsrc(self):
         uid = '/test_replace'
         uri = nsc['fcres'][uid]
-        gr1 = Graph().parse(
-            data='<> a <http://ex.org/type#A> .', format='turtle',
-            publicID=uri)
+        with env.app_globals.rdf_store.txn_ctx():
+            gr1 = from_rdf(
+                data='<> a <http://ex.org/type#A> .', format='turtle',
+                publicID=uri
+            )
         evt, _ = rsrc_api.create_or_replace(uid, graph=gr1)
         assert evt == RES_CREATED
 
         rsrc = rsrc_api.get(uid)
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#A')]
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : nsc['ldp'].RDFSource]
-
-        gr2 = Graph().parse(
-            data='<> a <http://ex.org/type#B> .', format='turtle',
-            publicID=uri)
+        with env.app_globals.rdf_store.txn_ctx():
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#A')]
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : nsc['ldp'].RDFSource]
+
+        with env.app_globals.rdf_store.txn_ctx():
+            gr2 = from_rdf(
+                data='<> a <http://ex.org/type#B> .', format='turtle',
+                publicID=uri
+            )
         #pdb.set_trace()
         evt, _ = rsrc_api.create_or_replace(uid, graph=gr2)
         assert evt == RES_UPDATED
 
         rsrc = rsrc_api.get(uid)
-        assert not rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#A')]
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#B')]
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : nsc['ldp'].RDFSource]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert not rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#A')]
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : URIRef('http://ex.org/type#B')]
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : nsc['ldp'].RDFSource]
 
 
     def test_replace_incompatible_type(self):
@@ -169,9 +180,11 @@ class TestResourceCRUD:
         uid_rs = '/test_incomp_rs'
         uid_nr = '/test_incomp_nr'
         data = b'mock binary content'
-        gr = Graph().parse(
-            data='<> a <http://ex.org/type#A> .', format='turtle',
-            publicID=nsc['fcres'][uid_rs])
+        with env.app_globals.rdf_store.txn_ctx():
+            gr = from_rdf(
+                data='<> a <http://ex.org/type#A> .', format='turtle',
+                publicID=nsc['fcres'][uid_rs]
+            )
 
         rsrc_api.create_or_replace(uid_rs, graph=gr)
         rsrc_api.create_or_replace(
@@ -205,17 +218,18 @@ class TestResourceCRUD:
             (URIRef(uri), nsc['rdf'].type, nsc['foaf'].Organization),
         }
 
-        gr = Graph()
-        gr += init_trp
+        with env.app_globals.rdf_store.txn_ctx():
+            gr = Graph(data=init_trp)
         rsrc_api.create_or_replace(uid, graph=gr)
         rsrc_api.update_delta(uid, remove_trp, add_trp)
         rsrc = rsrc_api.get(uid)
 
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : nsc['foaf'].Organization]
-        assert rsrc.imr[rsrc.uri : nsc['foaf'].name : Literal('Joe Bob')]
-        assert not rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : nsc['foaf'].Person]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : nsc['foaf'].Organization]
+            assert rsrc.imr[rsrc.uri : nsc['foaf'].name : Literal('Joe Bob')]
+            assert not rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : nsc['foaf'].Person]
 
 
     def test_delta_update_wildcard(self):
@@ -237,20 +251,21 @@ class TestResourceCRUD:
             (URIRef(uri), nsc['foaf'].name, Literal('Joan Knob')),
         }
 
-        gr = Graph()
-        gr += init_trp
+        with env.app_globals.rdf_store.txn_ctx():
+            gr = Graph(data=init_trp)
         rsrc_api.create_or_replace(uid, graph=gr)
         rsrc_api.update_delta(uid, remove_trp, add_trp)
         rsrc = rsrc_api.get(uid)
 
-        assert rsrc.imr[
-                rsrc.uri : nsc['rdf'].type : nsc['foaf'].Person]
-        assert rsrc.imr[rsrc.uri : nsc['foaf'].name : Literal('Joan Knob')]
-        assert not rsrc.imr[rsrc.uri : nsc['foaf'].name : Literal('Joe Bob')]
-        assert not rsrc.imr[
-            rsrc.uri : nsc['foaf'].name : Literal('Joe Average Bob')]
-        assert not rsrc.imr[
-            rsrc.uri : nsc['foaf'].name : Literal('Joe 12oz Bob')]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert rsrc.imr[
+                    rsrc.uri : nsc['rdf'].type : nsc['foaf'].Person]
+            assert rsrc.imr[rsrc.uri : nsc['foaf'].name : Literal('Joan Knob')]
+            assert not rsrc.imr[rsrc.uri : nsc['foaf'].name : Literal('Joe Bob')]
+            assert not rsrc.imr[
+                rsrc.uri : nsc['foaf'].name : Literal('Joe Average Bob')]
+            assert not rsrc.imr[
+                rsrc.uri : nsc['foaf'].name : Literal('Joe 12oz Bob')]
 
 
     def test_sparql_update(self):
@@ -274,19 +289,20 @@ class TestResourceCRUD:
         ver_uid = rsrc_api.create_version(uid, 'v1').split('fcr:versions/')[-1]
 
         rsrc = rsrc_api.update(uid, update_str)
-        assert (
-            (rsrc.uri, nsc['dcterms'].title, Literal('Original title.'))
-            not in set(rsrc.imr))
-        assert (
-            (rsrc.uri, nsc['dcterms'].title, Literal('Title #2.'))
-            in set(rsrc.imr))
-        assert (
-            (rsrc.uri, nsc['dcterms'].title, Literal('Title #3.'))
-            in set(rsrc.imr))
-        assert ((
-                URIRef(str(rsrc.uri) + '#h1'),
-                nsc['dcterms'].title, Literal('This is a hash.'))
-            in set(rsrc.imr))
+        with env.app_globals.rdf_store.txn_ctx():
+            assert (
+                (rsrc.uri, nsc['dcterms'].title, Literal('Original title.'))
+                not in set(rsrc.imr))
+            assert (
+                (rsrc.uri, nsc['dcterms'].title, Literal('Title #2.'))
+                in set(rsrc.imr))
+            assert (
+                (rsrc.uri, nsc['dcterms'].title, Literal('Title #3.'))
+                in set(rsrc.imr))
+            assert ((
+                    URIRef(str(rsrc.uri) + '#h1'),
+                    nsc['dcterms'].title, Literal('This is a hash.'))
+                in set(rsrc.imr))
 
 
     def test_create_ldp_dc_post(self, dc_rdf):
@@ -299,8 +315,9 @@ class TestResourceCRUD:
 
         member_rsrc = rsrc_api.get('/member')
 
-        assert nsc['ldp'].Container in dc_rsrc.ldp_types
-        assert nsc['ldp'].DirectContainer in dc_rsrc.ldp_types
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['ldp'].Container in dc_rsrc.ldp_types
+            assert nsc['ldp'].DirectContainer in dc_rsrc.ldp_types
 
 
     def test_create_ldp_dc_put(self, dc_rdf):
@@ -313,8 +330,9 @@ class TestResourceCRUD:
 
         member_rsrc = rsrc_api.get('/member')
 
-        assert nsc['ldp'].Container in dc_rsrc.ldp_types
-        assert nsc['ldp'].DirectContainer in dc_rsrc.ldp_types
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['ldp'].Container in dc_rsrc.ldp_types
+            assert nsc['ldp'].DirectContainer in dc_rsrc.ldp_types
 
 
     def test_add_dc_member(self, dc_rdf):
@@ -328,8 +346,9 @@ class TestResourceCRUD:
         child_uid = rsrc_api.create(dc_uid, None).uid
         member_rsrc = rsrc_api.get('/member')
 
-        assert member_rsrc.imr[
-            member_rsrc.uri: nsc['dcterms'].relation: nsc['fcres'][child_uid]]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert member_rsrc.imr[
+                member_rsrc.uri: nsc['dcterms'].relation: nsc['fcres'][child_uid]]
 
 
     def test_indirect_container(self, ic_rdf):
@@ -351,15 +370,17 @@ class TestResourceCRUD:
                 member_uid, rdf_data=ic_member_rdf, rdf_fmt='turtle')
 
         ic_rsrc = rsrc_api.get(ic_uid)
-        assert nsc['ldp'].Container in ic_rsrc.ldp_types
-        assert nsc['ldp'].IndirectContainer in ic_rsrc.ldp_types
-        assert nsc['ldp'].DirectContainer not in ic_rsrc.ldp_types
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['ldp'].Container in ic_rsrc.ldp_types
+            assert nsc['ldp'].IndirectContainer in ic_rsrc.ldp_types
+            assert nsc['ldp'].DirectContainer not in ic_rsrc.ldp_types
 
         member_rsrc = rsrc_api.get(member_uid)
         top_cont_rsrc = rsrc_api.get(cont_uid)
-        assert top_cont_rsrc.imr[
-            top_cont_rsrc.uri: nsc['dcterms'].relation:
-            nsc['fcres'][target_uid]]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert top_cont_rsrc.imr[
+                top_cont_rsrc.uri: nsc['dcterms'].relation:
+                nsc['fcres'][target_uid]]
 
 
 
@@ -389,7 +410,8 @@ class TestAdvancedDelete:
         rsrc_api.resurrect(uid)
 
         rsrc = rsrc_api.get(uid)
-        assert nsc['ldp'].Resource in rsrc.ldp_types
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['ldp'].Resource in rsrc.ldp_types
 
 
     def test_hard_delete(self):
@@ -433,10 +455,12 @@ class TestAdvancedDelete:
         uid = '/test_soft_delete_children01'
         rsrc_api.resurrect(uid)
         parent_rsrc = rsrc_api.get(uid)
-        assert nsc['ldp'].Resource in parent_rsrc.ldp_types
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['ldp'].Resource in parent_rsrc.ldp_types
         for i in range(3):
             child_rsrc = rsrc_api.get('{}/child{}'.format(uid, i))
-            assert nsc['ldp'].Resource in child_rsrc.ldp_types
+            with env.app_globals.rdf_store.txn_ctx():
+                assert nsc['ldp'].Resource in child_rsrc.ldp_types
 
 
     def test_hard_delete_children(self):
@@ -515,24 +539,26 @@ class TestResourceVersioning:
         rsrc_api.create_or_replace(uid, rdf_data=rdf_data, rdf_fmt='turtle')
         ver_uid = rsrc_api.create_version(uid, 'v1').split('fcr:versions/')[-1]
         #FIXME Without this, the test fails.
-        set(rsrc_api.get_version(uid, ver_uid))
+        #set(rsrc_api.get_version(uid, ver_uid))
 
         rsrc_api.update(uid, update_str)
         current = rsrc_api.get(uid)
-        assert (
-            (current.uri, nsc['dcterms'].title, Literal('Title #2.'))
-            in current.imr)
-        assert (
-            (current.uri, nsc['dcterms'].title, Literal('Original title.'))
-            not in current.imr)
+        with env.app_globals.rdf_store.txn_ctx():
+            assert (
+                (current.uri, nsc['dcterms'].title, Literal('Title #2.'))
+                in current.imr)
+            assert (
+                (current.uri, nsc['dcterms'].title, Literal('Original title.'))
+                not in current.imr)
 
         v1 = rsrc_api.get_version(uid, ver_uid)
-        assert (
-            (v1.uri, nsc['dcterms'].title, Literal('Original title.'))
-            in set(v1))
-        assert (
-            (v1.uri, nsc['dcterms'].title, Literal('Title #2.'))
-            not in set(v1))
+        with env.app_globals.rdf_store.txn_ctx():
+            assert (
+                (v1.uri, nsc['dcterms'].title, Literal('Original title.'))
+                in set(v1))
+            assert (
+                (v1.uri, nsc['dcterms'].title, Literal('Title #2.'))
+                    not in set(v1))
 
 
     def test_revert_to_version(self):
@@ -545,9 +571,10 @@ class TestResourceVersioning:
         ver_uid = 'v1'
         rsrc_api.revert_to_version(uid, ver_uid)
         rev = rsrc_api.get(uid)
-        assert (
-            (rev.uri, nsc['dcterms'].title, Literal('Original title.'))
-            in rev.imr)
+        with env.app_globals.rdf_store.txn_ctx():
+            assert (
+                (rev.uri, nsc['dcterms'].title, Literal('Original title.'))
+                in rev.imr)
 
 
     def test_versioning_children(self):
@@ -569,18 +596,21 @@ class TestResourceVersioning:
         rsrc_api.create_or_replace(ch1_uid)
         ver_uid = rsrc_api.create_version(uid, ver_uid).split('fcr:versions/')[-1]
         rsrc = rsrc_api.get(uid)
-        assert nsc['fcres'][ch1_uid] in rsrc.imr[
-                rsrc.uri : nsc['ldp'].contains]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['fcres'][ch1_uid] in rsrc.imr[
+                    rsrc.uri : nsc['ldp'].contains]
 
         rsrc_api.create_or_replace(ch2_uid)
         rsrc = rsrc_api.get(uid)
-        assert nsc['fcres'][ch2_uid] in rsrc.imr[
-                rsrc.uri : nsc['ldp'].contains]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['fcres'][ch2_uid] in rsrc.imr[
+                    rsrc.uri : nsc['ldp'].contains]
 
         rsrc_api.revert_to_version(uid, ver_uid)
         rsrc = rsrc_api.get(uid)
-        assert nsc['fcres'][ch1_uid] in rsrc.imr[
-                rsrc.uri : nsc['ldp'].contains]
-        assert nsc['fcres'][ch2_uid] in rsrc.imr[
-                rsrc.uri : nsc['ldp'].contains]
+        with env.app_globals.rdf_store.txn_ctx():
+            assert nsc['fcres'][ch1_uid] in rsrc.imr[
+                    rsrc.uri : nsc['ldp'].contains]
+            assert nsc['fcres'][ch2_uid] in rsrc.imr[
+                    rsrc.uri : nsc['ldp'].contains]
 

+ 11 - 6
tests/2_api/test_admin_api.py → tests/2_api/test_2_1_admin_api.py

@@ -4,13 +4,14 @@ import pytest
 from io import BytesIO
 from uuid import uuid4
 
-from rdflib import Graph, URIRef
+from rdflib import URIRef
 
 from lakesuperior import env
 from lakesuperior.api import resource as rsrc_api
 from lakesuperior.api import admin as admin_api
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import ChecksumValidationError
+from lakesuperior.model.rdf.graph import Graph, from_rdf
 
 
 @pytest.mark.usefixtures('db')
@@ -25,9 +26,12 @@ class TestAdminApi:
         """
         uid1 = '/test_refint1'
         uid2 = '/test_refint2'
-        gr = Graph().parse(
-                data='<> <http://ex.org/ns#p1> <info:fcres{}> .'.format(uid1),
-                format='turtle', publicID=nsc['fcres'][uid2])
+        with env.app_globals.rdf_store.txn_ctx():
+            gr = from_rdf(
+                store=env.app_globals.rdf_store,
+                data=f'<> <http://ex.org/ns#p1> <info:fcres{uid1}> .',
+                format='turtle', publicID=nsc['fcres'][uid2]
+            )
         rsrc_api.create_or_replace(uid1, graph=gr)
 
         assert admin_api.integrity_check() == set()
@@ -76,8 +80,9 @@ class TestAdminApi:
 
         _, rsrc = rsrc_api.create_or_replace(uid, stream=content)
 
-        with open(rsrc.local_path, 'wb') as fh:
-            fh.write(uuid4().bytes)
+        with env.app_globals.rdf_store.txn_ctx():
+            with open(rsrc.local_path, 'wb') as fh:
+                fh.write(uuid4().bytes)
 
         with pytest.raises(ChecksumValidationError):
             admin_api.fixity_check(uid)

+ 0 - 0
tests/3_endpoints/test_ldp.py → tests/3_endpoints/test_3_0_ldp.py


+ 4 - 1
tests/3_endpoints/test_admin.py → tests/3_endpoints/test_3_1_admin.py

@@ -3,6 +3,7 @@ import pytest
 from io import BytesIO
 from uuid import uuid4
 
+from lakesuperior import env
 from lakesuperior.api import resource as rsrc_api
 
 
@@ -42,7 +43,9 @@ class TestAdminApi:
 
         rsrc = rsrc_api.get(f'/{uid}')
 
-        with open(rsrc.local_path, 'wb') as fh:
+        with env.app_globals.rdf_store.txn_ctx():
+            fname = rsrc.local_path
+        with open(fname, 'wb') as fh:
             fh.write(uuid4().bytes)
 
         assert self.client.get(fix_path).status_code == 412

+ 0 - 0
tests/3_endpoints/test_query.py → tests/3_endpoints/test_3_2_query.py


+ 0 - 0
tests/4_ancillary/test_toolbox.py → tests/4_ancillary/test_4_0_toolbox.py


部分文件因为文件数量过多而无法显示