7 years ago · 3844bbfad1
--- a/lakesuperior/store_layouts/ldp_rs/lmdb_store.py
+++ b/lakesuperior/store_layouts/ldp_rs/lmdb_store.py
@@ -12,8 +12,9 @@ from urllib.request import pathname2url
 
															 import lmdb
														
 
															-from rdflib.store import Store, VALID_STORE, NO_STORE
														
 
															 from rdflib import Graph, Namespace, URIRef, Variable
														
 
															+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
														
 
															+from rdflib.store import Store, VALID_STORE, NO_STORE
														
 
															 logger = logging.getLogger(__name__)
														
@@ -175,7 +176,7 @@ class LmdbStore(Store):
 
															     - t:st (term key: serialized term; 1:1)
														
 
															     - spo:c (joined S, P, O keys: context key; dupsort, dupfixed)
														
 
															     - c: (context keys only, values are the empty bytestring; 1:1)
														
 
															-    - pfx:ns (prefix: pickled namespace; unique)
														
 
															+    - pfx:ns (prefix: pickled namespace; 1:1)
														
 
															     And 6 indices to optimize lookup for all possible bound/unbound term
														
 
															     combination in a triple:
														
@@ -185,7 +186,7 @@ class LmdbStore(Store):
 
															     - p:so (P key: joined S, O keys; dupsort, dupfixed)
														
 
															     - o:sp (O key: joined S, P keys; dupsort, dupfixed)
														
 
															     - c:spo (context → triple association; dupsort, dupfixed)
														
 
															-    - ns:pfx (pickled namespace: prefix; unique)
														
 
															+    - ns:pfx (pickled namespace: prefix; 1:1)
														
 
															     These two data sets are stored in separate environments, i.e. separate
														
 
															     files in the filesystem. The index could be recreated from the main data
														
@@ -213,8 +214,6 @@ class LmdbStore(Store):
 
															     '''Separator byte. Used to join and split individual term keys.'''
														
 
															     SEP_BYTE = b'\x00'
														
 
															-    DEFAULT_GRAPH_URI = URIRef('urn:fcrepo:default_graph')
														
 
															-
														
 
															     KEY_LENGTH = 5 # Max key length for terms. That allows for A LOT of terms.
														
 
															     KEY_START = 2 # \x00 is reserved as a separator. \x01 is spare.
														
@@ -299,11 +298,11 @@ class LmdbStore(Store):
 
															     def __len__(self, context=None):
														
 
															         '''
														
 
															         Return length of the dataset.
														
 
															+
														
 
															+        @param context (rdflib.URIRef | rdflib.Graph) Context to restrict count
														
 
															+        to.
														
 
															         '''
														
 
															-        if context == self:
														
 
															-            context = None
														
 
															-        if isinstance(context, Graph):
														
 
															-            context = context.identifier
														
 
															+        context = self._normalize_context(context)
														
 
															         if context is not None:
														
 
															             #dataset = self.triples((None, None, None), context)
														
@@ -348,7 +347,7 @@ class LmdbStore(Store):
 
															         '''
														
 
															         if not self.is_open:
														
 
															             raise RuntimeError('Store must be opened first.')
														
 
															-        logger.info('Beginning a {} transaction.'.format(
														
 
															+        logger.debug('Beginning a {} transaction.'.format(
														
 
															             'read/write' if write else 'read-only'))
														
 
															         self.data_txn = self.data_env.begin(buffers=True, write=write)
														
@@ -480,17 +479,16 @@ class LmdbStore(Store):
 
															         @param quoted (bool) Not used.
														
 
															         '''
														
 
															         #import pdb; pdb.set_trace()
														
 
															-        assert context != self, "Cannot add triple directly to store"
														
 
															+        context = self._normalize_context(context)
														
 
															+        if context is None:
														
 
															+            context = RDFLIB_DEFAULT_GRAPH_URI
														
 
															+
														
 
															         Store.add(self, triple, context)
														
 
															         #logger.info('Adding triple: {}'.format(triple))
														
 
															-        if context is None:
														
 
															-            context = Graph(identifier=self.DEFAULT_GRAPH_URI)
														
 
															         pk_trp = self._pickle(triple)
														
 
															         pk_s, pk_p, pk_o = [self._pickle(t) for t in triple]
														
 
															-        if isinstance(context, Graph):
														
 
															-            context = context.identifier
														
 
															         #logger.debug('Adding quad: {} {}'.format(triple, context))
														
 
															         pk_c = self._pickle(context)
														
@@ -532,9 +530,8 @@ class LmdbStore(Store):
 
															         '''
														
 
															         #logger.debug('Removing triples by pattern: {} on context: {}'.format(
														
 
															         #    triple_pattern, context))
														
 
															+        context = self._normalize_context(context)
														
 
															         if context is not None:
														
 
															-            if isinstance(context, Graph):
														
 
															-                context = context.identifier
														
 
															             ck = self._to_key(context)
														
 
															             # If context is specified but not found, return to avoid deleting
														
 
															             # the wrong triples.
														
@@ -585,12 +582,8 @@ class LmdbStore(Store):
 
															         #    triple_pattern, context))
														
 
															         # This sounds strange, RDFLib should be passing None at this point,
														
 
															         # but anyway...
														
 
															-        if isinstance(context, Graph) and isinstance(
														
 
															-                context.identifier, Variable):
														
 
															-            context = None
														
 
															-        if isinstance(context, Graph):
														
 
															-            context = context.identifier
														
 
															-            #logger.debug('Converted graph into URI: {}'.format(context))
														
 
															+        context = self._normalize_context(context)
														
 
															+
														
 
															         with self.cur('spo:c') as cur:
														
 
															             for spok in self._triple_keys(triple_pattern, context):
														
 
															                 if context is not None:
														
@@ -825,10 +818,9 @@ class LmdbStore(Store):
 
															                 # Regular lookup.
														
 
															                 else:
														
 
															-                    for spok in self._lookup(triple_pattern):
														
 
															-                        if cur.set_key_dup(ck, spok):
														
 
															-                            yield spok
														
 
															-                    return
														
 
															+                    yield from (
														
 
															+                            spok for spok in self._lookup(triple_pattern)
														
 
															+                            if cur.set_key_dup(ck, spok))
														
 
															         else:
														
 
															             yield from self._lookup(triple_pattern)
														
@@ -928,95 +920,28 @@ class LmdbStore(Store):
 
															         return hashlib.new(self.KEY_HASH_ALGO, s).digest()
														
 
															-    def _lookup(self, triple_pattern):
														
 
															+    def _normalize_context(self, context):
														
 
															         '''
														
 
															-        Look up triples in the indices based on a triple pattern.
														
 
															+        Normalize a context parameter to conform to the model expectations.
														
 
															-        @return iterator of matching triple keys.
														
 
															+        @param context (URIRef | Graph | None) Context URI or graph.
														
 
															         '''
														
 
															-        def lookup_1bound(label, term):
														
 
															-            '''
														
 
															-            Lookup triples for a pattern with one bound term.
														
 
															-            '''
														
 
															-            #import pdb; pdb.set_trace()
														
 
															-            k = self._to_key(term)
														
 
															-            if not k:
														
 
															-                return iter(())
														
 
															-            idx_name = label + ':' + 'spo'.replace(label, '')
														
 
															-            term_order = self._lookup_ordering[idx_name]
														
 
															-            with self.cur(idx_name) as cur:
														
 
															-                if cur.set_key(k):
														
 
															-                    for match in cur.iternext_dup():
														
 
															-                        subkeys = bytes(match).split(self.SEP_BYTE)
														
 
															-
														
 
															-                        # Compose result.
														
 
															-                        out = [None, None, None]
														
 
															-                        out[term_order[0]] = k
														
 
															-                        out[term_order[1]] = subkeys[0]
														
 
															-                        out[term_order[2]] = subkeys[1]
														
 
															+        if isinstance(context, Graph):
														
 
															+            if context == self or isinstance(context.identifier, Variable):
														
 
															+                context = None
														
 
															+            else:
														
 
															+                context = context.identifier
														
 
															+                #logger.debug('Converted graph into URI: {}'.format(context))
														
 
															-                        yield self.SEP_BYTE.join(out)
														
 
															+        return context
														
 
															-        def lookup_2bound(bound_terms):
														
 
															-            '''
														
 
															-            Look up triples for a pattern with two bound terms.
														
 
															-
														
 
															-            @param bound terms (dict) Triple labels and terms to search for,
														
 
															-            in the format of, e.g. {'s': URIRef('urn:s:1'), 'o':
														
 
															-            URIRef('urn:o:1')}
														
 
															-            '''
														
 
															-            #import pdb; pdb.set_trace()
														
 
															-            if len(bound_terms) != 2:
														
 
															-                raise ValueError(
														
 
															-                        'Exactly 2 terms need to be bound. Got {}'.format(
														
 
															-                            len(bound_terms)))
														
 
															-
														
 
															-            # Establish lookup ranking.
														
 
															-            luc = None
														
 
															-            for k_label in self._lookup_rank:
														
 
															-                if k_label in bound_terms.keys():
														
 
															-                    # First match is lookup term.
														
 
															-                    if not luc:
														
 
															-                        v_label = 'spo'.replace(k_label, '')
														
 
															-                        # Lookup database key (cursor) name
														
 
															-                        luc = k_label + ':' + v_label
														
 
															-                        term_order = self._lookup_ordering[luc]
														
 
															-                        # Term to look up
														
 
															-                        luk = self._to_key(bound_terms[k_label])
														
 
															-                        if not luk:
														
 
															-                            return iter(())
														
 
															-                        # Position of key in final triple.
														
 
															-                    # Second match is the filter.
														
 
															-                    else:
														
 
															-                        # Filter key (position of sub-key in lookup results)
														
 
															-                        fpos = v_label.index(k_label)
														
 
															-                        # Fliter term
														
 
															-                        ft = self._to_key(bound_terms[k_label])
														
 
															-                        if not ft:
														
 
															-                            return iter(())
														
 
															-                        break
														
 
															-
														
 
															-            # Look up in index.
														
 
															-            with self.cur(luc) as cur:
														
 
															-                if cur.set_key(luk):
														
 
															-                    # Iterate over matches and filter by second term.
														
 
															-                    for match in cur.iternext_dup():
														
 
															-                        subkeys = bytes(match).split(self.SEP_BYTE)
														
 
															-                        flt_subkey = subkeys[fpos]
														
 
															-                        if flt_subkey == ft:
														
 
															-                            # Remainder (not filter) key used to complete the
														
 
															-                            # triple.
														
 
															-                            r_subkey = subkeys[1-fpos]
														
 
															-
														
 
															-                            # Compose result.
														
 
															-                            out = [None, None, None]
														
 
															-                            out[term_order[0]] = luk
														
 
															-                            out[term_order[fpos+1]] = flt_subkey
														
 
															-                            out[term_order[2-fpos]] = r_subkey
														
 
															-
														
 
															-                            yield self.SEP_BYTE.join(out)
														
 
															+    def _lookup(self, triple_pattern):
														
 
															+        '''
														
 
															+        Look up triples in the indices based on a triple pattern.
														
 
															+        @return iterator of matching triple keys.
														
 
															+        '''
														
 
															         s, p, o = triple_pattern
														
 
															         if s is not None:
														
@@ -1032,26 +957,26 @@ class LmdbStore(Store):
 
															                             return iter(())
														
 
															                 # s p ?
														
 
															                 else:
														
 
															-                    yield from lookup_2bound({'s': s, 'p': p})
														
 
															+                    yield from self._lookup_2bound({'s': s, 'p': p})
														
 
															             else:
														
 
															                 # s ? o
														
 
															                 if o is not None:
														
 
															-                    yield from lookup_2bound({'s': s, 'o': o})
														
 
															+                    yield from self._lookup_2bound({'s': s, 'o': o})
														
 
															                 # s ? ?
														
 
															                 else:
														
 
															-                    yield from lookup_1bound('s', s)
														
 
															+                    yield from self._lookup_1bound('s', s)
														
 
															         else:
														
 
															             if p is not None:
														
 
															                 # ? p o
														
 
															                 if o is not None:
														
 
															-                    yield from lookup_2bound({'p': p, 'o': o})
														
 
															+                    yield from self._lookup_2bound({'p': p, 'o': o})
														
 
															                 # ? p ?
														
 
															                 else:
														
 
															-                    yield from lookup_1bound('p', p)
														
 
															+                    yield from self._lookup_1bound('p', p)
														
 
															             else:
														
 
															                 # ? ? o
														
 
															                 if o is not None:
														
 
															-                    yield from lookup_1bound('o', o)
														
 
															+                    yield from self._lookup_1bound('o', o)
														
 
															                 # ? ? ?
														
 
															                 else:
														
 
															                     # Get all triples in the database.
														
@@ -1059,6 +984,92 @@ class LmdbStore(Store):
 
															                         yield from cur.iternext_nodup()
														
 
															+    def _lookup_1bound(self, label, term):
														
 
															+        '''
														
 
															+        Lookup triples for a pattern with one bound term.
														
 
															+
														
 
															+        @TODO This can be called millions of times in a larger SPARQL
														
 
															+        query, so it better be as efficient as it gets.
														
 
															+        '''
														
 
															+        #import pdb; pdb.set_trace()
														
 
															+        k = self._to_key(term)
														
 
															+        if not k:
														
 
															+            return iter(())
														
 
															+        idx_name = '{}:{}'.format(label, 'spo'.replace(label, ''))
														
 
															+        term_order = self._lookup_ordering[idx_name]
														
 
															+        with self.cur(idx_name) as cur:
														
 
															+            if cur.set_key(k):
														
 
															+                for match in cur.iternext_dup():
														
 
															+                    subkeys = bytes(match).split(self.SEP_BYTE)
														
 
															+
														
 
															+                    # Compose result.
														
 
															+                    out = [None, None, None]
														
 
															+                    out[term_order[0]] = k
														
 
															+                    out[term_order[1]] = subkeys[0]
														
 
															+                    out[term_order[2]] = subkeys[1]
														
 
															+
														
 
															+                    yield self.SEP_BYTE.join(out)
														
 
															+
														
 
															+
														
 
															+    def _lookup_2bound(self, bound_terms):
														
 
															+        '''
														
 
															+        Look up triples for a pattern with two bound terms.
														
 
															+
														
 
															+        @param bound terms (dict) Triple labels and terms to search for,
														
 
															+        in the format of, e.g. {'s': URIRef('urn:s:1'), 'o':
														
 
															+        URIRef('urn:o:1')}
														
 
															+        '''
														
 
															+        #import pdb; pdb.set_trace()
														
 
															+        if len(bound_terms) != 2:
														
 
															+            raise ValueError(
														
 
															+                    'Exactly 2 terms need to be bound. Got {}'.format(
														
 
															+                        len(bound_terms)))
														
 
															+
														
 
															+        # Establish lookup ranking.
														
 
															+        luc = None
														
 
															+        for k_label in self._lookup_rank:
														
 
															+            if k_label in bound_terms.keys():
														
 
															+                # First match is lookup term.
														
 
															+                if not luc:
														
 
															+                    v_label = 'spo'.replace(k_label, '')
														
 
															+                    # Lookup database key (cursor) name
														
 
															+                    luc = k_label + ':' + v_label
														
 
															+                    term_order = self._lookup_ordering[luc]
														
 
															+                    # Term to look up
														
 
															+                    luk = self._to_key(bound_terms[k_label])
														
 
															+                    if not luk:
														
 
															+                        return iter(())
														
 
															+                    # Position of key in final triple.
														
 
															+                # Second match is the filter.
														
 
															+                else:
														
 
															+                    # Filter key (position of sub-key in lookup results)
														
 
															+                    fpos = v_label.index(k_label)
														
 
															+                    # Fliter term
														
 
															+                    ft = self._to_key(bound_terms[k_label])
														
 
															+                    if not ft:
														
 
															+                        return iter(())
														
 
															+                    break
														
 
															+
														
 
															+        # Look up in index.
														
 
															+        with self.cur(luc) as cur:
														
 
															+            if cur.set_key(luk):
														
 
															+                # Iterate over matches and filter by second term.
														
 
															+                for match in cur.iternext_dup():
														
 
															+                    subkeys = bytes(match).split(self.SEP_BYTE)
														
 
															+                    flt_subkey = subkeys[fpos]
														
 
															+                    if flt_subkey == ft:
														
 
															+                        # Remainder (not filter) key used to complete the
														
 
															+                        # triple.
														
 
															+                        r_subkey = subkeys[1-fpos]
														
 
															+
														
 
															+                        # Compose result.
														
 
															+                        out = [None, None, None]
														
 
															+                        out[term_order[0]] = luk
														
 
															+                        out[term_order[fpos+1]] = flt_subkey
														
 
															+                        out[term_order[2-fpos]] = r_subkey
														
 
															+
														
 
															+                        yield self.SEP_BYTE.join(out)
														
 
															+
														
 
															     def _append(self, cur, values, **kwargs):
														
 
															         '''
														
 
															         Append one or more values to the end of a database.
														
--- a/lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py
+++ b/lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py
@@ -183,7 +183,7 @@ class RsrcCentricLayout:
 
															                 self.ds.update(f.read())
														
 
															-    def get_raw(self, uri, ctx):
														
 
															+    def get_raw(self, uri, ctx=None):
														
 
															         '''
														
 
															         Get a raw graph of a non-LDP resource.
														
@@ -195,18 +195,7 @@ class RsrcCentricLayout:
 
															         return rdflib.Graph
														
 
															         '''
														
 
															-        bindings = {'s': uri}
														
 
															-        if ctx:
														
 
															-            bindings['g'] = ctx
														
 
															-
														
 
															-        qry = '''
														
 
															-        CONSTRUCT { ?s ?p ?o . } {
														
 
															-          GRAPH ?g {
														
 
															-            ?s ?p ?o .
														
 
															-          }
														
 
															-        }'''
														
 
															-
														
 
															-        return self._parse_construct(qry, init_bindings=bindings)
														
 
															+        return self.store.triples((nsc['fcres'][uid], None, None), ctx)
														
 
															     def count_rsrc(self):
														
--- a/tests/store/test_lmdb_store.py
+++ b/tests/store/test_lmdb_store.py
@@ -3,6 +3,7 @@ import pytest
 
															 from shutil import rmtree
														
 
															 from rdflib import Namespace, URIRef
														
 
															+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
														
 
															 from rdflib.namespace import RDF, RDFS
														
 
															 from lakesuperior.store_layouts.ldp_rs.lmdb_store import LmdbStore, TxnManager
														
@@ -269,25 +270,25 @@ class TestContext:
 
															         with TxnManager(store, True) as txn:
														
 
															             store.add(trp1, gr_uri)
														
 
															             store.add(trp2, gr_uri)
														
 
															-            store.add(trp2, store.DEFAULT_GRAPH_URI)
														
 
															+            store.add(trp2, None)
														
 
															             store.add(trp3, gr2_uri)
														
 
															             store.add(trp3)
														
 
															             assert len(set(store.triples((None, None, None)))) == 3
														
 
															             assert len(set(store.triples((None, None, None),
														
 
															-                store.DEFAULT_GRAPH_URI))) == 2
														
 
															+                RDFLIB_DEFAULT_GRAPH_URI))) == 2
														
 
															             assert len(set(store.triples((None, None, None), gr_uri))) == 2
														
 
															             assert len(set(store.triples((None, None, None), gr2_uri))) == 1
														
 
															             assert gr2_uri in {gr.identifier for gr in store.contexts()}
														
 
															             assert trp1 in _clean(store.triples((None, None, None)))
														
 
															             assert trp1 not in _clean(store.triples((None, None, None),
														
 
															-                    store.DEFAULT_GRAPH_URI))
														
 
															+                    RDFLIB_DEFAULT_GRAPH_URI))
														
 
															             assert trp2 in _clean(store.triples((None, None, None), gr_uri))
														
 
															             assert trp2 in _clean(store.triples((None, None, None)))
														
 
															             assert trp3 in _clean(store.triples((None, None, None), gr2_uri))
														
 
															             assert trp3 in _clean(store.triples((None, None, None),
														
 
															-                    store.DEFAULT_GRAPH_URI))
														
 
															+                    RDFLIB_DEFAULT_GRAPH_URI))
														
 
															     #def test_delete_from_ctx(self, store):