Browse Source

Place lookup_1bound and lookup_2bound in main class; merge default graph.

Stefano Cossu 7 years ago
parent
commit
3844bbfad1

+ 126 - 115
lakesuperior/store_layouts/ldp_rs/lmdb_store.py

@@ -12,8 +12,9 @@ from urllib.request import pathname2url
 
 
 import lmdb
 import lmdb
 
 
-from rdflib.store import Store, VALID_STORE, NO_STORE
 from rdflib import Graph, Namespace, URIRef, Variable
 from rdflib import Graph, Namespace, URIRef, Variable
+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
+from rdflib.store import Store, VALID_STORE, NO_STORE
 
 
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
@@ -175,7 +176,7 @@ class LmdbStore(Store):
     - t:st (term key: serialized term; 1:1)
     - t:st (term key: serialized term; 1:1)
     - spo:c (joined S, P, O keys: context key; dupsort, dupfixed)
     - spo:c (joined S, P, O keys: context key; dupsort, dupfixed)
     - c: (context keys only, values are the empty bytestring; 1:1)
     - c: (context keys only, values are the empty bytestring; 1:1)
-    - pfx:ns (prefix: pickled namespace; unique)
+    - pfx:ns (prefix: pickled namespace; 1:1)
 
 
     And 6 indices to optimize lookup for all possible bound/unbound term
     And 6 indices to optimize lookup for all possible bound/unbound term
     combination in a triple:
     combination in a triple:
@@ -185,7 +186,7 @@ class LmdbStore(Store):
     - p:so (P key: joined S, O keys; dupsort, dupfixed)
     - p:so (P key: joined S, O keys; dupsort, dupfixed)
     - o:sp (O key: joined S, P keys; dupsort, dupfixed)
     - o:sp (O key: joined S, P keys; dupsort, dupfixed)
     - c:spo (context → triple association; dupsort, dupfixed)
     - c:spo (context → triple association; dupsort, dupfixed)
-    - ns:pfx (pickled namespace: prefix; unique)
+    - ns:pfx (pickled namespace: prefix; 1:1)
 
 
     These two data sets are stored in separate environments, i.e. separate
     These two data sets are stored in separate environments, i.e. separate
     files in the filesystem. The index could be recreated from the main data
     files in the filesystem. The index could be recreated from the main data
@@ -213,8 +214,6 @@ class LmdbStore(Store):
     '''Separator byte. Used to join and split individual term keys.'''
     '''Separator byte. Used to join and split individual term keys.'''
     SEP_BYTE = b'\x00'
     SEP_BYTE = b'\x00'
 
 
-    DEFAULT_GRAPH_URI = URIRef('urn:fcrepo:default_graph')
-
     KEY_LENGTH = 5 # Max key length for terms. That allows for A LOT of terms.
     KEY_LENGTH = 5 # Max key length for terms. That allows for A LOT of terms.
     KEY_START = 2 # \x00 is reserved as a separator. \x01 is spare.
     KEY_START = 2 # \x00 is reserved as a separator. \x01 is spare.
 
 
@@ -299,11 +298,11 @@ class LmdbStore(Store):
     def __len__(self, context=None):
     def __len__(self, context=None):
         '''
         '''
         Return length of the dataset.
         Return length of the dataset.
+
+        @param context (rdflib.URIRef | rdflib.Graph) Context to restrict count
+        to.
         '''
         '''
-        if context == self:
-            context = None
-        if isinstance(context, Graph):
-            context = context.identifier
+        context = self._normalize_context(context)
 
 
         if context is not None:
         if context is not None:
             #dataset = self.triples((None, None, None), context)
             #dataset = self.triples((None, None, None), context)
@@ -348,7 +347,7 @@ class LmdbStore(Store):
         '''
         '''
         if not self.is_open:
         if not self.is_open:
             raise RuntimeError('Store must be opened first.')
             raise RuntimeError('Store must be opened first.')
-        logger.info('Beginning a {} transaction.'.format(
+        logger.debug('Beginning a {} transaction.'.format(
             'read/write' if write else 'read-only'))
             'read/write' if write else 'read-only'))
 
 
         self.data_txn = self.data_env.begin(buffers=True, write=write)
         self.data_txn = self.data_env.begin(buffers=True, write=write)
@@ -480,17 +479,16 @@ class LmdbStore(Store):
         @param quoted (bool) Not used.
         @param quoted (bool) Not used.
         '''
         '''
         #import pdb; pdb.set_trace()
         #import pdb; pdb.set_trace()
-        assert context != self, "Cannot add triple directly to store"
+        context = self._normalize_context(context)
+        if context is None:
+            context = RDFLIB_DEFAULT_GRAPH_URI
+
         Store.add(self, triple, context)
         Store.add(self, triple, context)
 
 
         #logger.info('Adding triple: {}'.format(triple))
         #logger.info('Adding triple: {}'.format(triple))
-        if context is None:
-            context = Graph(identifier=self.DEFAULT_GRAPH_URI)
         pk_trp = self._pickle(triple)
         pk_trp = self._pickle(triple)
 
 
         pk_s, pk_p, pk_o = [self._pickle(t) for t in triple]
         pk_s, pk_p, pk_o = [self._pickle(t) for t in triple]
-        if isinstance(context, Graph):
-            context = context.identifier
         #logger.debug('Adding quad: {} {}'.format(triple, context))
         #logger.debug('Adding quad: {} {}'.format(triple, context))
         pk_c = self._pickle(context)
         pk_c = self._pickle(context)
 
 
@@ -532,9 +530,8 @@ class LmdbStore(Store):
         '''
         '''
         #logger.debug('Removing triples by pattern: {} on context: {}'.format(
         #logger.debug('Removing triples by pattern: {} on context: {}'.format(
         #    triple_pattern, context))
         #    triple_pattern, context))
+        context = self._normalize_context(context)
         if context is not None:
         if context is not None:
-            if isinstance(context, Graph):
-                context = context.identifier
             ck = self._to_key(context)
             ck = self._to_key(context)
             # If context is specified but not found, return to avoid deleting
             # If context is specified but not found, return to avoid deleting
             # the wrong triples.
             # the wrong triples.
@@ -585,12 +582,8 @@ class LmdbStore(Store):
         #    triple_pattern, context))
         #    triple_pattern, context))
         # This sounds strange, RDFLib should be passing None at this point,
         # This sounds strange, RDFLib should be passing None at this point,
         # but anyway...
         # but anyway...
-        if isinstance(context, Graph) and isinstance(
-                context.identifier, Variable):
-            context = None
-        if isinstance(context, Graph):
-            context = context.identifier
-            #logger.debug('Converted graph into URI: {}'.format(context))
+        context = self._normalize_context(context)
+
         with self.cur('spo:c') as cur:
         with self.cur('spo:c') as cur:
             for spok in self._triple_keys(triple_pattern, context):
             for spok in self._triple_keys(triple_pattern, context):
                 if context is not None:
                 if context is not None:
@@ -825,10 +818,9 @@ class LmdbStore(Store):
 
 
                 # Regular lookup.
                 # Regular lookup.
                 else:
                 else:
-                    for spok in self._lookup(triple_pattern):
-                        if cur.set_key_dup(ck, spok):
-                            yield spok
-                    return
+                    yield from (
+                            spok for spok in self._lookup(triple_pattern)
+                            if cur.set_key_dup(ck, spok))
         else:
         else:
             yield from self._lookup(triple_pattern)
             yield from self._lookup(triple_pattern)
 
 
@@ -928,95 +920,28 @@ class LmdbStore(Store):
         return hashlib.new(self.KEY_HASH_ALGO, s).digest()
         return hashlib.new(self.KEY_HASH_ALGO, s).digest()
 
 
 
 
-    def _lookup(self, triple_pattern):
+    def _normalize_context(self, context):
         '''
         '''
-        Look up triples in the indices based on a triple pattern.
+        Normalize a context parameter to conform to the model expectations.
 
 
-        @return iterator of matching triple keys.
+        @param context (URIRef | Graph | None) Context URI or graph.
         '''
         '''
-        def lookup_1bound(label, term):
-            '''
-            Lookup triples for a pattern with one bound term.
-            '''
-            #import pdb; pdb.set_trace()
-            k = self._to_key(term)
-            if not k:
-                return iter(())
-            idx_name = label + ':' + 'spo'.replace(label, '')
-            term_order = self._lookup_ordering[idx_name]
-            with self.cur(idx_name) as cur:
-                if cur.set_key(k):
-                    for match in cur.iternext_dup():
-                        subkeys = bytes(match).split(self.SEP_BYTE)
-
-                        # Compose result.
-                        out = [None, None, None]
-                        out[term_order[0]] = k
-                        out[term_order[1]] = subkeys[0]
-                        out[term_order[2]] = subkeys[1]
+        if isinstance(context, Graph):
+            if context == self or isinstance(context.identifier, Variable):
+                context = None
+            else:
+                context = context.identifier
+                #logger.debug('Converted graph into URI: {}'.format(context))
 
 
-                        yield self.SEP_BYTE.join(out)
+        return context
 
 
 
 
-        def lookup_2bound(bound_terms):
-            '''
-            Look up triples for a pattern with two bound terms.
-
-            @param bound terms (dict) Triple labels and terms to search for,
-            in the format of, e.g. {'s': URIRef('urn:s:1'), 'o':
-            URIRef('urn:o:1')}
-            '''
-            #import pdb; pdb.set_trace()
-            if len(bound_terms) != 2:
-                raise ValueError(
-                        'Exactly 2 terms need to be bound. Got {}'.format(
-                            len(bound_terms)))
-
-            # Establish lookup ranking.
-            luc = None
-            for k_label in self._lookup_rank:
-                if k_label in bound_terms.keys():
-                    # First match is lookup term.
-                    if not luc:
-                        v_label = 'spo'.replace(k_label, '')
-                        # Lookup database key (cursor) name
-                        luc = k_label + ':' + v_label
-                        term_order = self._lookup_ordering[luc]
-                        # Term to look up
-                        luk = self._to_key(bound_terms[k_label])
-                        if not luk:
-                            return iter(())
-                        # Position of key in final triple.
-                    # Second match is the filter.
-                    else:
-                        # Filter key (position of sub-key in lookup results)
-                        fpos = v_label.index(k_label)
-                        # Fliter term
-                        ft = self._to_key(bound_terms[k_label])
-                        if not ft:
-                            return iter(())
-                        break
-
-            # Look up in index.
-            with self.cur(luc) as cur:
-                if cur.set_key(luk):
-                    # Iterate over matches and filter by second term.
-                    for match in cur.iternext_dup():
-                        subkeys = bytes(match).split(self.SEP_BYTE)
-                        flt_subkey = subkeys[fpos]
-                        if flt_subkey == ft:
-                            # Remainder (not filter) key used to complete the
-                            # triple.
-                            r_subkey = subkeys[1-fpos]
-
-                            # Compose result.
-                            out = [None, None, None]
-                            out[term_order[0]] = luk
-                            out[term_order[fpos+1]] = flt_subkey
-                            out[term_order[2-fpos]] = r_subkey
-
-                            yield self.SEP_BYTE.join(out)
+    def _lookup(self, triple_pattern):
+        '''
+        Look up triples in the indices based on a triple pattern.
 
 
+        @return iterator of matching triple keys.
+        '''
         s, p, o = triple_pattern
         s, p, o = triple_pattern
 
 
         if s is not None:
         if s is not None:
@@ -1032,26 +957,26 @@ class LmdbStore(Store):
                             return iter(())
                             return iter(())
                 # s p ?
                 # s p ?
                 else:
                 else:
-                    yield from lookup_2bound({'s': s, 'p': p})
+                    yield from self._lookup_2bound({'s': s, 'p': p})
             else:
             else:
                 # s ? o
                 # s ? o
                 if o is not None:
                 if o is not None:
-                    yield from lookup_2bound({'s': s, 'o': o})
+                    yield from self._lookup_2bound({'s': s, 'o': o})
                 # s ? ?
                 # s ? ?
                 else:
                 else:
-                    yield from lookup_1bound('s', s)
+                    yield from self._lookup_1bound('s', s)
         else:
         else:
             if p is not None:
             if p is not None:
                 # ? p o
                 # ? p o
                 if o is not None:
                 if o is not None:
-                    yield from lookup_2bound({'p': p, 'o': o})
+                    yield from self._lookup_2bound({'p': p, 'o': o})
                 # ? p ?
                 # ? p ?
                 else:
                 else:
-                    yield from lookup_1bound('p', p)
+                    yield from self._lookup_1bound('p', p)
             else:
             else:
                 # ? ? o
                 # ? ? o
                 if o is not None:
                 if o is not None:
-                    yield from lookup_1bound('o', o)
+                    yield from self._lookup_1bound('o', o)
                 # ? ? ?
                 # ? ? ?
                 else:
                 else:
                     # Get all triples in the database.
                     # Get all triples in the database.
@@ -1059,6 +984,92 @@ class LmdbStore(Store):
                         yield from cur.iternext_nodup()
                         yield from cur.iternext_nodup()
 
 
 
 
+    def _lookup_1bound(self, label, term):
+        '''
+        Lookup triples for a pattern with one bound term.
+
+        @TODO This can be called millions of times in a larger SPARQL
+        query, so it better be as efficient as it gets.
+        '''
+        #import pdb; pdb.set_trace()
+        k = self._to_key(term)
+        if not k:
+            return iter(())
+        idx_name = '{}:{}'.format(label, 'spo'.replace(label, ''))
+        term_order = self._lookup_ordering[idx_name]
+        with self.cur(idx_name) as cur:
+            if cur.set_key(k):
+                for match in cur.iternext_dup():
+                    subkeys = bytes(match).split(self.SEP_BYTE)
+
+                    # Compose result.
+                    out = [None, None, None]
+                    out[term_order[0]] = k
+                    out[term_order[1]] = subkeys[0]
+                    out[term_order[2]] = subkeys[1]
+
+                    yield self.SEP_BYTE.join(out)
+
+
+    def _lookup_2bound(self, bound_terms):
+        '''
+        Look up triples for a pattern with two bound terms.
+
+        @param bound terms (dict) Triple labels and terms to search for,
+        in the format of, e.g. {'s': URIRef('urn:s:1'), 'o':
+        URIRef('urn:o:1')}
+        '''
+        #import pdb; pdb.set_trace()
+        if len(bound_terms) != 2:
+            raise ValueError(
+                    'Exactly 2 terms need to be bound. Got {}'.format(
+                        len(bound_terms)))
+
+        # Establish lookup ranking.
+        luc = None
+        for k_label in self._lookup_rank:
+            if k_label in bound_terms.keys():
+                # First match is lookup term.
+                if not luc:
+                    v_label = 'spo'.replace(k_label, '')
+                    # Lookup database key (cursor) name
+                    luc = k_label + ':' + v_label
+                    term_order = self._lookup_ordering[luc]
+                    # Term to look up
+                    luk = self._to_key(bound_terms[k_label])
+                    if not luk:
+                        return iter(())
+                    # Position of key in final triple.
+                # Second match is the filter.
+                else:
+                    # Filter key (position of sub-key in lookup results)
+                    fpos = v_label.index(k_label)
+                    # Fliter term
+                    ft = self._to_key(bound_terms[k_label])
+                    if not ft:
+                        return iter(())
+                    break
+
+        # Look up in index.
+        with self.cur(luc) as cur:
+            if cur.set_key(luk):
+                # Iterate over matches and filter by second term.
+                for match in cur.iternext_dup():
+                    subkeys = bytes(match).split(self.SEP_BYTE)
+                    flt_subkey = subkeys[fpos]
+                    if flt_subkey == ft:
+                        # Remainder (not filter) key used to complete the
+                        # triple.
+                        r_subkey = subkeys[1-fpos]
+
+                        # Compose result.
+                        out = [None, None, None]
+                        out[term_order[0]] = luk
+                        out[term_order[fpos+1]] = flt_subkey
+                        out[term_order[2-fpos]] = r_subkey
+
+                        yield self.SEP_BYTE.join(out)
+
     def _append(self, cur, values, **kwargs):
     def _append(self, cur, values, **kwargs):
         '''
         '''
         Append one or more values to the end of a database.
         Append one or more values to the end of a database.

+ 2 - 13
lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py

@@ -183,7 +183,7 @@ class RsrcCentricLayout:
                 self.ds.update(f.read())
                 self.ds.update(f.read())
 
 
 
 
-    def get_raw(self, uri, ctx):
+    def get_raw(self, uri, ctx=None):
         '''
         '''
         Get a raw graph of a non-LDP resource.
         Get a raw graph of a non-LDP resource.
 
 
@@ -195,18 +195,7 @@ class RsrcCentricLayout:
 
 
         return rdflib.Graph
         return rdflib.Graph
         '''
         '''
-        bindings = {'s': uri}
-        if ctx:
-            bindings['g'] = ctx
-
-        qry = '''
-        CONSTRUCT { ?s ?p ?o . } {
-          GRAPH ?g {
-            ?s ?p ?o .
-          }
-        }'''
-
-        return self._parse_construct(qry, init_bindings=bindings)
+        return self.store.triples((nsc['fcres'][uid], None, None), ctx)
 
 
 
 
     def count_rsrc(self):
     def count_rsrc(self):

+ 5 - 4
tests/store/test_lmdb_store.py

@@ -3,6 +3,7 @@ import pytest
 from shutil import rmtree
 from shutil import rmtree
 
 
 from rdflib import Namespace, URIRef
 from rdflib import Namespace, URIRef
+from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
 from rdflib.namespace import RDF, RDFS
 from rdflib.namespace import RDF, RDFS
 
 
 from lakesuperior.store_layouts.ldp_rs.lmdb_store import LmdbStore, TxnManager
 from lakesuperior.store_layouts.ldp_rs.lmdb_store import LmdbStore, TxnManager
@@ -269,25 +270,25 @@ class TestContext:
         with TxnManager(store, True) as txn:
         with TxnManager(store, True) as txn:
             store.add(trp1, gr_uri)
             store.add(trp1, gr_uri)
             store.add(trp2, gr_uri)
             store.add(trp2, gr_uri)
-            store.add(trp2, store.DEFAULT_GRAPH_URI)
+            store.add(trp2, None)
             store.add(trp3, gr2_uri)
             store.add(trp3, gr2_uri)
             store.add(trp3)
             store.add(trp3)
 
 
             assert len(set(store.triples((None, None, None)))) == 3
             assert len(set(store.triples((None, None, None)))) == 3
             assert len(set(store.triples((None, None, None),
             assert len(set(store.triples((None, None, None),
-                store.DEFAULT_GRAPH_URI))) == 2
+                RDFLIB_DEFAULT_GRAPH_URI))) == 2
             assert len(set(store.triples((None, None, None), gr_uri))) == 2
             assert len(set(store.triples((None, None, None), gr_uri))) == 2
             assert len(set(store.triples((None, None, None), gr2_uri))) == 1
             assert len(set(store.triples((None, None, None), gr2_uri))) == 1
 
 
             assert gr2_uri in {gr.identifier for gr in store.contexts()}
             assert gr2_uri in {gr.identifier for gr in store.contexts()}
             assert trp1 in _clean(store.triples((None, None, None)))
             assert trp1 in _clean(store.triples((None, None, None)))
             assert trp1 not in _clean(store.triples((None, None, None),
             assert trp1 not in _clean(store.triples((None, None, None),
-                    store.DEFAULT_GRAPH_URI))
+                    RDFLIB_DEFAULT_GRAPH_URI))
             assert trp2 in _clean(store.triples((None, None, None), gr_uri))
             assert trp2 in _clean(store.triples((None, None, None), gr_uri))
             assert trp2 in _clean(store.triples((None, None, None)))
             assert trp2 in _clean(store.triples((None, None, None)))
             assert trp3 in _clean(store.triples((None, None, None), gr2_uri))
             assert trp3 in _clean(store.triples((None, None, None), gr2_uri))
             assert trp3 in _clean(store.triples((None, None, None),
             assert trp3 in _clean(store.triples((None, None, None),
-                    store.DEFAULT_GRAPH_URI))
+                    RDFLIB_DEFAULT_GRAPH_URI))
 
 
 
 
     #def test_delete_from_ctx(self, store):
     #def test_delete_from_ctx(self, store):