7 years ago · d900a13a3f
--- a/doc/notes/performance.txt
+++ b/doc/notes/performance.txt
@@ -31,8 +31,13 @@ Modified Duchamp VIAF dataset (343 triples; changed all subjects to `<>`)
 
				 
			
 
				 Retrieval of parent resource (11400 triples), pipe to /dev/null: 3.6"
			
 
				 
			
 
				+Database size: 1.2 Gb
			
 
				+
			
 
				+
			
 
				 ## LMDB Back End Test
			
 
				 
			
 
				+### Strategy #4
			
 
				+
			
 
				 10K PUTs to new resources under the same container:
			
 
				 
			
 
				 ~29' running time
			
@@ -46,3 +51,19 @@ Database size: 633 Mb
 
				 
			
 
				 Retrieval of parent resource (11400 triples), pipe to /dev/null: 3.48"
			
 
				 
			
 
				+
			
 
				+### Strategy #5
			
 
				+
			
 
				+10K PUTs to new resources under the same container:
			
 
				+
			
 
				+29' running time
			
 
				+0.176" per resource
			
 
				+3.4M triples total in repo at the end of the process
			
 
				+
			
 
				+Fewer gaps than strategy #4; however, overall timing is almost identical. The
			
 
				+blocker seems to be somewhere else.
			
 
				+
			
 
				+Database size: 422 Mb
			
 
				+
			
 
				+Retrieval of parent resource (11400 triples), pipe to /dev/null: 7.5"
			
 
				+
			
--- a/lakesuperior/store_layouts/ldp_rs/lmdb_store.py
+++ b/lakesuperior/store_layouts/ldp_rs/lmdb_store.py
@@ -235,8 +235,8 @@ class LmdbStore(Store):
 
				         'ns:pfx',
			
 
				         # Term hash to triple key: 1:1
			
 
				         'th:t',
			
 
				-        # Lookups for one known term: 1:m, fixed-length values
			
 
				-        's:spo', 'p:spo', 'o:spo', 'c:spo',
			
 
				+        # Lookups: 1:m, fixed-length values
			
 
				+        's:po', 'p:so', 'o:sp', 'c:spo',
			
 
				     )
			
 
				 
			
 
				     '''
			
@@ -249,6 +249,16 @@ class LmdbStore(Store):
 
				     '''
			
 
				     _lookup_rank = ('s', 'o', 'p')
			
 
				 
			
 
				+    '''
			
 
				+    Order of terms in the lookup indices. Used to rebuild a triple from lookup.
			
 
				+    '''
			
 
				+    _lookup_ordering = {
			
 
				+        's:po': (0, 1, 2),
			
 
				+        'p:so': (1, 0, 2),
			
 
				+        'o:sp': (2, 0, 1),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				     data_env = None
			
 
				     idx_env = None
			
 
				     db = None
			
@@ -515,6 +525,7 @@ class LmdbStore(Store):
 
				                     if cur.set_key(trp_key):
			
 
				                         cur.delete(dupdata=True)
			
 
				 
			
 
				+            #import pdb; pdb.set_trace()
			
 
				             self._index('remove', trp_key, ck)
			
 
				 
			
 
				 
			
@@ -801,7 +812,7 @@ class LmdbStore(Store):
 
				         self.data_env = lmdb.open(path + '/main', subdir=False, create=create,
			
 
				                 map_size=self.MAP_SIZE, max_dbs=4, readahead=False)
			
 
				         self.idx_env = lmdb.open(path + '/index', subdir=False, create=create,
			
 
				-                map_size=self.MAP_SIZE, max_dbs=9, readahead=False)
			
 
				+                map_size=self.MAP_SIZE, max_dbs=6, readahead=False)
			
 
				 
			
 
				         # Open and optionally create main databases.
			
 
				         self.dbs = {
			
@@ -881,40 +892,88 @@ class LmdbStore(Store):
 
				 
			
 
				         @return iterator of matching triple keys.
			
 
				         '''
			
 
				+        def lookup_1bound(label, term):
			
 
				+            '''
			
 
				+            Lookup triples for a pattern with one bound term.
			
 
				+            '''
			
 
				+            #import pdb; pdb.set_trace()
			
 
				+            k = self._to_key(term)
			
 
				+            if not k:
			
 
				+                return iter(())
			
 
				+            idx_name = label + ':' + 'spo'.replace(label, '')
			
 
				+            term_order = self._lookup_ordering[idx_name]
			
 
				+            with self.cur(idx_name) as cur:
			
 
				+                if cur.set_key(k):
			
 
				+                    for match in cur.iternext_dup():
			
 
				+                        subkeys = bytes(match).split(self.SEP_BYTE)
			
 
				+
			
 
				+                        # Compose result.
			
 
				+                        out = [None, None, None]
			
 
				+                        out[term_order[0]] = k
			
 
				+                        out[term_order[1]] = subkeys[0]
			
 
				+                        out[term_order[2]] = subkeys[1]
			
 
				+
			
 
				+                        yield self.SEP_BYTE.join(out)
			
 
				+
			
 
				+
			
 
				         def lookup_2bound(bound_terms):
			
 
				             '''
			
 
				             Look up triples for a pattern with two bound terms.
			
 
				+
			
 
				+            @param bound_terms (dict) Triple labels and terms to search for,
			
 
				+            in the format of, e.g. {'s': URIRef('urn:s:1'), 'o':
			
 
				+            URIRef('urn:o:1')}
			
 
				             '''
			
 
				-            if not len(bound_terms) == 2:
			
 
				+            #import pdb; pdb.set_trace()
			
 
				+            if len(bound_terms) != 2:
			
 
				                 raise ValueError(
			
 
				                         'Exactly 2 terms need to be bound. Got {}'.format(
			
 
				                             len(bound_terms)))
			
 
				 
			
 
				             # Establish lookup ranking.
			
 
				-            luk = None
			
 
				-            for k in self._lookup_rank:
			
 
				-                if k in bound_terms.keys():
			
 
				+            luc = None
			
 
				+            for k_label in self._lookup_rank:
			
 
				+                if k_label in bound_terms.keys():
			
 
				                     # First match is lookup term.
			
 
				-                    if not luk:
			
 
				-                        # Lookup database key (cursor)
			
 
				-                        luk = k + ':spo'
			
 
				+                    if not luc:
			
 
				+                        v_label = 'spo'.replace(k_label, '')
			
 
				+                        # Lookup database key (cursor) name
			
 
				+                        luc = k_label + ':' + v_label
			
 
				+                        term_order = self._lookup_ordering[luc]
			
 
				                         # Term to look up
			
 
				-                        lut = bound_terms[k]
			
 
				+                        luk = self._to_key(bound_terms[k_label])
			
 
				+                        if not luk:
			
 
				+                            return iter(())
			
 
				+                        # Position of key in final triple.
			
 
				                     # Second match is the filter.
			
 
				                     else:
			
 
				-                        # Filter key (position in triple key)
			
 
				-                        fpos = 'spo'.index(k)
			
 
				+                        # Filter key (position of sub-key in lookup results)
			
 
				+                        fpos = v_label.index(k_label)
			
 
				                         # Filter term
			
 
				-                        ft = bound_terms[k]
			
 
				+                        ft = self._to_key(bound_terms[k_label])
			
 
				+                        if not ft:
			
 
				+                            return iter(())
			
 
				+                        break
			
 
				 
			
 
				             # Look up in index.
			
 
				-            with self.cur(luk) as cur:
			
 
				-                if cur.set_key(lut):
			
 
				+            with self.cur(luc) as cur:
			
 
				+                if cur.set_key(luk):
			
 
				                     # Iterate over matches and filter by second term.
			
 
				                     for match in cur.iternext_dup():
			
 
				-                        subkey = bytes(match).split(self.SEP_BYTE)[fpos]
			
 
				-                        if subkey == ft:
			
 
				-                            yield match
			
 
				+                        subkeys = bytes(match).split(self.SEP_BYTE)
			
 
				+                        flt_subkey = subkeys[fpos]
			
 
				+                        if flt_subkey == ft:
			
 
				+                            # Remainder (not filter) key used to complete the
			
 
				+                            # triple.
			
 
				+                            r_subkey = subkeys[1-fpos]
			
 
				+
			
 
				+                            # Compose result.
			
 
				+                            out = [None, None, None]
			
 
				+                            out[term_order[0]] = luk
			
 
				+                            out[term_order[fpos+1]] = flt_subkey
			
 
				+                            out[term_order[2-fpos]] = r_subkey
			
 
				+
			
 
				+                            yield self.SEP_BYTE.join(out)
			
 
				 
			
 
				         s, p, o = triple_pattern
			
 
				 
			
@@ -938,9 +997,7 @@ class LmdbStore(Store):
 
				                     yield from lookup_2bound({'s': s, 'o': o})
			
 
				                 # s ? ?
			
 
				                 else:
			
 
				-                    bound_terms = [s]
			
 
				-                    cur_label = 's:po'
			
 
				-                    order = (0, 1, 2)
			
 
				+                    yield from lookup_1bound('s', s)
			
 
				         else:
			
 
				             if p is not None:
			
 
				                 # ? p o
			
@@ -948,40 +1005,16 @@ class LmdbStore(Store):
 
				                     yield from lookup_2bound({'p': p, 'o': o})
			
 
				                 # ? p ?
			
 
				                 else:
			
 
				-                    bound_terms = [p]
			
 
				-                    cur_label = 'p:so'
			
 
				-                    order = (1, 0, 2)
			
 
				+                    yield from lookup_1bound('p', p)
			
 
				             else:
			
 
				                 # ? ? o
			
 
				                 if o is not None:
			
 
				-                    bound_terms = [o]
			
 
				-                    cur_label = 'o:sp'
			
 
				-                    order = (1, 2, 0)
			
 
				+                    yield from lookup_1bound('o', o)
			
 
				                 # ? ? ?
			
 
				                 else:
			
 
				                     # Get all triples in the database.
			
 
				                     with self.cur('spo:c') as cur:
			
 
				                         yield from cur.iternext_nodup()
			
 
				-                    return
			
 
				-
			
 
				-        tkey = self._to_key(bound_terms)
			
 
				-        if not tkey:
			
 
				-            return iter(())
			
 
				-
			
 
				-        with self.cur(cur_label) as cur:
			
 
				-            if cur.set_key(tkey):
			
 
				-                # @FIXME For some reason LMDB blows up if this iterator is not
			
 
				-                # wrapped in a set. This may not be too bad because we can get
			
 
				-                # rid of duplicates here.
			
 
				-                for match in set(cur.iternext_dup()):
			
 
				-                    # Combine bound and found in search order.
			
 
				-                    comb_keys = (
			
 
				-                            bytes(tkey).split(self.SEP_BYTE)
			
 
				-                            + bytes(match).split(self.SEP_BYTE))
			
 
				-                    # Rearrange term keys according to given order.
			
 
				-                    yield self.SEP_BYTE.join([comb_keys[i] for i in order])
			
 
				-            else:
			
 
				-                return iter(())
			
 
				 
			
 
				 
			
 
				     def _append(self, cur, values, **kwargs):
			
@@ -1018,13 +1051,16 @@ class LmdbStore(Store):
 
				         # Split and rearrange-join keys for association and indices.
			
 
				         triple = bytes(spok).split(self.SEP_BYTE)
			
 
				         sk, pk, ok = triple[:3]
			
 
				+        spk = self.SEP_BYTE.join(triple[:2])
			
 
				+        sok = bytes(triple[0]) + self.SEP_BYTE + bytes(triple[2])
			
 
				+        pok = self.SEP_BYTE.join(triple[1:3])
			
 
				         spok = self.SEP_BYTE.join(triple[:3])
			
 
				 
			
 
				         # Associate cursor labels with k/v pairs.
			
 
				         curs = {
			
 
				-            's:spo': (sk, spok),
			
 
				-            'p:spo': (pk, spok),
			
 
				-            'o:spo': (ok, spok),
			
 
				+            's:po': (sk, pok),
			
 
				+            'p:so': (pk, sok),
			
 
				+            'o:sp': (ok, spk),
			
 
				             'c:spo': (ck, spok),
			
 
				         }
			
 
				 
			
--- a/lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py
+++ b/lakesuperior/store_layouts/ldp_rs/rsrc_centric_layout.py
@@ -338,6 +338,7 @@ class RsrcCentricLayout:
 
				 
			
 
				         @param subj_uri Subject URI.
			
 
				         '''
			
 
				+        #import pdb; pdb.set_trace()
			
 
				         # Only search in non-historic graphs.
			
 
				         qry = '''
			
 
				         CONSTRUCT { ?s1 ?p1 ?s }