
LMDB strategy #4.

Stefano Cossu, 7 years ago
Commit 3b6a0717ae
1 file changed, 441 insertions and 340 deletions

lakesuperior/store_layouts/ldp_rs/lmdb_store.py

@@ -72,19 +72,88 @@ class TxnManager(ContextDecorator):
             # jobs left from other requests.
         else:
             self.store.commit()
-            if len(self.store._data_queue):
-                self.store._apply_changes()
-            if len(self.store._idx_queue):
-                # Ditch index data. For testing data entry only.
-                #self.store._idx_queue = []
-                # Synchronous.
-                self.store._run_indexing()
-                # Threading.
-                #job = Thread(target=self.store._run_indexing)
-                # Multiprocess.
-                #job = Process(target=self.store._run_indexing)
-                #job.start()
-                #logger.info('Started indexing job #{}'.format(job.ident))
+            if self.write:
+                if len(self.store._data_queue):
+                    self.store._apply_changes()
+                if len(self.store._idx_queue):
+                    # Ditch index data. For testing data entry only.
+                    #self.store._idx_queue = []
+                    # Synchronous.
+                    self.store._run_indexing()
+                    # Threading.
+                    #job = Thread(target=self.store._run_indexing)
+                    # Multiprocess.
+                    #job = Process(target=self.store._run_indexing)
+                    #job.start()
+                    #logger.info('Started indexing job #{}'.format(job.ident))
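
A minimal usage sketch of the manager above (hypothetical, not part of the commit; it assumes a store already instantiated and opened elsewhere, and that TxnManager is constructed as `TxnManager(store, write=...)`):

    from rdflib import URIRef

    with TxnManager(store, write=True):
        store.add((URIRef('urn:ex:s'), URIRef('urn:ex:p'),
                URIRef('urn:ex:o')))
    # On a clean exit the manager commits; with write=True it also applies
    # queued data changes and runs the indexing step shown above.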
+
+
+class LexicalSequence:
+    '''
+    Fixed-length lexicographically ordered byte sequence.
+
+    Useful to generate optimized sequences of keys in LMDB.
+    '''
+    def __init__(self, start=1, max_len=5):
+        '''
+        @param start (bytes) Starting byte value. Bytes below this value are
+        never found in this sequence. This is useful to allot special bytes
+        to be used e.g. as separators.
+        @param max_len (int) Maximum number of bytes that a byte string can
+        contain. This should be chosen carefully since the number of all
+        possible key combinations is determined by this value and the `start`
+        value. The default args provide 255**5 (~1 Tn) unique combinations.
+        '''
+        self.start = start
+        self.length = max_len
+
+
+    def first(self):
+        '''
+        First possible combination.
+        '''
+        return bytearray([self.start] * self.length)
+
+
+    def next(self, n):
+        '''
+        Calculate the next closest byte sequence in lexicographical order.
+
+        This is used to fill the next available slot after the last one in
+        LMDB. Keys are byte strings, which is a convenient way to keep key
+        lengths as small as possible when they are referenced in several
+        indices.
+
+        This function assumes that all the keys are padded with the `start`
+        value up to the `max_len` length.
+
+        @param n (bytes) Current byte sequence to add to.
+        '''
+        if not n:
+            n = self.first()
+        elif isinstance(n, (bytes, memoryview)):
+            n = bytearray(n)
+        elif not isinstance(n, bytearray):
+            raise ValueError('Input sequence must be bytes or a bytearray.')
+
+        if len(n) != self.length:
+            raise ValueError('Incorrect sequence length.')
+
+        for i, b in list(enumerate(n))[::-1]:
+            try:
+                n[i] += 1
+            # If the value exceeds 255, i.e. the current value is the last one
+            except ValueError:
+                if i == 0:
+                    raise RuntimeError('BAD DAY: Sequence exhausted. No more '
+                            'combinations are possible.')
+                # Move one position up and try to increment that.
+                else:
+                    n[i] = self.start
+                    continue
+            else:
+                return bytes(n)
+
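
A quick sanity check of the sequence logic above (illustrative only, not part of the commit):

    seq = LexicalSequence(start=1, max_len=5)
    assert seq.first() == bytearray(b'\x01\x01\x01\x01\x01')
    # The last byte is incremented first...
    assert seq.next(b'\x01\x01\x01\x01\x01') == b'\x01\x01\x01\x01\x02'
    # ...and wraps back to `start` past 255, carrying one position to the left.
    assert seq.next(b'\x01\x01\x01\x01\xff') == b'\x01\x01\x01\x02\x01'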


 class LmdbStore(Store):
@@ -140,18 +209,36 @@ class LmdbStore(Store):
     '''
     KEY_HASH_ALGO = 'sha1'

+    '''Separator byte. Used to join and split individual term keys.'''
+    SEP_BYTE = b'\x00'
+
     '''
-    Whether the default graph is the union graph. At the moment only False
-    is supported.
+    Dummy bytestring to associate with a "no triple" statement in the c:spo
+    index. Used to keep track of empty graphs.
     '''
-    DEFAULT_UNION = False
+    NO_TRIPLE = b'\x01' * 5

     DEFAULT_GRAPH_URI = URIRef('urn:fcrepo:default_graph')

-    data_keys = ('tk:c', 'tk:t', 'pfx:ns')
+    KEY_LENGTH = 5 # Max key length for terms. That allows for A LOT of terms.
+    KEY_START = 2 # \x00 is reserved as a separator. \x01 is the NO_TRIPLE sentinel.
+
+    data_keys = (
+        # Term key to serialized term content: 1:1
+        't:st',
+        # Joined triple keys to context key: 1:m
+        'spo:c',
+    )
     idx_keys = (
-            'c:tk', 'sk:tk', 'pk:tk', 'ok:tk', 'spk:tk', 'sok:tk', 'pok:tk',
-            'ns:pfx')
+        # Namespace to prefix: 1:1
+        'ns:pfx',
+        # Term hash to term key: 1:1
+        'th:t',
+        # Lookups for one known term: 1:m
+        's:po', 'p:so', 'o:sp', 'c:spo',
+        # Lookups for two known terms: 1:m
+        'sp:o', 'so:p', 'po:s',
+    )

     data_env = None
     idx_env = None
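
To make the key scheme above concrete, here is a sketch of how a triple key (`spok`) is composed from three 5-byte term keys (illustrative values; real keys are assigned sequentially by `LexicalSequence`):

    SEP_BYTE = b'\x00'
    # Three hypothetical 5-byte term keys for s, p and o.
    sk = b'\x02\x02\x02\x02\x02'
    pk = b'\x02\x02\x02\x02\x03'
    ok = b'\x02\x02\x02\x02\x04'
    spok = SEP_BYTE.join((sk, pk, ok))
    assert len(spok) == 17   # 3 keys of 5 bytes each, plus 2 separators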
@@ -188,6 +275,8 @@ class LmdbStore(Store):
         self._pickle = self.node_pickler.dumps
         self._unpickle = self.node_pickler.loads

+        self._key_seq = LexicalSequence(self.KEY_START, self.KEY_LENGTH)
+

     def __len__(self, context=None):
         '''
@@ -198,8 +287,9 @@ class LmdbStore(Store):

         if context.identifier is not self.DEFAULT_GRAPH_URI:
             #dataset = self.triples((None, None, None), context)
-            dataset = (tk for tk in self.curs['c:tk'].iternext_dup())
-            return len(set(dataset))
+            with self.cur('c:spo') as cur:
+                ck = self._to_key(context.identifier)
+                if not ck or not cur.set_key(ck):
+                    return 0
+                return sum(1 for _ in cur.iternext_dup())
         else:
             return self.data_txn.stat(self.dbs['tk:t'])['entries']

@@ -237,12 +327,10 @@ class LmdbStore(Store):
             raise RuntimeError('Store must be opened first.')
         logger.info('Beginning a {} transaction.'.format(
             'read/write' if write else 'read-only'))
-        self.data_txn = self.data_env.begin(buffers=True)
-        self.idx_txn = self.idx_env.begin(buffers=True)
-        self.is_txn_rw = write
-        # Cursors.
-        self.curs = self.get_data_cursors(self.data_txn)
-        self.curs.update(self.get_idx_cursors(self.idx_txn))
+        self.data_txn = self.data_env.begin(buffers=True, write=write)
+        self.idx_txn = self.idx_env.begin(buffers=True, write=write)
+
+        self.is_txn_rw = bool(write)


     @property
@@ -350,61 +438,121 @@ class LmdbStore(Store):
         Store.add(self, triple, context)

         #logger.info('Adding triple: {}'.format(triple))
-        if self.DEFAULT_UNION:
-            raise NotImplementedError()
-            # @TODO
-        elif context is None:
+        if context is None:
             context = self.DEFAULT_GRAPH_URI
         pk_trp = self._pickle(triple)
-        trp_key = hashlib.new(self.KEY_HASH_ALGO, pk_trp).digest()
-
-        needs_indexing = False
-        with self.cur('tk:t') as cur:
-            if not cur.set_key(trp_key):
-                self._enqueue_action('put', 'tk:t', trp_key, pk_trp)
-                needs_indexing = True
-        pk_ctx = self._pickle(context.identifier) \
+        pk_s, pk_p, pk_o = [self._pickle(t) for t in triple]
+        pk_c = self._pickle(context.identifier) \
                 if isinstance(context, Graph) \
                 else self._pickle(context)
-        with self.cur('tk:c') as cur:
-            if not cur.set_key_dup(trp_key, pk_ctx):
-                self._enqueue_action('put', 'tk:c', trp_key, pk_ctx)
-                needs_indexing = True
 
-            self._idx_queue.append((trp_key, pk_ctx, triple))
+        # Add new individual terms or gather keys for existing ones.
+        keys = [None, None, None, None]
+        with self.cur('th:t') as idx_cur:
+            for i, pk_t in enumerate((pk_s, pk_p, pk_o, pk_c)):
+                thash = self._hash(pk_t)
+                if idx_cur.set_key(thash):
+                    keys[i] = idx_cur.value()
+                else:
+                    # Put new term.
+                    with self.cur('t:st') as cur:
+                        keys[i] = self._append(cur, (pk_t,))[0]
+                    # Index.
+                    idx_cur.put(thash, keys[i])
+
+        # Add triple:context association.
+        ck = keys[3]
+        spok = self.SEP_BYTE.join(keys[:3])
+        with self.cur('spo:c') as cur:
+            triple_exists = cur.set_key_dup(spok, ck)
+            if not triple_exists:
+                cur.put(spok, ck)
+
+        self._index('add', spok, ck)
+
+
+    def _index(self, action, spok, ck=None):
+        '''
+        Update index for a triple and context (add or remove).
+
+        @param action (string) 'add' or 'remove'.
+        @param spok (bytes) Triple key.
+        @param ck (bytes|None) Context key. If None, all contexts found are
+        indexed. Context MUST be specified for 'add'.
+        '''
+        # Split and rearrange-join keys for association and indices.
+        triple = spok.split(self.SEP_BYTE)
+        sk, pk, ok = triple[:3]
+        spk = self.SEP_BYTE.join(triple[:2])
+        sok = bytes(triple[0]) + self.SEP_BYTE + bytes(triple[2])
+        pok = self.SEP_BYTE.join(triple[1:3])
+        spok = self.SEP_BYTE.join(triple[:3])
+
+        # Associate cursor labels with k/v pairs.
+        curs = {
+            's:po': (sk, pok),
+            'p:so': (pk, sok),
+            'o:sp': (ok, spk),
+            'sp:o': (spk, ok),
+            'so:p': (sok, pk),
+            'po:s': (pok, sk),
+        }
+
+        # Index context association.
+        if ck:
+            cks = (ck,)
+        elif action == 'remove':
+            # Delete all contexts if none is specified.
+            with self.cur('spo:c') as spo_cur:
+                cks = [bytes(k) for k in spo_cur.iternext_dup()] \
+                        if spo_cur.set_key(spok) else []
+            with self.cur('c:spo') as c_cur:
+                for ctk in cks:
+                    if c_cur.set_key_dup(ctk, spok):
+                        c_cur.delete()
+        else:
+            raise ValueError('Cannot run an \'add\' index without context.')
+
+        # Loop over contexts.
+        for ck in cks:
+            for clabel, terms in curs.items():
+                with self.cur(clabel) as cur:
+                    if action == 'remove':
+                        if cur.set_key_dup(*terms):
+                            cur.delete()
+                    elif action == 'add':
+                        cur.put(*terms)
+                    else:
+                        raise ValueError(
+                                'Index action \'{}\' not supported.'
+                                .format(action))
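
The six lookup indices updated above cover every combination of one or two bound terms. A sketch of the k/v pairs derived from one triple key (stand-in 5-byte term keys, not part of the commit):

    SEP = b'\x00'
    sk, pk, ok = b'S' * 5, b'P' * 5, b'O' * 5
    pairs = {
        's:po': (sk, SEP.join((pk, ok))),
        'p:so': (pk, SEP.join((sk, ok))),
        'o:sp': (ok, SEP.join((sk, pk))),
        'sp:o': (SEP.join((sk, pk)), ok),
        'so:p': (SEP.join((sk, ok)), pk),
        'po:s': (SEP.join((pk, ok)), sk),
    }
    # Each pair is put into, or deleted from, the database of the same label.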


     def remove(self, triple_pattern, context=None):
         '''
         Remove a triple and start indexing.
         '''
-        if self.DEFAULT_UNION:
-            raise NotImplementedError()
-            # @TODO
-        elif context is None:
-            context = self.DEFAULT_GRAPH_URI
-
         #import pdb; pdb.set_trace()
-        pk_ctx = self._pickle(context.identifier) \
-                if isinstance(context, Graph) \
-                else self._pickle(context)
+        if context is not None:
+            if isinstance(context, Graph):
+                context = context.identifier
+            ck = self._to_key(context)
+        else:
+            ck = None
+
         for trp_key in self._triple_keys(triple_pattern, context):
             # Delete context association.
-            with self.cur('tk:c') as cur:
-                if cur.set_key_dup(trp_key, pk_ctx):
-                    triple = self._key_to_triple(trp_key)
-                    self._enqueue_action('delete', 'tk:c', trp_key, pk_ctx)
-
-                    # If no other contexts are associated with the triple,
-                    # delete it.
-                    with self.cur('tk:t') as trp_cur:
-                        if not cur.set_key(trp_key):
-                            self._enqueue_action(
-                                    'delete', 'tk:c', trp_key, None)
+            # Update indices first, while the spo:c association can still be
+            # looked up to find all contexts the triple belongs to.
+            self._index('remove', trp_key, ck)
+            with self.cur('spo:c') as cur:
+                if ck:
+                    if cur.set_key_dup(trp_key, ck):
+                        cur.delete()
+                else:
+                    # If no context is specified, remove all associations.
+                    if cur.set_key(trp_key):
+                        cur.delete(dupdata=True)

-                    self._idx_queue.append((trp_key, pk_ctx, triple))


     def triples(self, triple_pattern, context=None):
@@ -413,10 +561,23 @@ class LmdbStore(Store):

         @param triple_pattern (tuple) 3 RDFLib terms
         @param context (rdflib.Graph | None) Context graph, if available.
-        If a graph is given, only its identifier is stored.
+
+        @return Generator over triples and contexts in which each result has
+        the following format:
+        > (s, p, o), generator(contexts)
+        Where the context generator lists all contexts that the triple
+        appears in.
         '''
-        for tk in self._triple_keys(triple_pattern, context):
-            yield self._key_to_triple(tk), context
+        #import pdb; pdb.set_trace()
+        with self.cur('spo:c') as cur:
+            for spok in self._triple_keys(triple_pattern, context):
+                triple = tuple(self._from_key(spok))
+                if context is not None:
+                    yield triple, (context,)
+                elif cur.set_key(spok):
+                    contexts = (self._from_key(ck)[0]
+                            for ck in cur.iternext_dup())
+                    yield triple, contexts
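
Hypothetical usage of the return format described above (assumes a store opened and populated elsewhere):

    for (s, p, o), contexts in store.triples((None, None, None)):
        print(s, p, o, list(contexts))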


     def bind(self, prefix, namespace):
@@ -468,14 +629,14 @@ class LmdbStore(Store):
         @return generator:URIRef
         '''
         if triple:
-            with self.cur('tk:c') as cur:
+            with self.cur('spo:c') as cur:
                 cur.set_key(self._to_key(triple))
                 contexts = cur.iternext_dup()
         else:
-            with self.cur('c:tk') as cur:
+            with self.cur('c:spo') as cur:
                 contexts = cur.iternext_nodup()

-        return (self._unpickle(ctx) for ctx in contexts)
+        return (self._from_key(ctx)[0] for ctx in contexts)


     def add_graph(self, graph):
@@ -486,22 +647,39 @@ class LmdbStore(Store):
         pickled `None` value. This prevents the graph from being removed when
         all triples are removed.

-        This may be called by supposedly read-only operations:
+        This may be called by read-only operations:
         https://github.com/RDFLib/rdflib/blob/master/rdflib/graph.py#L1623
         Therefore it needs to open a write transaction. This is not ideal
         but the only way to handle datasets in RDFLib.

         @param graph (URIRef) URI of the named graph to add.
         '''
-        pk_none = self._pickle(None)
-        pk_ctx = self._pickle(graph)
-        with self.data_env.begin(write=True).cursor(self.dbs['tk:c']) \
-                as tk2c_cur:
-            tk2c_cur.put(pk_none, pk_ctx)
-
-        with self.idx_env.begin(write=True)\
-                .cursor(self.dbs['c:tk']) as c2tk_cur:
-            c2tk_cur.put(pk_ctx, pk_none)
+        if isinstance(graph, Graph):
+            graph = graph.identifier
+        pk_c = self._pickle(graph)
+        c_hash = self._hash(pk_c)
+        with self.cur('th:t') as cur:
+            c_exists = cur.set_key(c_hash)
+        if not c_exists:
+            # Insert context term if not existing.
+            if self.is_txn_rw:
+                # Use existing R/W transaction.
+                with self.cur('t:st') as cur:
+                    ck = self._append(cur, (pk_c,))[0]
+                with self.cur('th:t') as cur:
+                    cur.put(c_hash, ck)
+                with self.cur('c:spo') as cur:
+                    cur.put(ck, self.NO_TRIPLE)
+            else:
+                # Open new R/W transactions.
+                with self.data_env.begin(write=True) as wtxn:
+                    with wtxn.cursor(self.dbs['t:st']) as cur:
+                        ck = self._append(cur, (pk_c,))[0]
+                with self.idx_env.begin(write=True) as wtxn:
+                    with wtxn.cursor(self.dbs['th:t']) as cur:
+                        cur.put(c_hash, ck)
+                    with wtxn.cursor(self.dbs['c:spo']) as cur:
+                        cur.put(ck, self.NO_TRIPLE)


     def remove_graph(self, graph):
@@ -510,16 +688,15 @@ class LmdbStore(Store):

         @param graph (URIRef) URI of the named graph to remove.

+        if isinstance(graph, Graph):
+            graph = graph.identifier
+
         self.remove((None, None, None), graph)

-        pk_none = self._pickle(None)
-        pk_ctx = self._pickle(graph)
-        self._enqueue_action('delete', 'tk:c', pk_none, pk_ctx)
-        self._idx_queue.append((None, pk_ctx, None))
-
-        with self.cur('c:tk') as cur:
-            if cur.set_key_dup(self._pickle(graph), self._pickle(None)):
-                self.curs['tk:c'].delete()
+        ck = self._to_key(graph)
+        with self.cur('c:spo') as cur:
+            if ck and cur.set_key_dup(ck, self.NO_TRIPLE):
+                cur.delete()


     def commit(self):
@@ -529,7 +706,7 @@ class LmdbStore(Store):
         if self.is_txn_open:
             self.data_txn.commit()
             self.idx_txn.commit()
-        self.data_txn = self.idx_txn = self.is_txn_rw = None
+        self.data_txn = self.idx_txn = None


     def rollback(self):
@@ -539,34 +716,28 @@ class LmdbStore(Store):
         if self.is_txn_open:
             self.data_txn.abort()
             self.idx_txn.abort()
-        self.data_txn = self.idx_txn = self.is_txn_rw = None
-
-
-    #def _next_lex_key(self, db=None):
-    #    '''
-    #    Calculate the next closest byte sequence in lexicographical order.
-
-    #    This is needed to fill the next available slot after the last one in
-    #    LMDB. Keys are byte strings. This is convenient to keep key
-    #    lengths as small as possible because they are referenced in several
-    #    indices.
-    #    '''
-    #    with self.env.begin(buffers=True) as txn:
-    #        with txn.cursor(db) as cur:
-    #            has_entries = cur.last()
-    #            if has_entries:
-    #                next = bytearray(cur.key())
-    #            else:
-    #                # First key in db.
-    #                return b'\x00'
-    #    try:
-    #        next[-1] += 1
-    #    # If the value exceeds 256, i.e. the current value is the last one,
-    #    # append a new \x00 and the next iteration will start incrementing that
-    #    except ValueError:
-    #        next.append(0)
-
-    #    return next
+        self.data_txn = self.idx_txn = None
+
+
+    def rebase(self, n, start=1):
+        '''
+        Create a bytearray translating an integer to an arbitrary base.
+
+        The base is between the `start` value and 255, so that each digit
+        fits in a one-byte chunk.
+
+        @param n (int) Number to rebase.
+        @param start (int) Starting byte. This is useful to leave out "special"
+        bytes for purposes such as separators.
+
+        @return bytearray
+        '''
+        digits = list(range(start, 256))
+        base = len(digits)
+        if n < base:
+            return bytearray([digits[n]])
+        else:
+            return (self.rebase(n // base, start)
+                    + bytearray([digits[n % base]]))
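
A worked example (illustrative only; with `start=1` the digits run from `\x01` to `\xff`, i.e. base 255):

    store.rebase(0)     # -> bytearray(b'\x01')
    store.rebase(254)   # -> bytearray(b'\xff')
    store.rebase(255)   # -> bytearray(b'\x02\x01'), i.e. 1 * 255 + 0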


     ## PRIVATE METHODS ##
@@ -580,54 +751,50 @@ class LmdbStore(Store):
         keys without incurring the overhead of converting them to triples.

         @param triple_pattern (tuple) 3 RDFLib terms
-        @param context (rdflib.Graph | None) Context graph, if available.
-        If a graph is given, only its identifier is stored.
+        @param context (rdflib.Graph | None) Context graph or URI, or None.
         '''
         '''
         if context == self:
             context = None

-            raise NotImplementedError()
-            # In theory, this is what should happen:
-            #if context == self.DEFAULT_GRAPH_URI
-            #    # Any pattern with unbound context
-            #    for tk in self._lookup(triple_pattern, tkey):
-            #        yield self._key_to_triple(tk)
-            #    return
-        elif context is None:
-            context = self.DEFAULT_GRAPH_URI
-
-        tkey = self._to_key(triple_pattern)
+        if context:
+            if isinstance(context, Graph):
+                context = context.identifier
+            ck = self._to_key(context)
-        # Shortcuts
-        pk_ctx = self._pickle(context.identifier) \
-                if isinstance(context, Graph) \
-                else self._pickle(context)
-        if not self.curs['c:tk'].set_key(pk_ctx):
-            # Context not found.
-            return iter(())
-
-        # s p o c
-        if all(triple_pattern):
-            if self.curs['tk:c'].set_key_dup(tkey, pk_ctx):
-                yield tkey
-                return
-            else:
-                # Triple not found.
+            # Shortcuts
+            if not ck:
+                # Context not found.
                 return iter(())

-        # ? ? ? c
-        elif not any(triple_pattern):
-            # Get all triples from the context
-            for tk in self.curs['c:tk'].iternext_dup():
-                yield tk
+            with self.cur('c:spo') as cur:
+                # s p o c
+                if all(triple_pattern):
+                    spok = self._to_key(triple_pattern)
+                    if not spok:
+                        # A term in the triple is not found.
+                        return iter(())
+                    if cur.set_key_dup(ck, spok):
+                        yield spok
+                        return
+                    else:
+                        # Triple not found.
+                        return iter(())
+
+                # ? ? ? c
+                elif not any(triple_pattern):
+                    # Get all triples from the context.
+                    if cur.set_key(ck):
+                        for spok in cur.iternext_dup():
+                            yield spok
-        # Regular lookup.
+                # Regular lookup.
+                else:
+                    for spok in self._lookup(triple_pattern):
+                        if cur.set_key_dup(ck, spok):
+                            yield spok
+                    return
         else:
-            for tk in self._lookup(triple_pattern, tkey):
-                if self.curs['c:tk'].set_key_dup(pk_ctx, tk):
-                    yield tk
-            return
+            yield from self._lookup(triple_pattern)


     def _init_db_environments(self, path, create=True):
@@ -655,16 +822,35 @@ class LmdbStore(Store):
         # Open and optionally create main databases.
         self.dbs = {
             # Main databases.
-            'tk:t': self.data_env.open_db(b'tk:t', create=create),
-            'tk:c': self.data_env.open_db(b'tk:c', create=create, dupsort=True),
+            't:st': self.data_env.open_db(b't:st', create=create),
+            'spo:c': self.data_env.open_db(
+                    b'spo:c', create=create, dupsort=True, dupfixed=True),
             'pfx:ns': self.data_env.open_db(b'pfx:ns', create=create),
-            # Index.
+            # One-off indices.
             'ns:pfx': self.idx_env.open_db(b'ns:pfx', create=create),
+            'th:t': self.idx_env.open_db(b'th:t', create=create),
         }
         # Other index databases.
         for db_key in self.idx_keys:
-            self.dbs[db_key] = self.idx_env.open_db(s2b(db_key),
-                    dupsort=True, dupfixed=True, create=create)
+            if db_key not in ('ns:pfx', 'th:t'):
+                self.dbs[db_key] = self.idx_env.open_db(s2b(db_key),
+                        dupsort=True, dupfixed=True, create=create)
+
+
+    def _from_key(self, key):
+        '''
+        Convert a key into one or more terms.
+
+        @param key (bytes) The key to be converted. It can be a compound one
+        in which case the function will return multiple terms.
+        '''
+        terms = []
+        with self.cur('t:st') as cur:
+            for k in bytes(key).split(self.SEP_BYTE):
+                pk_t = cur.get(k)
+                terms.append(self._unpickle(pk_t))
+
+        return terms
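
A sketch of the round trip between a stored term and its key, using this method together with `_to_key()` below (hypothetical usage; assumes an open transaction and a term already stored):

    from rdflib import URIRef

    s = URIRef('urn:ex:subject')
    sk = store._to_key(s)    # 5-byte key, or None if the term is unknown
    if sk is not None:
        assert store._from_key(sk) == [s]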


     def _to_key(self, obj):
@@ -674,217 +860,132 @@ class LmdbStore(Store):
         The key is the checksum of the pickled object, therefore unique for
         that object. The hashing algorithm is specified in `KEY_HASH_ALGO`.

-        @param obj (Object) Anything that can be pickled. Pairs of terms, as
-        well as triples and quads, are expressed as tuples within the scope of
-        this application.
+        @param obj (Object) Anything that can be reduced to terms stored in the
+        database. Pairs of terms, as well as triples and quads, are expressed
+        as tuples.
+
+        If more than one term is provided, the keys are concatenated using the
+        designated separator byte (`\x00`).

         @return bytes
         '''
-        return hashlib.new(self.KEY_HASH_ALGO, self._pickle(obj)).digest()
-
+        if not isinstance(obj, (list, tuple)):
+            obj = (obj,)
+        key = []
+        with self.cur('th:t') as cur:
+            for term in obj:
+                tk = cur.get(self._hash(self._pickle(term)))
+                if not tk:
+                    # If any of the terms is not found, return None immediately
+                    return None
+                key.append(tk)
-    def _key_to_triple(self, key):
-        '''
-        Look up for the hash key of a triple and return the triple as a tuple.
+        return self.SEP_BYTE.join(key)

-        @param key (bytes) Hash key of triple.

-        @return Tuple with triple elements or None if key is not found.
+    def _hash(self, s):
         '''
-        pk_trp = self.curs['tk:t'].get(key)
-
-        return self._unpickle(pk_trp) if pk_trp else None
+        Get the hash value of a serialized object.
+        '''
+        return hashlib.new(self.KEY_HASH_ALGO, s).digest()


-    def _lookup(self, triple_pattern, tkey=None):
+    def _lookup(self, triple_pattern):
         '''
-        Look up triples based on a triple pattern.
+        Look up triples in the indices based on a triple pattern.

         @return iterator of matching triple keys.
         '''
+        #import pdb; pdb.set_trace()
         s, p, o = triple_pattern

         if s is not None:
             if p is not None:
                 # s p o
                 if o is not None:
-                    if self.curs['tk:t'].set_key(tkey):
-                        yield tkey
-                        return
-                    else:
-                        return iter(())
+                    with self.cur('spo:c') as cur:
+                        tkey = self._to_key(triple_pattern)
+                        if tkey and cur.set_key(tkey):
+                            yield tkey
+                            return
+                        else:
+                            return iter(())
                 # s p ?
                 else:
-                    cur = self.curs['spk:tk']
-                    term = self._pickle((s, p))
+                    bound_terms = [s, p]
+                    cur_label = 'sp:o'
+                    order = (0, 1, 2)
             else:
                 # s ? o
                 if o is not None:
-                    cur = self.curs['sok:tk']
-                    term = self._pickle((s, o))
+                    bound_terms = [s, o]
+                    cur_label = 'so:p'
+                    order = (0, 2, 1)
                 # s ? ?
                 else:
-                    cur = self.curs['sk:tk']
-                    term = self._pickle(s)
+                    bound_terms = [s]
+                    cur_label = 's:po'
+                    order = (0, 1, 2)
         else:
             if p is not None:
                 # ? p o
                 if o is not None:
-                    cur = self.curs['pok:tk']
-                    term = self._pickle((p, o))
+                    bound_terms = [p, o]
+                    cur_label = 'po:s'
+                    order = (2, 0, 1)
                 # ? p ?
                 else:
-                    cur = self.curs['pk:tk']
-                    term = self._pickle(p)
+                    bound_terms = [p]
+                    cur_label = 'p:so'
+                    order = (1, 0, 2)
             else:
                 # ? ? o
                 if o is not None:
-                    cur = self.curs['ok:tk']
-                    term = self._pickle(o)
+                    bound_terms = [o]
+                    cur_label = 'o:sp'
+                    order = (1, 2, 0)
                 # ? ? ?
                 else:
-                    # Get all triples in the database
-                    for c in self.curs['tk:t'].iternext(values=False):
-                        yield c
+                    # Get all triples in the database.
+                    with self.cur('spo:c') as cur:
+                        yield from cur.iternext_nodup()
                     return

-        key = hashlib.new(self.KEY_HASH_ALGO, term).digest()
-        if cur.set_key(key):
-            for match in cur.iternext_dup():
-                yield match
-        else:
-            return iter(())
-
-
-    def _enqueue_action(self, action, db, k, v):
-        '''
-        Enqueue an action to be performed in a write transaction.
+        tkey = self._to_key(bound_terms)
+        with self.cur(cur_label) as cur:
+            #import pdb; pdb.set_trace()
+            if cur.set_key(tkey):
+                for match in cur.iternext_dup():
+                    # Combine bound and found in search order.
+                    comb_keys = (
+                            bytes(tkey).split(self.SEP_BYTE)
+                            + bytes(match).split(self.SEP_BYTE))
+                    # Rearrange term keys according to given order.
+                    yield self.SEP_BYTE.join([comb_keys[i] for i in order])
+            else:
+                return iter(())
 
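A sketch of the key rearrangement above for a `? p o` pattern (stand-in keys, not part of the commit): the bound lookup key holds `(p, o)`, each match is an `s`, and `order = (2, 0, 1)` restores `(s, p, o)`:

    SEP = b'\x00'
    sk, pk, ok = b'S' * 5, b'P' * 5, b'O' * 5   # stand-in 5-byte term keys
    tkey = SEP.join((pk, ok))                   # bound lookup key in 'po:s'
    match = sk                                  # a value found under tkey
    comb_keys = tkey.split(SEP) + match.split(SEP)
    assert SEP.join(comb_keys[i] for i in (2, 0, 1)) == SEP.join((sk, pk, ok))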
-        `_run_update` method is called. This is usually done by the
-        TxnManager class.
 
 
-        @param action (string) One of 'put', 'putmulti' or 'delete'.
-        @param db (string) Label of the database to perform the action.
-        @param k (bytes) Key to update.
-        @param v (bytes) Value to insert or delete.
+    def _append(self, cur, values, **kwargs):
         '''
-        if not action in ('put', 'putmulti', 'delete'):
-            raise NameError('No action with name {}.'.format(action))
-
-        self._data_queue.append((action, db, k, v))
+        Append one or more values to the end of a database.
+        @param cur (lmdb.Cursor) The write cursor to act on.
+        @param values (list(bytes)) Value(s) to append.
-    def _apply_changes(self):
-        '''
-        Apply changes in `_data_queue`.
+        @return list(bytes) Last key(s) inserted.
         '''
-        with ExitStack() as stack:
-            data_txn = stack.enter_context(
-                    self.data_env.begin(write=True, buffers=True))
-            logger.info('Beginning data insert. Data write lock acquired.')
+        if not isinstance(values, (list, tuple)):
+            raise ValueError('Input must be a list or tuple.')
+        data = []
+        lastkey = cur.key() if cur.last() else None
+        for v in values:
+            lastkey = self._key_seq.next(lastkey)
+            data.append((lastkey, v))
-            curs = {
-                task[1]: stack.enter_context(
-                        data_txn.cursor(self.dbs[task[1]]))
-                for task in self._data_queue
-            }
-            #logger.debug('Data queue: {}'.format(self._data_queue))
-            #import pdb; pdb.set_trace()
-            logger.debug('Data queue: {} triples.'.format(len(self._data_queue)))
-            while len(self._data_queue):
-                action, db, k, v = self._data_queue.pop()
-                if action == 'put':
-                    curs[db].put(k, v)
-                elif action == 'putmulti':
-                    # With 'putmulti', `k` is a series of 2-tuples and `v` is
-                    # ignored.
-                    data = k
-                    curs[db].putmulti(data)
-                elif action == 'delete':
-                    if v is None:
-                        # Delete all values for the key.
-                        if curs[db].set_key(k):
-                            curs[db].delete(dupdata=True)
-                    else:
-                        # Delete only a specific k:v pair.
-                        if curs[db].set_key_dup(k, v):
-                            curs[db].delete(dupdata=False)
-                else:
-                    raise ValueError(
-                        'Action type \'{}\' is not supported.' .format(action))
-        logger.info('Data insert completed. Data write lock released.')
-
-
-    def _run_indexing(self):
-        '''
-        Update indices for a given triple.
-
-        If the triple is found, add indices. if it is not found, delete them.
-        This method is run asynchronously and may outlive the HTTP request.
-
-        @param key (bytes) Unique key associated with the triple.
-        @param pk_ctx (bytes) Pickled context term.
-        @param triple (tuple: rdflib.Identifier) Tuple of 3 RDFLib terms.
-        This can be provided if already pre-calculated, otherwise it will be
-        retrieved from the store using `trp_key`.
-        '''
-        with ExitStack() as stack:
-            data_txn = stack.enter_context(self.data_env.begin(buffers=True))
-            idx_txn = stack.enter_context(
-                    self.idx_env.begin(write=True, buffers=True))
-            logger.info('Index started. Index write lock acquired.')
-            data_curs = self.get_data_cursors(data_txn)
-            idx_curs = self.get_idx_cursors(idx_txn)
-
-            lock = Lock()
-            #logger.debug('Index queue: {}'.format(self._idx_queue))
-            logger.debug('Index queue: {}'.format(len(self._idx_queue)))
-            while len(self._idx_queue):
-                lock.acquire()
-                trp_key, pk_ctx, triple = self._idx_queue.pop()
-
-                if trp_key is None and triple is None:
-                    # This is when a graph is deleted.
-                    if not data_curs['tk:c'].set_key(pk_ctx):
-                        pk_none = self._pickle(None)
-                        if idx_curs['c:tk'].set_key_dup(pk_none, pk_ctx):
-                            idx_curs['c:tk'].delete()
-                    lock.release()
-                    continue
-
-                if triple is None:
-                    triple = self._key_to_triple(trp_key)
-
-                s, p, o = triple
-                term_keys = {
-                    'sk:tk': self._to_key(s),
-                    'pk:tk': self._to_key(p),
-                    'ok:tk': self._to_key(o),
-                    'spk:tk': self._to_key((s, p)),
-                    'sok:tk': self._to_key((s, o)),
-                    'pok:tk': self._to_key((p, o)),
-                }
-
-                if data_curs['tk:t'].get(trp_key):
-                    # Add to index.
-                    for ikey in term_keys:
-                        idx_curs[ikey].put(term_keys[ikey], trp_key)
-                else:
-                    # Delete from index if a match is found.
-                    for ikey in term_keys:
-                        if idx_curs[ikey].set_key_dup(
-                                term_keys[ikey], trp_key):
-                            idx_curs[ikey].delete()
-
-                # Add or remove context association index.
-                if data_curs['tk:c'].set_key_dup(trp_key, pk_ctx):
-                    idx_curs['c:tk'].put(pk_ctx, trp_key)
-                elif idx_curs['c:tk'].set_key_dup(pk_ctx, trp_key):
-                    idx_curs['c:tk'].delete()
-                lock.release()
+        cur.putmulti(data, **kwargs)
-        logger.info('Index completed. Index write lock released.')
+        return [d[0] for d in data]


     ## Convenience methods—not necessary for functioning but useful for
@@ -898,12 +999,12 @@ class LmdbStore(Store):

         @return Iterator:tuple Generator of triples.
         '''
-        cur = self.curs['c:tk']
-        if cur.set_key(pk_ctx):
-            tkeys = cur.iternext_dup()
-            return {self._key_to_triple(tk) for tk in tkeys}
-        else:
-            return set()
+        with self.cur('c:spo') as cur:
+            if cur.set_key(pk_ctx):
+                tkeys = cur.iternext_dup()
+                return {tuple(self._from_key(tk)) for tk in tkeys}
+            else:
+                return set()


     def _ctx_for_key(self, tkey):
@@ -914,9 +1015,9 @@ class LmdbStore(Store):

         @return Iterator:URIRef Generator of context URIs.
         '''
-        cur = self.curs['tk:c']
-        if cur.set_key(tkey):
-            ctx = cur.iternext_dup()
-            return {self._unpickle(c) for c in ctx}
-        else:
-            return set()
+        with self.cur('spo:c') as cur:
+            if cur.set_key(tkey):
+                ctx = cur.iternext_dup()
+                return {self._from_key(c)[0] for c in ctx}
+            else:
+                return set()