Browse Source

Initial work on RDF checksums.

Stefano Cossu 6 years ago
parent
commit
642724288e

+ 51 - 0
lakesuperior/model/ldpr.py

@@ -3,12 +3,14 @@ import re
 
 from abc import ABCMeta
 from collections import defaultdict
+from threading import Thread
 from urllib.parse import urldefrag
 from uuid import uuid4
 
 import arrow
 
 from rdflib import Graph, URIRef, Literal
+from rdflib.compare import to_isomorphic
 from rdflib.namespace import RDF
 
 from lakesuperior import env, thread_env
@@ -275,6 +277,42 @@ class Ldpr(metaclass=ABCMeta):
         return out_gr
 
 
+    @property
+    def canonical_graph(self):
+        """
+        "Canonical" representation of a resource.
+
+        TODO: There is no agreement yet on what a "canonical" representation
+        of an LDP resource should be. This is a PoC method that assumes such
+        representation to include all triples that would be retrieved with a
+        GET request to the resource, including the ones with a different
+        subject than the resource URI.
+
+        The in-memory resource (``self.imr``) is reused when the instance IMR
+        options already match the canonical ones (server-managed triples and
+        children included, inbound triples excluded). Otherwise the graph is
+        retrieved anew through ``rdfly.get_imr``.
+
+        :rtype: rdflib.compare.IsomorphicGraph
+        """
+        # First verify that the instance IMR options correspond to the
+        # "canonical" representation.
+        if (
+                self.imr_options.get('incl_srv_mgd')
+                and not self.imr_options.get('incl_inbound')
+                and self.imr_options.get('incl_children')):
+            gr = self.imr
+        else:
+            gr = rdfly.get_imr(
+                    self.uid, incl_inbound=False, incl_children=True)
+        return to_isomorphic(gr)
+
+
+    @property
+    def digest(self):
+        """
+        Cryptographic digest of the resource's canonical graph.
+
+        NOTE(review): RDFlib's ``IsomorphicGraph.graph_digest()`` appears to
+        return an integer hash rather than a string; confirm the return type.
+
+        :rtype: str
+        """
+        iso_gr = self.canonical_graph
+        return iso_gr.graph_digest()
+
+
     @property
     def version_info(self):
         """
@@ -706,9 +744,14 @@ class Ldpr(metaclass=ABCMeta):
             delete) or None. In the latter case, no notification is sent.
         :type ev_type: str or None
         :param set remove_trp: Triples to be removed.
         :param set add_trp: Triples to be added.
         """
         rdfly.modify_rsrc(self.uid, remove_trp, add_trp)
+
+        # Calculate checksum asynchronously. ``start()`` spawns the worker
+        # thread; ``run()`` would execute the target in the calling thread.
+        Thread(target=self._update_checksum).start()
+
         # Clear IMR buffer.
         if hasattr(self, '_imr'):
             delattr(self, '_imr')
@@ -724,6 +767,14 @@ class Ldpr(metaclass=ABCMeta):
             self._enqueue_msg(ev_type, remove_trp, add_trp)
 
 
+    def _update_checksum(self):
+        """
+        Save the resource checksum in a dedicated metadata store.
+
+        TODO: Not implemented yet; this is currently a no-op placeholder.
+        """
+        pass
+
+
+
     def _enqueue_msg(self, ev_type, remove_trp=None, add_trp=None):
         """
         Compose a message about a resource change.

+ 49 - 0
lakesuperior/store/ldp_rs/metadata_store.py

@@ -0,0 +1,49 @@
+import hashlib
+import logging
+
+from os import makedirs
+from os.path import exists
+
+import lmdb
+
+from lakesuperior import env
+
+
+logger = logging.getLogger(__name__)
+
+
+
+class MetadataStore:
+    """
+    LMDB store for RDF metadata.
+
+    Note that even though this store connector uses LMDB like the
+    :py:class:`LmdbStore` class, it is separate because it is not part of the
+    RDFLib store implementation and carries higher-level concepts such as LDP
+    resource URIs.
+    """
+
+    db_labels = (
+        'checksums',
+    )
+    """
+    At the moment only ``checksums`` is implemented. It is a registry of
+    LDP resource graphs, indicated in the key by their UID, and their
+    cryptographic hashes.
+    """
+
+    def __init__(self, create=True):
+        """
+        Initialize DBs.
+
+        :param bool create: Whether to create the store directory, the LMDB
+            environment and its named databases if they do not exist.
+        """
+        path = env.app_globals.config['ldp_rs']['location']
+        if create and not exists(path):
+            makedirs(path)
+
+        if getattr(env, 'wsgi_options', False):
+            self._workers = env.wsgi_options['workers']
+        else:
+            self._workers = 1
+        logger.info('Max LMDB readers: %s', self._workers)
+
+        # ``max_dbs`` is computed from the class-level labels: the ``dbs``
+        # mapping does not exist until the environment has been opened.
+        self.data_env = lmdb.open(
+                path + '/metadata', subdir=False, create=create,
+                map_size=1024 ** 3 * 10, max_dbs=len(self.db_labels),
+                max_spare_txns=self._workers)
+
+        # Map each label to the handle of its named database.
+        self.dbs = {
+                label: self.data_env.open_db(
+                    label.encode('ascii'), create=create)
+                for label in self.db_labels}

+ 1 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -251,7 +251,7 @@ class RsrcCentricLayout:
 
     def get_imr(
                 self, uid, ver_uid=None, strict=True, incl_inbound=False,
-                incl_children=True, embed_children=False, **kwargs):
+                incl_children=True, **kwargs):
         """
         See base_rdf_layout.get_imr.
         """