瀏覽代碼

Initial work on RDF checksums.

Stefano Cossu 6 年之前
父節點
當前提交
642724288e

+ 51 - 0
lakesuperior/model/ldpr.py

@@ -3,12 +3,14 @@ import re
 
 
 from abc import ABCMeta
 from abc import ABCMeta
 from collections import defaultdict
 from collections import defaultdict
+from threading import Thread
 from urllib.parse import urldefrag
 from urllib.parse import urldefrag
 from uuid import uuid4
 from uuid import uuid4
 
 
 import arrow
 import arrow
 
 
 from rdflib import Graph, URIRef, Literal
 from rdflib import Graph, URIRef, Literal
+from rdflib.compare import to_isomorphic
 from rdflib.namespace import RDF
 from rdflib.namespace import RDF
 
 
 from lakesuperior import env, thread_env
 from lakesuperior import env, thread_env
@@ -275,6 +277,42 @@ class Ldpr(metaclass=ABCMeta):
         return out_gr
         return out_gr
 
 
 
 
+    @property
+    def canonical_graph(self):
+        """
+        "Canonical" representation of a resource.
+
+        TODO: There is no agreement yet on what a "canonical" representation
+        of an LDP resource should be. This is a PoC method that assumes such
+        representation to include all triples that would be retrieved with a
+        GET request to the resource, including the ones with a different
+        subject than the resource URI.
+
+        The in-memory representation already held by the instance
+        (``self.imr``) is reused when its options match the canonical ones;
+        otherwise a graph is requested from the layout with
+        ``incl_inbound=False`` and ``incl_children=True``.
+
+        :rtype: rdflib.compare.IsomorphicGraph
+        """
+        # First verify that the instance IMR options correspond to the
+        # "canonical" representation: server-managed triples and children
+        # included, inbound triples excluded.
+        if (
+                self.imr_options.get('incl_srv_mgd')
+                and not self.imr_options.get('incl_inbound')
+                and self.imr_options.get('incl_children')):
+            gr = self.imr
+        else:
+            # NOTE(review): ``incl_srv_mgd`` is not passed here, so the
+            # layout default applies -- confirm that it matches the
+            # condition checked above.
+            gr = rdfly.get_imr(
+                    self.uid, incl_inbound=False, incl_children=True)
+        return to_isomorphic(gr)
+
+
+    @property
+    def digest(self):
+        """
+        Digest of the canonical graph of a resource.
+
+        Delegates to ``graph_digest()`` on the value of ``canonical_graph``.
+
+        NOTE(review): rdflib describes ``graph_digest()`` as a synonym of
+        ``internal_hash()``, which appears to yield an integer rather than a
+        string -- confirm the return type stated below.
+
+        :rtype: str
+        """
+        canon_gr = self.canonical_graph
+        return canon_gr.graph_digest()
+
+
     @property
     @property
     def version_info(self):
     def version_info(self):
         """
         """
@@ -706,9 +744,14 @@ class Ldpr(metaclass=ABCMeta):
             delete) or None. In the latter case, no notification is sent.
             delete) or None. In the latter case, no notification is sent.
         :type ev_type: str or None
         :type ev_type: str or None
         :param set remove_trp: Triples to be removed.
         :param set remove_trp: Triples to be removed.
+            # Add metadata.
         :param set add_trp: Triples to be added.
         :param set add_trp: Triples to be added.
         """
         """
         rdfly.modify_rsrc(self.uid, remove_trp, add_trp)
         rdfly.modify_rsrc(self.uid, remove_trp, add_trp)
+
+        # Calculate checksum (asynchronously).
+        Thread(target=self._update_checksum).run()
+
         # Clear IMR buffer.
         # Clear IMR buffer.
         if hasattr(self, '_imr'):
         if hasattr(self, '_imr'):
             delattr(self, '_imr')
             delattr(self, '_imr')
@@ -724,6 +767,14 @@ class Ldpr(metaclass=ABCMeta):
             self._enqueue_msg(ev_type, remove_trp, add_trp)
             self._enqueue_msg(ev_type, remove_trp, add_trp)
 
 
 
 
+    def _update_checksum(self):
+        """
+        Placeholder for saving the resource checksum in a dedicated metadata
+        store.
+
+        Not implemented yet: the method currently does nothing and returns
+        ``None``.
+        """
+        return None
+
+
+
     def _enqueue_msg(self, ev_type, remove_trp=None, add_trp=None):
     def _enqueue_msg(self, ev_type, remove_trp=None, add_trp=None):
         """
         """
         Compose a message about a resource change.
         Compose a message about a resource change.

+ 49 - 0
lakesuperior/store/ldp_rs/metadata_store.py

@@ -0,0 +1,49 @@
+import hashlib
+import logging
+
+from os import makedirs
+from os.path import exists
+
+import lmdb
+
+from lakesuperior import env
+
+
+
+class MetadataStore:
+    """
+    LMDB store for RDF metadata.
+
+    Note that even though this store connector uses LMDB as the
+    :py:class:`LmdbStore` class, it is separate because it is not part of the
+    RDFLib store implementation and carries higher-level concepts such as LDP
+    resource URIs.
+    """
+
+    db_labels = (
+        'checksums',
+    )
+    """
+    At the moment only ``checksums`` is implemented. It is a registry of
+    LDP resource graphs, indicated in the key by their UID, and their
+    cryptographic hashes.
+    """
+
+    def __init__(self, create=True):
+        """
+        Initialize DBs.
+
+        Open the LMDB environment file ``<ldp_rs location>/metadata`` and one
+        named database for each entry in :py:attr:`db_labels`.
+
+        :param bool create: Whether to create the store directory, the
+            environment file and the named databases if they are missing.
+        """
+        logger = logging.getLogger(__name__)
+
+        path = env.app_globals.config['ldp_rs']['location']
+        if not exists(path) and create is True:
+            makedirs(path)
+
+        # Use the WSGI worker count if the application was started with
+        # WSGI options; otherwise fall back to a single worker.
+        if getattr(env, 'wsgi_options', False):
+            self._workers = env.wsgi_options['workers']
+        else:
+            self._workers = 1
+        logger.info('Max LMDB readers: {}'.format(self._workers))
+
+        # ``max_dbs`` is derived from the class-level labels because
+        # ``self.dbs`` does not exist until the environment is open.
+        self.data_env = lmdb.open(
+                path + '/metadata', subdir=False, create=create,
+                map_size=1024 ** 3 * 10, max_dbs=len(self.db_labels),
+                max_spare_txns=self._workers)
+
+        # Handles for the named databases, keyed by label.
+        self.dbs = {
+                label: self.data_env.open_db(
+                    label.encode('ascii'), create=create)
+                for label in self.db_labels}

+ 1 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -251,7 +251,7 @@ class RsrcCentricLayout:
 
 
     def get_imr(
     def get_imr(
                 self, uid, ver_uid=None, strict=True, incl_inbound=False,
                 self, uid, ver_uid=None, strict=True, incl_inbound=False,
-                incl_children=True, embed_children=False, **kwargs):
+                incl_children=True, **kwargs):
         """
         """
         See base_rdf_layout.get_imr.
         See base_rdf_layout.get_imr.
         """
         """