Просмотр исходного кода

Add and pass tests for checksum store; expose digest & ETag in LDP API.

Stefano Cossu 6 лет назад
Родитель
Коммит
09bda5b1e3

+ 29 - 14
lakesuperior/endpoints/ldp.py

@@ -234,7 +234,7 @@ def post_resource(parent_uid):
 
     Add a new resource in a new URI.
     """
-    out_headers = std_headers
+    rsp_headers = std_headers
     try:
         slug = request.headers['Slug']
         logger.debug('Slug: {}'.format(slug))
@@ -274,9 +274,10 @@ def post_resource(parent_uid):
         hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="{0}"'\
                 .format(uri)
 
-    out_headers.update(hdr)
+    rsp_headers.update(hdr)
+    rsp_headers.update(_digest_headers(nsc['fcres'][uid]))
 
-    return uri, 201, out_headers
+    return uri, 201, rsp_headers
 
 
 @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
@@ -329,8 +330,7 @@ def put_resource(uid):
     else:
         rsp_code = 204
         rsp_body = ''
-    rsp_headers['Digest'] = 'SHA256={}'.format(
-            MetadataStore().get_checksum(nsc['fcres'][uid]).decode())
+    rsp_headers.update(_digest_headers(nsc['fcres'][uid]))
 
     return rsp_body, rsp_code, rsp_headers
 
@@ -363,8 +363,6 @@ def patch_resource(uid, is_metadata=False):
         return str(e), 415
     else:
         rsp_headers.update(_headers_from_metadata(rsrc))
-        rsp_headers['Digest'] = 'SHA256={}'.format(
-                MetadataStore().get_checksum(rsrc.uid).decode())
         return '', 204, rsp_headers
 
 
@@ -631,7 +629,7 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
     :param lakesuperior.model.ldpr.Ldpr rsrc: Resource to extract metadata
         from.
     """
-    out_headers = defaultdict(list)
+    rsp_headers = defaultdict(list)
 
     digest = rsrc.metadata.value(rsrc.uri, nsc['premis'].hasMessageDigest)
     # Only add ETag and digest if output is not RDF.
@@ -644,21 +642,38 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
                 'W/"{}"'.format(cksum_hex)
                 if nsc['ldp'].RDFSource in rsrc.ldp_types
                 else cksum_hex)
-        out_headers['ETag'] = etag_str,
-        out_headers['Digest'] = '{}={}'.format(
+        rsp_headers['ETag'] = etag_str,
+        rsp_headers['Digest'] = '{}={}'.format(
                 digest_algo.upper(), b64encode(cksum).decode('ascii'))
+    else:
+        rsp_headers.update(_digest_headers(rsrc.uri))
+
 
     last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
     if last_updated_term:
-        out_headers['Last-Modified'] = arrow.get(last_updated_term)\
+        rsp_headers['Last-Modified'] = arrow.get(last_updated_term)\
             .format('ddd, D MMM YYYY HH:mm:ss Z')
 
     for t in rsrc.ldp_types:
-        out_headers['Link'].append('{};rel="type"'.format(t.n3()))
+        rsp_headers['Link'].append('{};rel="type"'.format(t.n3()))
 
     mimetype = rsrc.metadata.value(nsc['ebucore'].hasMimeType)
     if mimetype:
-        out_headers['Content-Type'] = mimetype
+        rsp_headers['Content-Type'] = mimetype
+
+    return rsp_headers
+
+
+def _digest_headers(uri):
+    """
+    Get an LDP-RS resource digest and create header tags.
+
+    The ``Digest`` and ``ETag`` headers are created.
+    """
+    headers = {}
+    digest = MetadataStore().get_checksum(uri).hex()
+    headers['Digest'] = 'SHA256={}'.format(digest)
+    headers['ETag'] = 'W/{}'.format(digest)
 
-    return out_headers
+    return headers
 

+ 10 - 10
lakesuperior/model/ldpr.py

@@ -3,6 +3,7 @@ import re
 
 from abc import ABCMeta
 from collections import defaultdict
+from hashlib import sha256
 from threading import Thread
 from urllib.parse import urldefrag
 from uuid import uuid4
@@ -308,19 +309,18 @@ class Ldpr(metaclass=ABCMeta):
     @property
     def rsrc_digest(self):
         """
-        Cryptographic digest of a resource.
+        Cryptographic digest (SHA256) of a resource.
 
-        :rtype: str
+        :rtype: bytes
         """
         # This RDFLib function seems to be based on an in-depth study of the
-        # topic of graph checksums; however the output is puzzling because it
-        # returns **65** hexadecimal characters, which are one too many to be
-        # a SHA256 and an odd number that cannot be converted to bytes.
-        # Therefore the string version is being converted to bytes for
-        # storage. See https://github.com/RDFLib/rdflib/issues/825
-        digest = self.canonical_graph.graph_digest()
-
-        return format(digest, 'x').encode('ascii')
+        # topic of graph checksums; however the output is odd because it
+        # returns an arbitrarily long int that cannot be converted to bytes.
+        # The output is being converted to a proper SHA256 checksum. This is
+        # a temporary fix. See https://github.com/RDFLib/rdflib/issues/825
+        checksum = self.canonical_graph.graph_digest()
+
+        return sha256(str(checksum).encode('ascii')).digest()
 
 
     @property

+ 13 - 0
lakesuperior/store/base_lmdb_store.py

@@ -20,6 +20,19 @@ class BaseLmdbStore(metaclass=ABCMeta):
     This interface can be subclassed for specific storage back ends. It is
     *not* used for :py:class:`~lakesuperior.store.ldp_rs.lmdb_store.LmdbStore`
     which has a more complex lifecycle and setup.
+
+    Example usage::
+
+        >>> class MyStore(BaseLmdbStore):
+        ...     path = '/base/store/path'
+        ...     db_labels = ('db1', 'db2')
+        ...
+        >>> ms = MyStore()
+        >>> # "with" wraps the operation in a transaction.
+        >>> with ms.cur(index='db1', write=True) as cur:
+        ...     cur.put(b'key1', b'val1')
+        True
+
     """
 
     path = None

+ 10 - 0
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -1,6 +1,7 @@
 import logging
 
 from collections import defaultdict
+from hashlib import sha256
 from itertools import chain
 from os import path
 from string import Template
@@ -9,6 +10,7 @@ from urllib.parse import urldefrag
 import arrow
 
 from rdflib import Dataset, Graph, Literal, URIRef, plugin
+from rdflib.compare import to_isomorphic
 from rdflib.namespace import RDF
 from rdflib.query import ResultException
 from rdflib.resource import Resource
@@ -19,6 +21,7 @@ from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
         srv_mgd_predicates, srv_mgd_types
+from lakesuperior.globals import ROOT_RSRC_URI
 from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError, PathSegmentError)
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -197,6 +200,8 @@ class RsrcCentricLayout:
         """
         Delete all graphs and insert the basic triples.
         """
+        from lakesuperior.store.ldp_rs.metadata_store import MetadataStore
+
         logger.info('Deleting all data from the graph store.')
         store = self.ds.store
         if getattr(store, 'is_txn_open', False):
@@ -211,6 +216,11 @@ class RsrcCentricLayout:
             with open(fname, 'r') as f:
                 data = Template(f.read())
                 self.ds.update(data.substitute(timestamp=arrow.utcnow()))
+            gr = self.get_imr('/', incl_inbound=False, incl_children=True)
+
+        checksum = to_isomorphic(gr).graph_digest()
+        digest = sha256(str(checksum).encode('ascii')).digest()
+        MetadataStore().update_checksum(ROOT_RSRC_URI, digest)
 
 
     def get_raw(self, uri, ctx=None):

+ 55 - 0
tests/endpoints/test_ldp.py

@@ -874,6 +874,61 @@ class TestPrefHeader:
 
 
 
+@pytest.mark.usefixtures('client_class')
+@pytest.mark.usefixtures('db')
+class TestDigest:
+    """
+    Test digest and ETag handling.
+    """
+    def test_digest_post(self):
+        """
+        Test ``Digest`` and ``ETag`` headers on resource POST.
+        """
+        resp = self.client.post('/ldp/')
+        assert 'Digest' in resp.headers
+        assert 'ETag' in resp.headers
+        assert (
+                resp.headers['ETag'].replace('W/', '') ==
+                resp.headers['Digest'].replace('SHA256=', ''))
+
+
+    def test_digest_put(self):
+        """
+        Test ``Digest`` and ``ETag`` headers on resource PUT.
+        """
+        resp_put = self.client.put('/ldp/test_digest_put')
+        assert 'Digest' in resp_put.headers
+        assert 'ETag' in resp_put.headers
+        assert (
+                resp_put.headers['ETag'].replace('W/', '') ==
+                resp_put.headers['Digest'].replace('SHA256=', ''))
+
+        resp_get = self.client.get('/ldp/test_digest_put')
+        assert 'Digest' in resp_get.headers
+        assert 'ETag' in resp_get.headers
+        assert (
+                resp_get.headers['ETag'].replace('W/', '') ==
+                resp_get.headers['Digest'].replace('SHA256=', ''))
+
+
+    def test_digest_patch(self):
+        """
+        Verify that the digest and ETag change on resource change.
+        """
+        path = '/ldp/test_digest_patch'
+        self.client.put(path)
+        rsp1 = self.client.get(path)
+
+        self.client.patch(
+                path, data=b'DELETE {} INSERT {<> a <http://ex.org/Test> .} '
+                b'WHERE {}',
+                headers={'Content-Type': 'application/sparql-update'})
+        rsp2 = self.client.get(path)
+
+        assert rsp1.headers['ETag'] != rsp2.headers['ETag']
+        assert rsp1.headers['Digest'] != rsp2.headers['Digest']
+
+
 @pytest.mark.usefixtures('client_class')
 @pytest.mark.usefixtures('db')
 class TestVersion: