瀏覽代碼

Remove ETag from headers if output is RDF; add digest header.

Stefano Cossu 6 年之前
父節點
當前提交
d643e82cad
共有 3 個文件被更改,包括 16 次插入,45 次刪除
  1. +16 -13
      lakesuperior/endpoints/ldp.py
  2. +0 -6
      lakesuperior/model/ldpr.py
  3. +0 -26
      lakesuperior/toolbox.py

+ 16 - 13
lakesuperior/endpoints/ldp.py

@@ -1,6 +1,7 @@
 import logging
 import pdb
 
+from base64 import b64encode
 from collections import defaultdict
 from io import BytesIO
 from pprint import pformat
@@ -159,7 +160,7 @@ def get_resource(uid, out_fmt=None):
                     'rdf'
                     if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
                     else 'non_rdf')
-        out_headers.update(_headers_from_metadata(rsrc))
+        out_headers.update(_headers_from_metadata(rsrc, out_fmt))
         uri = g.tbox.uid_to_uri(uid)
         if out_fmt == 'rdf':
             if locals().get('rdf_mimetype', None) is None:
@@ -176,12 +177,9 @@ def get_resource(uid, out_fmt=None):
             rsp = make_response(send_file(
                     rsrc.local_path, as_attachment=True,
                     attachment_filename=rsrc.filename,
-                    mimetype=rsrc.mimetype))
-            logger.debug('Out headers: {}'.format(out_headers))
+                    mimetype=rsrc.mimetype), 200, out_headers)
             rsp.headers.add('Link',
                     '<{}/fcr:metadata>; rel="describedby"'.format(uri))
-            for link in out_headers['Link']:
-                rsp.headers.add('Link', link)
             return rsp
 
 
@@ -620,7 +618,7 @@ def parse_repr_options(retr_opts):
     return imr_options
 
 
-def _headers_from_metadata(rsrc):
+def _headers_from_metadata(rsrc, out_fmt):
     """
     Create a dict of headers from a metadata graph.
 
@@ -629,14 +627,20 @@ def _headers_from_metadata(rsrc):
     """
     out_headers = defaultdict(list)
 
-    digest = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
-    if digest:
-        etag = digest.identifier.split(':')[-1]
+    digest = rsrc.metadata.value(rsrc.uri, nsc['premis'].hasMessageDigest)
+    # Only add ETag and digest if output is not RDF.
+    if digest and out_fmt == 'non_rdf':
+        digest_components = digest.split(':')
+        cksum_hex = digest_components[-1]
+        cksum = bytearray.fromhex(cksum_hex)
+        digest_algo = digest_components[-2]
         etag_str = (
-                'W/"{}"'.format(etag)
+                'W/"{}"'.format(cksum_hex)
                 if nsc['ldp'].RDFSource in rsrc.ldp_types
-                else etag)
+                else cksum_hex)
         out_headers['ETag'] = etag_str,
+        out_headers['Digest'] = '{}={}'.format(
+                digest_algo.upper(), b64encode(cksum).decode('ascii'))
 
     last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
     if last_updated_term:
@@ -644,8 +648,7 @@ def _headers_from_metadata(rsrc):
             .format('ddd, D MMM YYYY HH:mm:ss Z')
 
     for t in rsrc.ldp_types:
-        out_headers['Link'].append(
-                '{};rel="type"'.format(t.n3()))
+        out_headers['Link'].append('{};rel="type"'.format(t.n3()))
 
     mimetype = rsrc.metadata.value(nsc['ebucore'].hasMimeType)
     if mimetype:

+ 0 - 6
lakesuperior/model/ldpr.py

@@ -787,12 +787,6 @@ class Ldpr(metaclass=ABCMeta):
         for t in self.base_types:
             self.provided_imr.add((self.uri, RDF.type, t))
 
-        # Message digest.
-        cksum = self.tbox.rdf_cksum(self.provided_imr)
-        self.provided_imr.set((
-            self.uri, nsc['premis'].hasMessageDigest,
-            URIRef('urn:sha1:{}'.format(cksum))))
-
         # Create and modify timestamp.
         if create:
             self.provided_imr.set((

+ 0 - 26
lakesuperior/toolbox.py

@@ -255,32 +255,6 @@ class Toolbox:
         return parsed_hdr
 
 
-    def rdf_cksum(self, gr):
-        '''
-        Generate a checksum for a graph.
-
-        What this method does is ordering the graph by subject, predicate,
-        object, then creating a pickle string and a checksum of it.
-
-        N.B. The context of the triples is ignored, so isomorphic graphs would
-        have the same checksum regardless of the context(s) they are found in.
-
-        @TODO This can be later reworked to use a custom hashing algorithm.
-
-        :param rdflib.Graph: gr The graph to be hashed.
-
-        :rtype: str
-        :return: SHA1 checksum.
-        '''
-        # Remove the messageDigest property, which very likely reflects the
-        # previous state of the resource.
-        gr.remove((Variable('s'), nsc['premis'].messageDigest, Variable('o')))
-
-        ord_gr = sorted(list(gr), key=lambda x : (x[0], x[1], x[2]))
-        hash = sha1(pickle.dumps(ord_gr)).hexdigest()
-
-        return hash
-
     def split_uuid(self, uuid):
         '''
         Split a UID into pairtree segments. This mimics FCREPO4 behavior.