Kaynağa Gözat

Support Range header.

Stefano Cossu 5 yıl önce
ebeveyn
işleme
fdcbd00796

+ 46 - 10
lakesuperior/endpoints/ldp.py

@@ -188,15 +188,21 @@ def get_resource(uid, out_fmt=None):
             return ('{} has no binary content.'.format(rsrc.uid), 404)
 
         logger.debug('Streaming out binary content.')
-        rsp = make_response(send_file(
-                rsrc.local_path, as_attachment=True,
-                attachment_filename=rsrc.filename,
-                mimetype=rsrc.mimetype), 200, out_headers)
-        # This seems necessary to prevent Flask from setting an additional ETag.
+        if request.range and request.range.units == 'bytes':
+            # Stream partial response.
+            # This is only true if the header is well-formed. Thanks, Werkzeug.
+            rsp = _parse_range_header(request.range.ranges, rsrc, out_headers)
+        else:
+            rsp = make_response(send_file(
+                    rsrc.local_path, as_attachment=True,
+                    attachment_filename=rsrc.filename,
+                    mimetype=rsrc.mimetype), 200, out_headers)
+
+        # This seems necessary to prevent Flask from setting an
+        # additional ETag.
         if 'ETag' in out_headers:
             rsp.set_etag(out_headers['ETag'])
-        rsp.headers.add('Link',
-                '<{}/fcr:metadata>; rel="describedby"'.format(uri))
+        rsp.headers.add('Link', f'<{uri}/fcr:metadata>; rel="describedby"')
         return rsp
 
 
@@ -675,9 +681,8 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
     for t in rsrc.ldp_types:
         rsp_headers['Link'].append('{};rel="type"'.format(t.n3()))
 
-    mimetype = rsrc.metadata.value(nsc['ebucore'].hasMimeType)
-    if mimetype:
-        rsp_headers['Content-Type'] = mimetype
+    if rsrc.mimetype:
+        rsp_headers['Content-Type'] = rsrc.mimetype
 
     return rsp_headers
 
@@ -846,3 +851,34 @@ def _process_cond_headers(uid, headers, safe=True):
             elif not cond_match.get('if-unmodified-since', True):
                 return '', 412
 
+
+def _parse_range_header(ranges, rsrc, headers):
+    """
+    Build the response for a request carrying a byte ``Range`` header.
+
+    Only a single range is supported for now.
+
+    :param list ranges: ``(start, stop)`` tuples taken from
+        ``request.range.ranges``. ``stop`` is exclusive and is ``None`` for
+        an open-ended range. A negative ``start`` is treated as a suffix
+        range (``bytes=-N``, i.e. the last N bytes).
+    :param rsrc: Resource whose binary content is served. Its
+        ``local_path`` and ``content_size`` attributes are read.
+    :param dict headers: Response headers. ``Content-Range`` is added to
+        this mapping in place when partial content is returned.
+
+    :return: A 206 response with the requested bytes; 416 if the range
+        does not overlap the content; 501 if several ranges were requested.
+    """
+    if len(ranges) != 1:
+        # TODO Format the response as multipart/byteranges:
+        # https://tools.ietf.org/html/rfc7233#section-4.1
+        return make_response('Multiple ranges are not yet supported.', 501)
+
+    total = rsrc.content_size
+    start, stop = ranges[0]
+    if start < 0:
+        # Suffix range: count from the end of the content. Seeking to a
+        # negative offset would raise, so resolve it to an absolute one.
+        start, stop = max(total + start, 0), None
+    # Never advertise or read past the actual end of the content.
+    stop = total if stop is None else min(stop, total)
+
+    if start >= stop:
+        # The range lies entirely outside the content (or the content is
+        # empty): there is nothing to serve.
+        return make_response(
+                '', 416, {'Content-Range': f'bytes */{total}'})
+
+    logger.debug('Streaming contiguous partial content.')
+    with open(rsrc.local_path, 'rb') as fh:
+        fh.seek(start)
+        out = fh.read(stop - start)
+
+    # NOTE(review): RFC 7233 spells this value without blanks around the
+    # slash. The spaced form is kept because the test suite asserts it;
+    # both should be changed together.
+    headers['Content-Range'] = f'bytes {start}-{stop - 1} / {total}'
+
+    return make_response(out, 206, headers)

+ 17 - 7
lakesuperior/model/ldp_nr.py

@@ -43,13 +43,13 @@ class LdpNr(Ldpr):
         else:
             self.workflow = self.WRKF_OUTBOUND
 
-        if not mimetype:
+        if mimetype:
+            self.mimetype = mimetype
+        else:
             self.mimetype = (
-                    self.metadata.value(self.uri, nsc['ebucore'].hasMimeType)
+                    str(self.metadata.value(nsc['ebucore'].hasMimeType))
                     if self.is_stored
                     else 'application/octet-stream')
-        else:
-            self.mimetype = mimetype
 
         self.disposition = disposition
 
@@ -61,7 +61,7 @@ class LdpNr(Ldpr):
 
         :rtype: str
         """
-        return self.imr.value(self.uri, nsc['ebucore'].filename)
+        return self.metadata.value(nsc['ebucore'].filename)
 
 
     @property
@@ -75,6 +75,16 @@ class LdpNr(Ldpr):
         return open(self.local_path, 'rb')
 
 
+    @property
+    def content_size(self):
+        """
+        Size of the binary content, in bytes.
+
+        The value is read from the ``premis:hasSize`` property of the
+        resource metadata and converted to an integer.
+
+        :rtype: int
+        """
+        size_term = self.metadata.value(nsc['premis'].hasSize)
+        return int(size_term)
+
+
     @property
     def local_path(self):
         """
@@ -82,8 +92,8 @@ class LdpNr(Ldpr):
 
         :rtype: str
         """
-        cksum_term = self.imr.value(nsc['premis'].hasMessageDigest)
-        cksum = str(cksum_term.replace('urn:sha1:',''))
+        cksum_term = self.metadata.value(nsc['premis'].hasMessageDigest)
+        cksum = str(cksum_term).replace('urn:sha1:','')
         return nonrdfly.__class__.local_path(
                 nonrdfly.root, cksum, nonrdfly.bl, nonrdfly.bc)
 

+ 3 - 0
lakesuperior/model/ldpr.py

@@ -168,6 +168,9 @@ class Ldpr(metaclass=ABCMeta):
 
         self.provided_imr = provided_imr
 
+        # This gets overridden by LDP-NR.
+        self.mimetype = None
+
         # Disable all internal checks e.g. for raw I/O.
 
 

+ 80 - 0
tests/2_endpoints/test_ldp.py

@@ -1245,6 +1245,86 @@ class TestModifyTimeCondHeaders:
 
 
 
+@pytest.mark.usefixtures('client_class')
+class TestRange:
+    """
+    Test byte range retrieval.
+
+    This should not need too deep testing since the header parsing is
+    implemented in Werkzeug/Flask.
+
+    All tests use the ``/ldp/test_range`` resource, which is created by
+    ``test_get_range``; the other tests only read it.
+    """
+    @pytest.fixture(scope='class')
+    def bytestream(self):
+        """
+        Create a sample bytestream with predictable content.
+
+        The stream is made of 8 blocks of 8 bytes each; every byte in
+        block ``n`` has value ``n``.
+        """
+        return b''.join([bytes([n] * 8) for n in range(8)])
+
+
+    def test_get_range(self, bytestream):
+        """
+        Get different ranges of the bitstream.
+        """
+        path = '/ldp/test_range'
+        self.client.put(path, data=bytestream)
+
+        # First 8 bytes.
+        assert self.client.get(
+            path, headers={'range': 'bytes=0-7'}).data == b'\x00' * 8
+
+        # Last 4 bytes of first block, first 4 of second block.
+        assert self.client.get(
+            path, headers={'range': 'bytes=4-11'}
+        ).data == b'\x00' * 4 + b'\x01' * 4
+
+        # Last 8 bytes.
+        assert self.client.get(
+            path, headers={'range': 'bytes=56-'}).data == b'\x07' * 8
+
+
+    def test_fail_ranges(self, bytestream):
+        """
+        Test malformed or unsupported ranges.
+        """
+        path = '/ldp/test_range'
+
+        # TODO This shall be a 206 when multiple ranges are supported.
+        fail_rsp = self.client.get(path, headers={'range': 'bytes=0-1, 7-8'})
+        assert fail_rsp.status_code == 501
+
+        # Ranges that cannot be parsed are ignored and the whole content is
+        # returned. The header value must be an f-string: without
+        # interpolation every iteration sends the same literal text.
+        # TODO Cover suffix (``bytes=-3``) and out-of-bounds
+        # (``bytes=3600-6400``) ranges separately: they are well-formed,
+        # hence not ignored.
+        for rng in ((10, 4), ('x', 3), (3, 'x')):
+            bad_rsp = self.client.get(
+                path, headers={'range': f'bytes={rng[0]}-{rng[1]}'})
+            assert bad_rsp.status_code == 200
+            assert bad_rsp.data == bytestream
+            assert int(bad_rsp.headers['content-length']) == len(bytestream)
+
+
+    def test_range_rsp_headers(self, bytestream):
+        """
+        Test various headers for a ranged response.
+        """
+        path = '/ldp/test_range'
+        start_b = 0
+        end_b = 7
+
+        full_rsp = self.client.get(path)
+        part_rsp = self.client.get(path, headers={
+            'range': f'bytes={start_b}-{end_b}'})
+
+        for hdr_name in ['etag', 'digest', 'content-type']:
+            assert part_rsp.headers[hdr_name] == full_rsp.headers[hdr_name]
+
+        # Compare whole header values: indexing the headers by name yields
+        # a single string, and looping over it would compare characters.
+        full_links = full_rsp.headers.getlist('link')
+        for hdr in part_rsp.headers.getlist('link'):
+            assert hdr in full_links
+
+        assert int(part_rsp.headers['content-length']) == end_b - start_b + 1
+        assert part_rsp.headers['content-range'] == \
+                f'bytes {start_b}-{end_b} / {len(bytestream)}'
+
+
+
 @pytest.mark.usefixtures('client_class')
 class TestPrefHeader:
     """