|
@@ -13,6 +13,7 @@ from flask import (
|
|
|
Blueprint, Response, g, make_response, render_template,
|
|
|
request, send_file)
|
|
|
from rdflib import Graph, plugin, parser#, serializer
|
|
|
+from werkzeug.http import parse_date
|
|
|
|
|
|
from lakesuperior import env
|
|
|
from lakesuperior.api import resource as rsrc_api
|
|
@@ -140,46 +141,63 @@ def get_resource(uid, out_fmt=None):
|
|
|
"""
|
|
|
out_headers = std_headers.copy()
|
|
|
repr_options = defaultdict(dict)
|
|
|
+
|
|
|
+ # Fist check if it's not a 404 or a 410.
|
|
|
+ try:
|
|
|
+ if not rsrc_api.exists(uid):
|
|
|
+ return '', 404
|
|
|
+ except TombstoneError as e:
|
|
|
+ return _tombstone_response(e, uid)
|
|
|
+
|
|
|
+ # Then process the condition headers.
|
|
|
+ cond_ret = _process_cond_headers(uid, request.headers)
|
|
|
+ if cond_ret:
|
|
|
+ return cond_ret
|
|
|
+
|
|
|
+ # Then, business as usual.
|
|
|
+ # Evaluate which representation is requested.
|
|
|
if 'prefer' in request.headers:
|
|
|
prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
|
|
|
logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
|
|
|
if 'return' in prefer:
|
|
|
repr_options = parse_repr_options(prefer['return'])
|
|
|
|
|
|
- try:
|
|
|
- rsrc = rsrc_api.get(uid, repr_options)
|
|
|
- except ResourceNotExistsError as e:
|
|
|
- return str(e), 404
|
|
|
- except TombstoneError as e:
|
|
|
- return _tombstone_response(e, uid)
|
|
|
- else:
|
|
|
- if out_fmt is None:
|
|
|
- rdf_mimetype = _best_rdf_mimetype()
|
|
|
- out_fmt = (
|
|
|
- 'rdf'
|
|
|
- if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
|
|
|
- else 'non_rdf')
|
|
|
- out_headers.update(_headers_from_metadata(rsrc, out_fmt))
|
|
|
- uri = g.tbox.uid_to_uri(uid)
|
|
|
- if out_fmt == 'rdf':
|
|
|
- if locals().get('rdf_mimetype', None) is None:
|
|
|
- rdf_mimetype = DEFAULT_RDF_MIMETYPE
|
|
|
- ggr = g.tbox.globalize_graph(rsrc.out_graph)
|
|
|
- ggr.namespace_manager = nsm
|
|
|
- return _negotiate_content(
|
|
|
- ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
|
|
|
- else:
|
|
|
- if not getattr(rsrc, 'local_path', False):
|
|
|
- return ('{} has no binary content.'.format(rsrc.uid), 404)
|
|
|
+ rsrc = rsrc_api.get(uid, repr_options)
|
|
|
|
|
|
- logger.debug('Streaming out binary content.')
|
|
|
- rsp = make_response(send_file(
|
|
|
- rsrc.local_path, as_attachment=True,
|
|
|
- attachment_filename=rsrc.filename,
|
|
|
- mimetype=rsrc.mimetype), 200, out_headers)
|
|
|
- rsp.headers.add('Link',
|
|
|
- '<{}/fcr:metadata>; rel="describedby"'.format(uri))
|
|
|
- return rsp
|
|
|
+ if out_fmt is None:
|
|
|
+ rdf_mimetype = _best_rdf_mimetype()
|
|
|
+ out_fmt = (
|
|
|
+ 'rdf'
|
|
|
+ if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
|
|
|
+ else 'non_rdf')
|
|
|
+ out_headers.update(_headers_from_metadata(rsrc, out_fmt))
|
|
|
+ uri = g.tbox.uid_to_uri(uid)
|
|
|
+
|
|
|
+ # RDF output.
|
|
|
+ if out_fmt == 'rdf':
|
|
|
+ if locals().get('rdf_mimetype', None) is None:
|
|
|
+ rdf_mimetype = DEFAULT_RDF_MIMETYPE
|
|
|
+ ggr = g.tbox.globalize_graph(rsrc.out_graph)
|
|
|
+ ggr.namespace_manager = nsm
|
|
|
+ return _negotiate_content(
|
|
|
+ ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
|
|
|
+
|
|
|
+ # Datastream.
|
|
|
+ else:
|
|
|
+ if not getattr(rsrc, 'local_path', False):
|
|
|
+ return ('{} has no binary content.'.format(rsrc.uid), 404)
|
|
|
+
|
|
|
+ logger.debug('Streaming out binary content.')
|
|
|
+ rsp = make_response(send_file(
|
|
|
+ rsrc.local_path, as_attachment=True,
|
|
|
+ attachment_filename=rsrc.filename,
|
|
|
+ mimetype=rsrc.mimetype), 200, out_headers)
|
|
|
+ # This seems necessary to prevent Flask from setting an additional ETag.
|
|
|
+ if 'ETag' in out_headers:
|
|
|
+ rsp.set_etag(out_headers['ETag'])
|
|
|
+ rsp.headers.add('Link',
|
|
|
+ '<{}/fcr:metadata>; rel="describedby"'.format(uri))
|
|
|
+ return rsp
|
|
|
|
|
|
|
|
|
@ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
|
|
@@ -262,14 +280,13 @@ def post_resource(parent_uid):
|
|
|
except ServerManagedTermError as e:
|
|
|
return str(e), 412
|
|
|
|
|
|
- hdr = {'Location' : uri}
|
|
|
uri = g.tbox.uid_to_uri(rsrc.uid)
|
|
|
+ rsp_headers.update(_headers_from_metadata(rsrc))
|
|
|
+ rsp_headers['Location'] = uri
|
|
|
|
|
|
if mimetype and rdf_fmt is None:
|
|
|
- hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="{0}"'\
|
|
|
- .format(uri)
|
|
|
-
|
|
|
- rsp_headers.update(hdr)
|
|
|
+ rsp_headers['Link'] = (f'<{uri}/fcr:metadata>; rel="describedby"; '
|
|
|
+ f'anchor="{uri}"')
|
|
|
|
|
|
return uri, 201, rsp_headers
|
|
|
|
|
@@ -286,7 +303,9 @@ def put_resource(uid):
|
|
|
# Parse headers.
|
|
|
logger.debug('Request headers: {}'.format(request.headers))
|
|
|
|
|
|
- rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
|
|
|
+ cond_ret = _process_cond_headers(uid, request.headers, False)
|
|
|
+ if cond_ret:
|
|
|
+ return cond_ret
|
|
|
|
|
|
handling, disposition = set_post_put_params()
|
|
|
stream, mimetype = _bistream_from_req()
|
|
@@ -314,6 +333,9 @@ def put_resource(uid):
|
|
|
except TombstoneError as e:
|
|
|
return _tombstone_response(e, uid)
|
|
|
|
|
|
+ rsp_headers = _headers_from_metadata(rsrc)
|
|
|
+ rsp_headers['Content-Type'] = 'text/plain; charset=utf-8'
|
|
|
+
|
|
|
uri = g.tbox.uid_to_uri(uid)
|
|
|
if evt == RES_CREATED:
|
|
|
rsp_code = 201
|
|
@@ -336,6 +358,18 @@ def patch_resource(uid, is_metadata=False):
|
|
|
|
|
|
Update an existing resource with a SPARQL-UPDATE payload.
|
|
|
"""
|
|
|
+ # Fist check if it's not a 404 or a 410.
|
|
|
+ try:
|
|
|
+ if not rsrc_api.exists(uid):
|
|
|
+ return '', 404
|
|
|
+ except TombstoneError as e:
|
|
|
+ return _tombstone_response(e, uid)
|
|
|
+
|
|
|
+ # Then process the condition headers.
|
|
|
+ cond_ret = _process_cond_headers(uid, request.headers, False)
|
|
|
+ if cond_ret:
|
|
|
+ return cond_ret
|
|
|
+
|
|
|
rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
|
|
|
if request.mimetype != 'application/sparql-update':
|
|
|
return 'Provided content type is not a valid parsable format: {}'\
|
|
@@ -345,10 +379,6 @@ def patch_resource(uid, is_metadata=False):
|
|
|
local_update_str = g.tbox.localize_ext_str(update_str, nsc['fcres'][uid])
|
|
|
try:
|
|
|
rsrc = rsrc_api.update(uid, local_update_str, is_metadata)
|
|
|
- except ResourceNotExistsError as e:
|
|
|
- return str(e), 404
|
|
|
- except TombstoneError as e:
|
|
|
- return _tombstone_response(e, uid)
|
|
|
except (ServerManagedTermError, SingleSubjectError) as e:
|
|
|
return str(e), 412
|
|
|
except InvalidResourceError as e:
|
|
@@ -377,6 +407,18 @@ def delete_resource(uid):
|
|
|
must be deleted as well, or the ``Prefer:no-tombstone`` header can be used.
|
|
|
The latter will forget (completely delete) the resource immediately.
|
|
|
"""
|
|
|
+ # Fist check if it's not a 404 or a 410.
|
|
|
+ try:
|
|
|
+ if not rsrc_api.exists(uid):
|
|
|
+ return '', 404
|
|
|
+ except TombstoneError as e:
|
|
|
+ return _tombstone_response(e, uid)
|
|
|
+
|
|
|
+ # Then process the condition headers.
|
|
|
+ cond_ret = _process_cond_headers(uid, request.headers, False)
|
|
|
+ if cond_ret:
|
|
|
+ return cond_ret
|
|
|
+
|
|
|
headers = std_headers.copy()
|
|
|
|
|
|
if 'prefer' in request.headers:
|
|
@@ -385,12 +427,7 @@ def delete_resource(uid):
|
|
|
else:
|
|
|
leave_tstone = True
|
|
|
|
|
|
- try:
|
|
|
- rsrc_api.delete(uid, leave_tstone)
|
|
|
- except ResourceNotExistsError as e:
|
|
|
- return str(e), 404
|
|
|
- except TombstoneError as e:
|
|
|
- return _tombstone_response(e, uid)
|
|
|
+ rsrc_api.delete(uid, leave_tstone)
|
|
|
|
|
|
return '', 204, headers
|
|
|
|
|
@@ -623,17 +660,11 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
|
|
|
"""
|
|
|
rsp_headers = defaultdict(list)
|
|
|
|
|
|
- digest = rsrc.metadata.value(rsrc.uri, nsc['premis'].hasMessageDigest)
|
|
|
+ digest_p = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
|
|
|
# Only add ETag and digest if output is not RDF.
|
|
|
- if digest:
|
|
|
- digest_components = digest.split(':')
|
|
|
- cksum_hex = digest_components[-1]
|
|
|
- cksum = bytearray.fromhex(cksum_hex)
|
|
|
- digest_algo = digest_components[-2]
|
|
|
- etag_str = cksum_hex
|
|
|
- rsp_headers['ETag'] = etag_str
|
|
|
- rsp_headers['Digest'] = '{}={}'.format(
|
|
|
- digest_algo.upper(), b64encode(cksum).decode('ascii'))
|
|
|
+ if digest_p:
|
|
|
+ rsp_headers['ETag'], rsp_headers['Digest'] = (
|
|
|
+ _digest_headers(digest_p))
|
|
|
|
|
|
|
|
|
last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
|
|
@@ -650,3 +681,168 @@ def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
|
|
|
|
|
|
return rsp_headers
|
|
|
|
|
|
+
|
|
|
+def _digest_headers(digest):
|
|
|
+ """
|
|
|
+ Format ETag and Digest headers from resource checksum.
|
|
|
+
|
|
|
+ :param str digest: Resource digest. For an extracted IMR, this is the
|
|
|
+ value of the ``premis:hasMessageDigest`` property.
|
|
|
+ """
|
|
|
+ digest_components = digest.split(':')
|
|
|
+ cksum_hex = digest_components[-1]
|
|
|
+ cksum = bytearray.fromhex(cksum_hex)
|
|
|
+ digest_algo = digest_components[-2]
|
|
|
+ etag_str = cksum_hex
|
|
|
+ digest_str = '{}={}'.format(
|
|
|
+ digest_algo.upper(), b64encode(cksum).decode('ascii'))
|
|
|
+
|
|
|
+ return etag_str, digest_str
|
|
|
+
|
|
|
+
|
|
|
+def _condition_hdr_match(uid, headers, safe=True):
|
|
|
+ """
|
|
|
+ Conditional header evaluation for HEAD, GET, PUT and DELETE requests.
|
|
|
+
|
|
|
+ Determine whether any conditional headers, and which, is/are imposed in the
|
|
|
+ request (``If-Match``, ``If-None-Match``, ``If-Modified-Since``,
|
|
|
+ ``If-Unmodified-Since``, or none) and what the most relevant condition
|
|
|
+ evaluates to (``True`` or ``False``).
|
|
|
+
|
|
|
+ `RFC 7232 <https://tools.ietf.org/html/rfc7232#section-3.1>`__ does not
|
|
|
+ indicate an exact condition precedence, except that the ETag
|
|
|
+ matching conditions void the timestamp-based ones. This function
|
|
|
+ adopts the following precedence:
|
|
|
+
|
|
|
+ - ``If-Match`` is evaluated first if present;
|
|
|
+ - Else, ``If-None-Match`` is evaluated if present;
|
|
|
+ - Else, ``If-Modified-Since`` and ``If-Unmodified-Since``
|
|
|
+ are evaluated if present. If both conditions are present they are
|
|
|
+ both returned so they can be furher evaluated, e.g. using a logical AND
|
|
|
+ to allow time-range conditions, where the two terms indicate the early
|
|
|
+ and late boundary, respectively.
|
|
|
+
|
|
|
+ Note that the above mentioned RFC mentions several cases in which these
|
|
|
+ conditions are ignored, e.g. for a 404 in some cases, or for certain
|
|
|
+ HTTP methods for ``If-Modified-Since``. This must be implemented by the
|
|
|
+ calling function.
|
|
|
+
|
|
|
+ :param str uid: UID of the resource requested.
|
|
|
+ :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
|
|
|
+ headers.
|
|
|
+ :param bool safe: Whether a "safe" method is being processed. Defaults to
|
|
|
+ True.
|
|
|
+
|
|
|
+ :rtype: dict (str, bool)
|
|
|
+ :return: Dictionary whose keys are the conditional header names that
|
|
|
+ have been evaluated, and whose boolean values indicate whether each
|
|
|
+ condition is met. If no valid conditional header is found, an empty
|
|
|
+ dict is returned.
|
|
|
+ """
|
|
|
+ # ETag-based conditions.
|
|
|
+ # This ignores headers with empty values.
|
|
|
+ if headers.get('if-match') or headers.get('if-none-match'):
|
|
|
+ cond_hdr = 'if-match' if headers.get('if-match') else 'if-none-match'
|
|
|
+
|
|
|
+ # Wildcard matching for unsafe methods. Cannot be part of a list of
|
|
|
+ # ETags nor be enclosed in quotes.
|
|
|
+ if not safe and headers.get(cond_hdr) == '*':
|
|
|
+ return {cond_hdr: (cond_hdr == 'if-match') == rsrc_api.exists(uid)}
|
|
|
+
|
|
|
+ req_etags = [
|
|
|
+ et.strip('\'" ') for et in headers.get(cond_hdr).split(',')]
|
|
|
+
|
|
|
+ try:
|
|
|
+ rsrc_meta = rsrc_api.get_metadata(uid)
|
|
|
+ except ResourceNotExistsError:
|
|
|
+ rsrc_meta = Imr(nsc['fcres'][uid])
|
|
|
+
|
|
|
+ digest_prop = rsrc_meta.value(nsc['premis'].hasMessageDigest)
|
|
|
+ if digest_prop:
|
|
|
+ etag, _ = _digest_headers(digest_prop)
|
|
|
+ if cond_hdr == 'if-match':
|
|
|
+ is_match = etag in req_etags
|
|
|
+ else:
|
|
|
+ is_match = etag not in req_etags
|
|
|
+ else:
|
|
|
+ is_match = cond_hdr == 'if-none-match'
|
|
|
+
|
|
|
+ return {cond_hdr: is_match}
|
|
|
+
|
|
|
+ # Timestmp-based conditions.
|
|
|
+ ret = {}
|
|
|
+ if headers.get('if-modified-since') or headers.get('if-unmodified-since'):
|
|
|
+
|
|
|
+ try:
|
|
|
+ rsrc_meta = rsrc_api.get_metadata(uid)
|
|
|
+ except ResourceNotExistsError:
|
|
|
+ return {
|
|
|
+ 'if-modified-since': False,
|
|
|
+ 'if-unmodified-since': False
|
|
|
+ }
|
|
|
+
|
|
|
+ lastmod_str = rsrc_meta.value(nsc['fcrepo'].lastModified)
|
|
|
+ lastmod_date = arrow.get(lastmod_str)
|
|
|
+
|
|
|
+ mod_since_date = parse_date(headers.get('if-modified-since'))
|
|
|
+ # If date is not in a RFC 5322 format
|
|
|
+ # (https://tools.ietf.org/html/rfc5322#section-3.3) it evaluates to
|
|
|
+ # None.
|
|
|
+ if mod_since_date:
|
|
|
+ cond_hdr = 'if-modified-since'
|
|
|
+ ret[cond_hdr] = lastmod_ts > mod_since_date
|
|
|
+
|
|
|
+ unmod_since_date = parse_date(headers.get('if-unmodified-since'))
|
|
|
+ if unmod_since_date:
|
|
|
+ cond_hdr = 'if-unmodified-since'
|
|
|
+ ret[cond_hdr] = lastmod_ts < unmod_since_date
|
|
|
+
|
|
|
+ return ret
|
|
|
+
|
|
|
+
|
|
|
+def _process_cond_headers(uid, headers, safe=True):
|
|
|
+ """
|
|
|
+ Process the outcome of the evaluation of conditional headers.
|
|
|
+
|
|
|
+ This yields different response between safe methods (``HEAD``, ``GET``,
|
|
|
+ etc.) and unsafe ones (``PUT``, ``DELETE``, etc.
|
|
|
+
|
|
|
+ :param str uid: Resource UID.
|
|
|
+ :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
|
|
|
+ headers.
|
|
|
+ :param bool safe: Whether a "safe" method is being processed. Defaults to
|
|
|
+ True.
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ cond_match = _condition_hdr_match(uid, headers, safe)
|
|
|
+ except TombstoneError as e:
|
|
|
+ return _tombstone_response(e, uid)
|
|
|
+
|
|
|
+ if cond_match:
|
|
|
+ if safe:
|
|
|
+ if 'if-match' in cond_match or 'if-none-match' in cond_match:
|
|
|
+ # If an expected list of tags is not matched, the response is
|
|
|
+ # "Precondition Failed". For all other cases, it's "Not Modified".
|
|
|
+ if not cond_match.get('if-match', True):
|
|
|
+ return '', 412
|
|
|
+ if not cond_match.get('if-none-match', True):
|
|
|
+ return '', 304
|
|
|
+ # The presence of an Etag-based condition, whether satisfied or not,
|
|
|
+ # voids the timestamp-based conditions.
|
|
|
+ elif (
|
|
|
+ not cond_match.get('if-modified-since', True) or
|
|
|
+ not cond_match.get('if-unmodified-since', True)):
|
|
|
+ return '', 304
|
|
|
+ else:
|
|
|
+ # Note that If-Modified-Since is only evaluated for safe methods.
|
|
|
+ if 'if-match' in cond_match or 'if-none-match' in cond_match:
|
|
|
+ if (
|
|
|
+ not cond_match.get('if-match', True) or
|
|
|
+ not cond_match.get('if-none-match', True)):
|
|
|
+ return '', 412
|
|
|
+
|
|
|
+ # The presence of an Etag-based condition, whether satisfied or not,
|
|
|
+ # voids the timestamp-based conditions.
|
|
|
+ elif not cond_match.get('if-unmodified-since', True):
|
|
|
+ return '', 412
|
|
|
+
|