# ldp.py
  1. import hashlib
  2. import logging
  3. import pdb
  4. from base64 import b64encode
  5. from collections import defaultdict
  6. from io import BytesIO
  7. from pprint import pformat
  8. from uuid import uuid4
  9. import arrow
  10. from flask import (
  11. Blueprint, Response, g, make_response, render_template,
  12. request, send_file)
  13. from rdflib import Graph, plugin, parser#, serializer
  14. from werkzeug.http import parse_date
  15. from lakesuperior import env
  16. from lakesuperior.api import resource as rsrc_api
  17. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  18. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  19. from lakesuperior.exceptions import (
  20. ChecksumValidationError, ResourceNotExistsError, TombstoneError,
  21. ServerManagedTermError, InvalidResourceError, SingleSubjectError,
  22. ResourceExistsError, IncompatibleLdpTypeError)
  23. from lakesuperior.globals import RES_CREATED
  24. from lakesuperior.model.ldp_factory import LdpFactory
  25. from lakesuperior.model.ldp_nr import LdpNr
  26. from lakesuperior.model.ldp_rs import LdpRs
  27. from lakesuperior.model.ldpr import Ldpr
  28. from lakesuperior.toolbox import Toolbox
  29. DEFAULT_RDF_MIMETYPE = 'text/turtle'
  30. """
  31. Fallback serialization format used when no acceptable formats are specified.
  32. """
  33. logger = logging.getLogger(__name__)
  34. rdf_parsable_mimetypes = {
  35. mt.name for mt in plugin.plugins()
  36. if mt.kind is parser.Parser and '/' in mt.name
  37. }
  38. """MIMEtypes that can be parsed into RDF."""
  39. rdf_serializable_mimetypes = {
  40. #mt.name for mt in plugin.plugins()
  41. #if mt.kind is serializer.Serializer and '/' in mt.name
  42. 'application/ld+json',
  43. 'application/n-triples',
  44. 'application/rdf+xml',
  45. 'text/turtle',
  46. 'text/n3',
  47. }
  48. """
  49. MIMEtypes that RDF can be serialized into.
  50. These are not automatically derived from RDFLib because only triple
  51. (not quad) serializations are applicable.
  52. """
  53. accept_patch = (
  54. 'application/sparql-update',
  55. )
  56. std_headers = {
  57. 'Accept-Patch' : ','.join(accept_patch),
  58. 'Accept-Post' : ','.join(rdf_parsable_mimetypes),
  59. }
  60. """Predicates excluded by view."""
  61. vw_blacklist = {
  62. }
  63. ldp = Blueprint(
  64. 'ldp', __name__, template_folder='templates',
  65. static_url_path='/static', static_folder='templates/static')
  66. """
  67. Blueprint for LDP REST API. This is what is usually found under ``/rest/`` in
  68. standard fcrepo4. Here, it is under ``/ldp`` but initially ``/rest`` will be
  69. kept for backward compatibility.
  70. """
## ROUTE PRE- & POST-PROCESSING ##
  72. @ldp.url_defaults
  73. def bp_url_defaults(endpoint, values):
  74. url_prefix = getattr(g, 'url_prefix', None)
  75. if url_prefix is not None:
  76. values.setdefault('url_prefix', url_prefix)
  77. @ldp.url_value_preprocessor
  78. def bp_url_value_preprocessor(endpoint, values):
  79. g.url_prefix = values.pop('url_prefix')
  80. g.webroot = request.host_url + g.url_prefix
  81. # Normalize leading slashes for UID.
  82. if 'uid' in values:
  83. values['uid'] = '/' + values['uid'].lstrip('/')
  84. if 'parent_uid' in values:
  85. values['parent_uid'] = '/' + values['parent_uid'].lstrip('/')
  86. @ldp.before_request
  87. def log_request_start():
  88. logger.info('** Start {} {} **'.format(request.method, request.url))
  89. @ldp.before_request
  90. def instantiate_req_vars():
  91. g.tbox = Toolbox()
  92. @ldp.after_request
  93. def log_request_end(rsp):
  94. logger.info('** End {} {} **'.format(request.method, request.url))
  95. return rsp
## REST SERVICES ##
  97. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  98. @ldp.route('/', defaults={'uid': '/'}, methods=['GET'], strict_slashes=False)
  99. @ldp.route('/<path:uid>/fcr:metadata', defaults={'out_fmt' : 'rdf'},
  100. methods=['GET'])
  101. @ldp.route('/<path:uid>/fcr:content', defaults={'out_fmt' : 'non_rdf'},
  102. methods=['GET'])
  103. def get_resource(uid, out_fmt=None):
  104. r"""
  105. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  106. Retrieve RDF or binary content.
  107. :param str uid: UID of resource to retrieve. The repository root has
  108. an empty string for UID.
  109. :param str out_fmt: Force output to RDF or non-RDF if the resource is
  110. a LDP-NR. This is not available in the API but is used e.g. by the
  111. ``\*/fcr:metadata`` and ``\*/fcr:content`` endpoints. The default is
  112. False.
  113. """
  114. out_headers = std_headers.copy()
  115. repr_options = defaultdict(dict)
  116. # Fist check if it's not a 404 or a 410.
  117. try:
  118. if not rsrc_api.exists(uid):
  119. return '', 404
  120. except TombstoneError as e:
  121. return _tombstone_response(e, uid)
  122. # Then process the condition headers.
  123. cond_ret = _process_cond_headers(uid, request.headers)
  124. if cond_ret:
  125. return cond_ret
  126. # Then, business as usual.
  127. # Evaluate which representation is requested.
  128. if 'prefer' in request.headers:
  129. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  130. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  131. if 'return' in prefer:
  132. repr_options = parse_repr_options(prefer['return'])
  133. rsrc = rsrc_api.get(uid, repr_options)
  134. if out_fmt is None:
  135. rdf_mimetype = _best_rdf_mimetype()
  136. out_fmt = (
  137. 'rdf'
  138. if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
  139. else 'non_rdf')
  140. out_headers.update(_headers_from_metadata(rsrc, out_fmt))
  141. uri = g.tbox.uid_to_uri(uid)
  142. # RDF output.
  143. if out_fmt == 'rdf':
  144. if locals().get('rdf_mimetype', None) is None:
  145. rdf_mimetype = DEFAULT_RDF_MIMETYPE
  146. ggr = g.tbox.globalize_graph(rsrc.out_graph)
  147. ggr.namespace_manager = nsm
  148. return _negotiate_content(
  149. ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
  150. # Datastream.
  151. else:
  152. if not getattr(rsrc, 'local_path', False):
  153. return ('{} has no binary content.'.format(rsrc.uid), 404)
  154. logger.debug('Streaming out binary content.')
  155. if request.range and request.range.units == 'bytes':
  156. # Stream partial response.
  157. # This is only true if the header is well-formed. Thanks, Werkzeug.
  158. rsp = _parse_range_header(request.range.ranges, rsrc, out_headers)
  159. else:
  160. rsp = make_response(send_file(
  161. rsrc.local_path, as_attachment=True,
  162. attachment_filename=rsrc.filename,
  163. mimetype=rsrc.mimetype), 200, out_headers)
  164. # This seems necessary to prevent Flask from setting an
  165. # additional ETag.
  166. if 'ETag' in out_headers:
  167. rsp.set_etag(out_headers['ETag'])
  168. rsp.headers.add('Link', f'<{uri}/fcr:metadata>; rel="describedby"')
  169. return rsp
  170. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  171. def get_version_info(uid):
  172. """
  173. Get version info (`fcr:versions`).
  174. :param str uid: UID of resource to retrieve versions for.
  175. """
  176. rdf_mimetype = _best_rdf_mimetype() or DEFAULT_RDF_MIMETYPE
  177. try:
  178. gr = rsrc_api.get_version_info(uid)
  179. except ResourceNotExistsError as e:
  180. return str(e), 404
  181. except InvalidResourceError as e:
  182. return str(e), 409
  183. except TombstoneError as e:
  184. return _tombstone_response(e, uid)
  185. else:
  186. return _negotiate_content(g.tbox.globalize_graph(gr), rdf_mimetype)
  187. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  188. def get_version(uid, ver_uid):
  189. """
  190. Get an individual resource version.
  191. :param str uid: Resource UID.
  192. :param str ver_uid: Version UID.
  193. """
  194. rdf_mimetype = _best_rdf_mimetype() or DEFAULT_RDF_MIMETYPE
  195. try:
  196. gr = rsrc_api.get_version(uid, ver_uid)
  197. except ResourceNotExistsError as e:
  198. return str(e), 404
  199. except InvalidResourceError as e:
  200. return str(e), 409
  201. except TombstoneError as e:
  202. return _tombstone_response(e, uid)
  203. else:
  204. return _negotiate_content(g.tbox.globalize_graph(gr), rdf_mimetype)
  205. @ldp.route('/<path:parent_uid>', methods=['POST'], strict_slashes=False)
  206. @ldp.route('/', defaults={'parent_uid': '/'}, methods=['POST'],
  207. strict_slashes=False)
  208. def post_resource(parent_uid):
  209. """
  210. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  211. Add a new resource in a new URI.
  212. """
  213. rsp_headers = std_headers.copy()
  214. slug = request.headers.get('Slug')
  215. kwargs = {}
  216. kwargs['handling'], kwargs['disposition'] = set_post_put_params()
  217. stream, mimetype = _bistream_from_req()
  218. if mimetype in rdf_parsable_mimetypes:
  219. # If the content is RDF, localize in-repo URIs.
  220. global_rdf = stream.read()
  221. kwargs['rdf_data'] = g.tbox.localize_payload(global_rdf)
  222. kwargs['rdf_fmt'] = mimetype
  223. else:
  224. kwargs['stream'] = stream
  225. kwargs['mimetype'] = mimetype
  226. # Check digest if requested.
  227. if 'digest' in request.headers:
  228. kwargs['prov_cksum_algo'], kwargs['prov_cksum'] = \
  229. request.headers['digest'].split('=')
  230. try:
  231. rsrc = rsrc_api.create(parent_uid, slug, **kwargs)
  232. except ResourceNotExistsError as e:
  233. return str(e), 404
  234. except (InvalidResourceError, ChecksumValidationError) as e:
  235. return str(e), 409
  236. except TombstoneError as e:
  237. return _tombstone_response(e, uid)
  238. except ServerManagedTermError as e:
  239. return str(e), 412
  240. uri = g.tbox.uid_to_uri(rsrc.uid)
  241. rsp_headers.update(_headers_from_metadata(rsrc))
  242. rsp_headers['Location'] = uri
  243. if mimetype and kwargs.get('rdf_fmt') is None:
  244. rsp_headers['Link'] = (f'<{uri}/fcr:metadata>; rel="describedby"; '
  245. f'anchor="{uri}"')
  246. return uri, 201, rsp_headers
  247. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  248. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  249. methods=['PUT'])
  250. def put_resource(uid):
  251. """
  252. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  253. Add or replace a new resource at a specified URI.
  254. """
  255. # Parse headers.
  256. logger.debug('Request headers: {}'.format(request.headers))
  257. cond_ret = _process_cond_headers(uid, request.headers, False)
  258. if cond_ret:
  259. return cond_ret
  260. kwargs = {}
  261. kwargs['handling'], kwargs['disposition'] = set_post_put_params()
  262. stream, mimetype = _bistream_from_req()
  263. if mimetype in rdf_parsable_mimetypes:
  264. # If the content is RDF, localize in-repo URIs.
  265. global_rdf = stream.read()
  266. kwargs['rdf_data'] = g.tbox.localize_payload(global_rdf)
  267. kwargs['rdf_fmt'] = mimetype
  268. else:
  269. kwargs['stream'] = stream
  270. kwargs['mimetype'] = mimetype
  271. # Check digest if requested.
  272. if 'digest' in request.headers:
  273. kwargs['prov_cksum_algo'], kwargs['prov_cksum'] = \
  274. request.headers['digest'].split('=')
  275. try:
  276. evt, rsrc = rsrc_api.create_or_replace(uid, **kwargs)
  277. except (
  278. InvalidResourceError, ChecksumValidationError,
  279. ResourceExistsError) as e:
  280. return str(e), 409
  281. except (ServerManagedTermError, SingleSubjectError) as e:
  282. return str(e), 412
  283. except IncompatibleLdpTypeError as e:
  284. return str(e), 415
  285. except TombstoneError as e:
  286. return _tombstone_response(e, uid)
  287. rsp_headers = _headers_from_metadata(rsrc)
  288. rsp_headers['Content-Type'] = 'text/plain; charset=utf-8'
  289. uri = g.tbox.uid_to_uri(uid)
  290. if evt == RES_CREATED:
  291. rsp_code = 201
  292. rsp_headers['Location'] = rsp_body = uri
  293. if mimetype and not kwargs.get('rdf_data'):
  294. rsp_headers['Link'] = f'<{uri}/fcr:metadata>; rel="describedby"'
  295. else:
  296. rsp_code = 204
  297. rsp_body = ''
  298. return rsp_body, rsp_code, rsp_headers
  299. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  300. @ldp.route('/', defaults={'uid': '/'}, methods=['PATCH'],
  301. strict_slashes=False)
  302. def patch_resource(uid, is_metadata=False):
  303. """
  304. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  305. Update an existing resource with a SPARQL-UPDATE payload.
  306. """
  307. # Fist check if it's not a 404 or a 410.
  308. try:
  309. if not rsrc_api.exists(uid):
  310. return '', 404
  311. except TombstoneError as e:
  312. return _tombstone_response(e, uid)
  313. # Then process the condition headers.
  314. cond_ret = _process_cond_headers(uid, request.headers, False)
  315. if cond_ret:
  316. return cond_ret
  317. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  318. if request.mimetype != 'application/sparql-update':
  319. return 'Provided content type is not a valid parsable format: {}'\
  320. .format(request.mimetype), 415
  321. update_str = request.get_data().decode('utf-8')
  322. local_update_str = g.tbox.localize_ext_str(update_str, nsc['fcres'][uid])
  323. try:
  324. rsrc = rsrc_api.update(uid, local_update_str, is_metadata)
  325. except (ServerManagedTermError, SingleSubjectError) as e:
  326. return str(e), 412
  327. except InvalidResourceError as e:
  328. return str(e), 415
  329. else:
  330. rsp_headers.update(_headers_from_metadata(rsrc))
  331. return '', 204, rsp_headers
  332. @ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
  333. def patch_resource_metadata(uid):
  334. return patch_resource(uid, True)
  335. @ldp.route('/<path:uid>', methods=['DELETE'])
  336. def delete_resource(uid):
  337. """
  338. Delete a resource and optionally leave a tombstone.
  339. This behaves differently from FCREPO. A tombstone indicated that the
  340. resource is no longer available at its current location, but its historic
  341. snapshots still are. Also, deleting a resource with a tombstone creates
  342. one more version snapshot of the resource prior to being deleted.
  343. In order to completely wipe out all traces of a resource, the tombstone
  344. must be deleted as well, or the ``Prefer:no-tombstone`` header can be used.
  345. The latter will forget (completely delete) the resource immediately.
  346. """
  347. # Fist check if it's not a 404 or a 410.
  348. try:
  349. if not rsrc_api.exists(uid):
  350. return '', 404
  351. except TombstoneError as e:
  352. return _tombstone_response(e, uid)
  353. # Then process the condition headers.
  354. cond_ret = _process_cond_headers(uid, request.headers, False)
  355. if cond_ret:
  356. return cond_ret
  357. headers = std_headers.copy()
  358. if 'prefer' in request.headers:
  359. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  360. leave_tstone = 'no-tombstone' not in prefer
  361. else:
  362. leave_tstone = True
  363. rsrc_api.delete(uid, leave_tstone)
  364. return '', 204, headers
  365. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  366. 'PATCH', 'DELETE'])
  367. def tombstone(uid):
  368. """
  369. Handle all tombstone operations.
  370. The only allowed methods are POST and DELETE; any other verb will return a
  371. 405.
  372. """
  373. try:
  374. rsrc = rsrc_api.get(uid)
  375. except TombstoneError as e:
  376. if request.method == 'DELETE':
  377. if e.uid == uid:
  378. rsrc_api.delete(uid, False)
  379. return '', 204
  380. else:
  381. return _tombstone_response(e, uid)
  382. elif request.method == 'POST':
  383. if e.uid == uid:
  384. rsrc_uri = rsrc_api.resurrect(uid)
  385. headers = {'Location' : rsrc_uri}
  386. return rsrc_uri, 201, headers
  387. else:
  388. return _tombstone_response(e, uid)
  389. else:
  390. return 'Method Not Allowed.', 405
  391. except ResourceNotExistsError as e:
  392. return str(e), 404
  393. else:
  394. return '', 404
  395. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  396. def post_version(uid):
  397. """
  398. Create a new resource version.
  399. """
  400. if request.method == 'PUT':
  401. return 'Method not allowed.', 405
  402. ver_uid = request.headers.get('slug', None)
  403. try:
  404. ver_uid = rsrc_api.create_version(uid, ver_uid)
  405. except ResourceNotExistsError as e:
  406. return str(e), 404
  407. except InvalidResourceError as e:
  408. return str(e), 409
  409. except TombstoneError as e:
  410. return _tombstone_response(e, uid)
  411. else:
  412. return '', 201, {'Location': g.tbox.uid_to_uri(ver_uid)}
  413. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  414. def patch_version(uid, ver_uid):
  415. """
  416. Revert to a previous version.
  417. NOTE: This creates a new version snapshot.
  418. :param str uid: Resource UID.
  419. :param str ver_uid: Version UID.
  420. """
  421. try:
  422. rsrc_api.revert_to_version(uid, ver_uid)
  423. except ResourceNotExistsError as e:
  424. return str(e), 404
  425. except InvalidResourceError as e:
  426. return str(e), 409
  427. except TombstoneError as e:
  428. return _tombstone_response(e, uid)
  429. else:
  430. return '', 204
## PRIVATE METHODS ##
  432. def _best_rdf_mimetype():
  433. """
  434. Check if any of the 'Accept' header values provided is a RDF parsable
  435. format.
  436. """
  437. for accept in request.accept_mimetypes:
  438. mimetype = accept[0]
  439. if mimetype in rdf_parsable_mimetypes:
  440. return mimetype
  441. return None
  442. def _negotiate_content(gr, rdf_mimetype, headers=None, **vw_kwargs):
  443. """
  444. Return HTML or serialized RDF depending on accept headers.
  445. """
  446. if request.accept_mimetypes.best == 'text/html':
  447. return render_template(
  448. 'resource.html', gr=gr, nsc=nsc, nsm=nsm,
  449. blacklist=vw_blacklist, arrow=arrow, **vw_kwargs)
  450. else:
  451. for p in vw_blacklist:
  452. gr.remove((None, p, None))
  453. return Response(
  454. gr.serialize(format=rdf_mimetype), 200, headers,
  455. mimetype=rdf_mimetype)
  456. def _bistream_from_req():
  457. """
  458. Find how a binary file and its MIMEtype were uploaded in the request.
  459. """
  460. #logger.debug('Content type: {}'.format(request.mimetype))
  461. #logger.debug('files: {}'.format(request.files))
  462. #logger.debug('stream: {}'.format(request.stream))
  463. if request.mimetype == 'multipart/form-data':
  464. # This seems the "right" way to upload a binary file, with a
  465. # multipart/form-data MIME type and the file in the `file`
  466. # field. This however is not supported by FCREPO4.
  467. stream = request.files.get('file').stream
  468. mimetype = request.files.get('file').content_type
  469. # @TODO This will turn out useful to provide metadata
  470. # with the binary.
  471. #metadata = request.files.get('metadata').stream
  472. else:
  473. # This is a less clean way, with the file in the form body and
  474. # the request as application/x-www-form-urlencoded.
  475. # This is how FCREPO4 accepts binary uploads.
  476. stream = request.stream
  477. # @FIXME Must decide what to do with this.
  478. mimetype = request.mimetype
  479. if mimetype == '' or mimetype == 'application/x-www-form-urlencoded':
  480. if getattr(stream, 'limit', 0) == 0:
  481. stream = mimetype = None
  482. else:
  483. mimetype = 'application/octet-stream'
  484. return stream, mimetype
  485. def _tombstone_response(e, uid):
  486. headers = {
  487. 'Link': '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  488. } if e.uid == uid else {}
  489. return str(e), 410, headers
  490. def set_post_put_params():
  491. """
  492. Sets handling and content disposition for POST and PUT by parsing headers.
  493. """
  494. handling = 'strict'
  495. if 'prefer' in request.headers:
  496. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  497. logger.debug('Parsed Prefer header: {}'.format(prefer))
  498. if 'handling' in prefer:
  499. handling = prefer['handling']['value']
  500. try:
  501. disposition = g.tbox.parse_rfc7240(
  502. request.headers['content-disposition'])
  503. except KeyError:
  504. disposition = None
  505. return handling, disposition
  506. def parse_repr_options(retr_opts):
  507. """
  508. Set options to retrieve IMR.
  509. Ideally, IMR retrieval is done once per request, so all the options
  510. are set once in the `imr()` property.
  511. :param dict retr_opts:: Options parsed from `Prefer` header.
  512. """
  513. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  514. imr_options = {}
  515. if retr_opts.get('value') == 'minimal':
  516. imr_options = {
  517. 'embed_children' : False,
  518. 'incl_children' : False,
  519. 'incl_inbound' : False,
  520. 'incl_srv_mgd' : False,
  521. }
  522. else:
  523. # Default.
  524. imr_options = {
  525. 'embed_children' : False,
  526. 'incl_children' : True,
  527. 'incl_inbound' : False,
  528. 'incl_srv_mgd' : True,
  529. }
  530. # Override defaults.
  531. if 'parameters' in retr_opts:
  532. include = retr_opts['parameters']['include'].split(' ') \
  533. if 'include' in retr_opts['parameters'] else []
  534. omit = retr_opts['parameters']['omit'].split(' ') \
  535. if 'omit' in retr_opts['parameters'] else []
  536. logger.debug('Include: {}'.format(include))
  537. logger.debug('Omit: {}'.format(omit))
  538. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  539. imr_options['embed_children'] = True
  540. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  541. imr_options['incl_children'] = False
  542. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  543. imr_options['incl_inbound'] = True
  544. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  545. imr_options['incl_srv_mgd'] = False
  546. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  547. return imr_options
  548. def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
  549. """
  550. Create a dict of headers from a metadata graph.
  551. :param lakesuperior.model.ldpr.Ldpr rsrc: Resource to extract metadata
  552. from.
  553. """
  554. rsp_headers = defaultdict(list)
  555. digest_p = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
  556. # Only add ETag and digest if output is not RDF.
  557. if digest_p:
  558. rsp_headers['ETag'], rsp_headers['Digest'] = (
  559. _digest_headers(digest_p))
  560. last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
  561. if last_updated_term:
  562. rsp_headers['Last-Modified'] = arrow.get(last_updated_term)\
  563. .format('ddd, D MMM YYYY HH:mm:ss Z')
  564. for t in rsrc.ldp_types:
  565. rsp_headers['Link'].append('{};rel="type"'.format(t.n3()))
  566. if rsrc.mimetype:
  567. rsp_headers['Content-Type'] = rsrc.mimetype
  568. return rsp_headers
  569. def _digest_headers(digest):
  570. """
  571. Format ETag and Digest headers from resource checksum.
  572. :param str digest: Resource digest. For an extracted IMR, this is the
  573. value of the ``premis:hasMessageDigest`` property.
  574. """
  575. digest_components = digest.split(':')
  576. cksum_hex = digest_components[-1]
  577. cksum = bytearray.fromhex(cksum_hex)
  578. digest_algo = digest_components[-2]
  579. etag_str = cksum_hex
  580. digest_str = '{}={}'.format(
  581. digest_algo.upper(), b64encode(cksum).decode('ascii'))
  582. return etag_str, digest_str
  583. def _condition_hdr_match(uid, headers, safe=True):
  584. """
  585. Conditional header evaluation for HEAD, GET, PUT and DELETE requests.
  586. Determine whether any conditional headers, and which, is/are imposed in the
  587. request (``If-Match``, ``If-None-Match``, ``If-Modified-Since``,
  588. ``If-Unmodified-Since``, or none) and what the most relevant condition
  589. evaluates to (``True`` or ``False``).
  590. `RFC 7232 <https://tools.ietf.org/html/rfc7232#section-3.1>`__ does not
  591. indicate an exact condition precedence, except that the ETag
  592. matching conditions void the timestamp-based ones. This function
  593. adopts the following precedence:
  594. - ``If-Match`` is evaluated first if present;
  595. - Else, ``If-None-Match`` is evaluated if present;
  596. - Else, ``If-Modified-Since`` and ``If-Unmodified-Since``
  597. are evaluated if present. If both conditions are present they are
  598. both returned so they can be furher evaluated, e.g. using a logical AND
  599. to allow time-range conditions, where the two terms indicate the early
  600. and late boundary, respectively.
  601. Note that the above mentioned RFC mentions several cases in which these
  602. conditions are ignored, e.g. for a 404 in some cases, or for certain
  603. HTTP methods for ``If-Modified-Since``. This must be implemented by the
  604. calling function.
  605. :param str uid: UID of the resource requested.
  606. :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
  607. headers.
  608. :param bool safe: Whether a "safe" method is being processed. Defaults to
  609. True.
  610. :rtype: dict (str, bool)
  611. :return: Dictionary whose keys are the conditional header names that
  612. have been evaluated, and whose boolean values indicate whether each
  613. condition is met. If no valid conditional header is found, an empty
  614. dict is returned.
  615. """
  616. # ETag-based conditions.
  617. # This ignores headers with empty values.
  618. if headers.get('if-match') or headers.get('if-none-match'):
  619. cond_hdr = 'if-match' if headers.get('if-match') else 'if-none-match'
  620. # Wildcard matching for unsafe methods. Cannot be part of a list of
  621. # ETags nor be enclosed in quotes.
  622. if not safe and headers.get(cond_hdr) == '*':
  623. return {cond_hdr: (cond_hdr == 'if-match') == rsrc_api.exists(uid)}
  624. req_etags = [
  625. et.strip('\'" ') for et in headers.get(cond_hdr).split(',')]
  626. try:
  627. rsrc_meta = rsrc_api.get_metadata(uid)
  628. except ResourceNotExistsError:
  629. rsrc_meta = Imr(nsc['fcres'][uid])
  630. digest_prop = rsrc_meta.value(nsc['premis'].hasMessageDigest)
  631. if digest_prop:
  632. etag, _ = _digest_headers(digest_prop)
  633. if cond_hdr == 'if-match':
  634. is_match = etag in req_etags
  635. else:
  636. is_match = etag not in req_etags
  637. else:
  638. is_match = cond_hdr == 'if-none-match'
  639. return {cond_hdr: is_match}
  640. # Timestmp-based conditions.
  641. ret = {}
  642. if headers.get('if-modified-since') or headers.get('if-unmodified-since'):
  643. try:
  644. rsrc_meta = rsrc_api.get_metadata(uid)
  645. except ResourceNotExistsError:
  646. return {
  647. 'if-modified-since': False,
  648. 'if-unmodified-since': False
  649. }
  650. lastmod_str = rsrc_meta.value(nsc['fcrepo'].lastModified)
  651. lastmod_ts = arrow.get(lastmod_str)
  652. # If date is not in a RFC 5322 format
  653. # (https://tools.ietf.org/html/rfc5322#section-3.3) parse_date
  654. # evaluates to None.
  655. mod_since_date = parse_date(headers.get('if-modified-since'))
  656. if mod_since_date:
  657. cond_hdr = 'if-modified-since'
  658. ret[cond_hdr] = lastmod_ts > arrow.get(mod_since_date)
  659. unmod_since_date = parse_date(headers.get('if-unmodified-since'))
  660. if unmod_since_date:
  661. cond_hdr = 'if-unmodified-since'
  662. ret[cond_hdr] = lastmod_ts < arrow.get(unmod_since_date)
  663. return ret
  664. def _process_cond_headers(uid, headers, safe=True):
  665. """
  666. Process the outcome of the evaluation of conditional headers.
  667. This yields different response between safe methods (``HEAD``, ``GET``,
  668. etc.) and unsafe ones (``PUT``, ``DELETE``, etc.
  669. :param str uid: Resource UID.
  670. :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
  671. headers.
  672. :param bool safe: Whether a "safe" method is being processed. Defaults to
  673. True.
  674. """
  675. try:
  676. cond_match = _condition_hdr_match(uid, headers, safe)
  677. except TombstoneError as e:
  678. return _tombstone_response(e, uid)
  679. if cond_match:
  680. if safe:
  681. if 'if-match' in cond_match or 'if-none-match' in cond_match:
  682. # If an expected list of tags is not matched, the response is
  683. # "Precondition Failed". For all other cases, it's "Not Modified".
  684. if not cond_match.get('if-match', True):
  685. return '', 412
  686. if not cond_match.get('if-none-match', True):
  687. return '', 304
  688. # The presence of an Etag-based condition, whether satisfied or not,
  689. # voids the timestamp-based conditions.
  690. elif (
  691. not cond_match.get('if-modified-since', True) or
  692. not cond_match.get('if-unmodified-since', True)):
  693. return '', 304
  694. else:
  695. # Note that If-Modified-Since is only evaluated for safe methods.
  696. if 'if-match' in cond_match or 'if-none-match' in cond_match:
  697. if (
  698. not cond_match.get('if-match', True) or
  699. not cond_match.get('if-none-match', True)):
  700. return '', 412
  701. # The presence of an Etag-based condition, whether satisfied or not,
  702. # voids the timestamp-based conditions.
  703. elif not cond_match.get('if-unmodified-since', True):
  704. return '', 412
  705. def _parse_range_header(ranges, rsrc, headers):
  706. """
  707. Parse a ``Range`` header and return the appropriate response.
  708. """
  709. if len(ranges) == 1:
  710. # Single range.
  711. rng = ranges[0]
  712. logger.debug('Streaming contiguous partial content.')
  713. with open(rsrc.local_path, 'rb') as fh:
  714. size = None if rng[1] is None else rng[1] - rng[0]
  715. hdr_endbyte = (
  716. rsrc.content_size - 1 if rng[1] is None else rng[1] - 1)
  717. fh.seek(rng[0])
  718. out = fh.read(size)
  719. headers['Content-Range'] = \
  720. f'bytes {rng[0]}-{hdr_endbyte} / {rsrc.content_size}'
  721. else:
  722. return make_response('Multiple ranges are not yet supported.', 501)
  723. # TODO Format the response as multipart/byteranges:
  724. # https://tools.ietf.org/html/rfc7233#section-4.1
  725. #out = []
  726. #with open(rsrc.local_path, 'rb') as fh:
  727. # for rng in rng_header.ranges:
  728. # fh.seek(rng[0])
  729. # size = None if rng[1] is None else rng[1] - rng[0]
  730. # out.extend(fh.read(size))
  731. return make_response(out, 206, headers)