ldp.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007
  1. import hashlib
  2. import logging
  3. import pdb
  4. from base64 import b64encode
  5. from collections import defaultdict
  6. from io import BytesIO
  7. from pprint import pformat
  8. from uuid import uuid4
  9. import arrow
  10. from flask import (
  11. Blueprint, Response, g, make_response, render_template,
  12. request, send_file)
  13. from rdflib import Graph, plugin, parser#, serializer
  14. from werkzeug.http import parse_date
  15. from lakesuperior import env
  16. from lakesuperior import exceptions as exc
  17. from lakesuperior.api import resource as rsrc_api
  18. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  19. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  20. from lakesuperior.model.ldp.ldp_factory import LdpFactory
  21. from lakesuperior.model.ldp.ldp_nr import LdpNr
  22. from lakesuperior.model.ldp.ldp_rs import LdpRs
  23. from lakesuperior.model.ldp.ldpr import RES_CREATED, Ldpr
  24. from lakesuperior.util import toolbox
  25. from lakesuperior.util.toolbox import RequestUtils
DEFAULT_RDF_MIMETYPE = 'text/turtle'
"""
Fallback serialization format used when no acceptable formats are specified.
"""

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Built once at import time from the registered RDFLib plugins: only parser
# plugins whose registered name contains a slash are retained.
rdf_parsable_mimetypes = {
    mt.name for mt in plugin.plugins()
    if mt.kind is parser.Parser and '/' in mt.name
}
"""MIMEtypes that can be parsed into RDF."""

# Store handle taken from the application globals. The request handlers in
# this module use it to open transaction contexts (``store.txn_ctx()``).
store = env.app_globals.rdf_store

rdf_serializable_mimetypes = {
    #mt.name for mt in plugin.plugins()
    #if mt.kind is serializer.Serializer and '/' in mt.name
    'application/ld+json',
    'application/n-triples',
    'application/rdf+xml',
    'text/turtle',
    'text/n3',
}
"""
MIMEtypes that RDF can be serialized into.

These are not automatically derived from RDFLib because only triple
(not quad) serializations are applicable.
"""

# Content types advertised in the ``Accept-Patch`` response header.
accept_patch = (
    'application/sparql-update',
)

# Headers that handlers copy as the starting point of their response headers.
std_headers = {
    'Accept-Patch' : ','.join(accept_patch),
    'Accept-Post' : ','.join(rdf_parsable_mimetypes),
}

"""Predicates excluded by view."""
# NOTE: the string above describes ``vw_blacklist`` below. It is currently
# empty, so no predicate is filtered out of the rendered or serialized graph.
vw_blacklist = {
}

"""Prefer representations currently supported"""
# NOTE: the string above describes ``option_to_uri`` below, which maps the
# internal representation option names to the URIs defined on ``Ldpr`` that
# clients use in the ``Prefer`` header ``include`` / ``omit`` parameters.
option_to_uri = {
    'embed_children': Ldpr.EMBED_CHILD_RES_URI,
    'incl_children': Ldpr.RETURN_CHILD_RES_URI,
    'incl_inbound': Ldpr.RETURN_INBOUND_REF_URI,
    'incl_srv_mgd': Ldpr.RETURN_SRV_MGD_RES_URI
}

ldp = Blueprint(
        'ldp', __name__, template_folder='templates',
        static_url_path='/static', static_folder='templates/static')
"""
Blueprint for LDP REST API. This is what is usually found under ``/rest/`` in
standard fcrepo4. Here, it is under ``/ldp`` but initially ``/rest`` will be
kept for backward compatibility.
"""
  76. ## ROUTE PRE- & POST-PROCESSING ##
  77. @ldp.url_defaults
  78. def bp_url_defaults(endpoint, values):
  79. url_prefix = getattr(g, 'url_prefix', None)
  80. if url_prefix is not None:
  81. values.setdefault('url_prefix', url_prefix)
  82. @ldp.url_value_preprocessor
  83. def bp_url_value_preprocessor(endpoint, values):
  84. g.url_prefix = values.pop('url_prefix')
  85. g.webroot = request.host_url + g.url_prefix
  86. # Normalize leading slashes for UID.
  87. if 'uid' in values:
  88. values['uid'] = '/' + values['uid'].lstrip('/')
  89. if 'parent_uid' in values:
  90. values['parent_uid'] = '/' + values['parent_uid'].lstrip('/')
  91. @ldp.before_request
  92. def log_request_start():
  93. logger.info('** Start {} {} **'.format(request.method, request.url))
  94. @ldp.before_request
  95. def instantiate_req_vars():
  96. g.tbox = RequestUtils()
  97. @ldp.after_request
  98. def log_request_end(rsp):
  99. logger.info('** End {} {} **'.format(request.method, request.url))
  100. return rsp
  101. ## REST SERVICES ##
  102. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  103. @ldp.route('/', defaults={'uid': '/'}, methods=['GET'], strict_slashes=False)
  104. @ldp.route('/<path:uid>/fcr:metadata', defaults={'out_fmt' : 'rdf'},
  105. methods=['GET'])
  106. @ldp.route('/<path:uid>/fcr:content', defaults={'out_fmt' : 'non_rdf'},
  107. methods=['GET'])
  108. def get_resource(uid, out_fmt=None):
  109. r"""
  110. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  111. Retrieve RDF or binary content.
  112. :param str uid: UID of resource to retrieve. The repository root has
  113. an empty string for UID.
  114. :param str out_fmt: Force output to RDF or non-RDF if the resource is
  115. a LDP-NR. This is not available in the API but is used e.g. by the
  116. ``\*/fcr:metadata`` and ``\*/fcr:content`` endpoints. The default is
  117. False.
  118. """
  119. out_headers = std_headers.copy()
  120. repr_options = defaultdict(dict)
  121. # Fist check if it's not a 404 or a 410.
  122. try:
  123. if not rsrc_api.exists(uid):
  124. return '', 404
  125. except exc.TombstoneError as e:
  126. return _tombstone_response(e, uid)
  127. # Then process the condition headers.
  128. cond_ret = _process_cond_headers(uid, request.headers)
  129. if cond_ret:
  130. return cond_ret
  131. # Then, business as usual.
  132. # Evaluate which representation is requested.
  133. if 'prefer' in request.headers:
  134. prefer = toolbox.parse_rfc7240(request.headers['prefer'])
  135. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  136. if 'return' in prefer:
  137. repr_options = parse_repr_options(prefer['return'], out_headers)
  138. rsrc = rsrc_api.get(uid, repr_options)
  139. with store.txn_ctx():
  140. if out_fmt is None:
  141. rdf_mimetype = _best_rdf_mimetype()
  142. out_fmt = (
  143. 'rdf'
  144. if isinstance(rsrc, LdpRs) or rdf_mimetype is not None
  145. else 'non_rdf')
  146. out_headers.update(_headers_from_metadata(rsrc, out_fmt))
  147. uri = g.tbox.uid_to_uri(uid)
  148. # RDF output.
  149. if out_fmt == 'rdf':
  150. if locals().get('rdf_mimetype', None) is None:
  151. rdf_mimetype = DEFAULT_RDF_MIMETYPE
  152. ggr = g.tbox.globalize_imr(rsrc.out_graph)
  153. ggr.namespace_manager = nsm
  154. rsp = _negotiate_content(
  155. ggr, rdf_mimetype, out_headers, uid=uid, uri=uri)
  156. if isinstance(rsrc, LdpNr):
  157. rsp.headers.add(
  158. 'Link', f'<{g.tbox.uid_to_uri(uid)}>', rel='describes')
  159. return rsp
  160. # Datastream.
  161. if not getattr(rsrc, 'local_path', False):
  162. return ('{} has no binary content.'.format(rsrc.uid), 404)
  163. logger.debug('Streaming out binary content.')
  164. if request.range and request.range.units == 'bytes':
  165. # Stream partial response.
  166. # This is only true if the header is well-formed. Thanks, Werkzeug.
  167. rsp = _parse_range_header(
  168. request.range.ranges, rsrc, out_headers
  169. )
  170. else:
  171. rsp = make_response(send_file(
  172. rsrc.local_path, as_attachment=True,
  173. attachment_filename=rsrc.filename,
  174. mimetype=rsrc.mimetype), 200, out_headers)
  175. # This seems necessary to prevent Flask from setting an
  176. # additional ETag.
  177. if 'ETag' in out_headers:
  178. rsp.set_etag(out_headers['ETag'])
  179. rsp.headers.add('Link', f'<{uri}/fcr:metadata>; rel="describedby"')
  180. return rsp
  181. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  182. def get_version_info(uid):
  183. """
  184. Get version info (`fcr:versions`).
  185. :param str uid: UID of resource to retrieve versions for.
  186. """
  187. rdf_mimetype = _best_rdf_mimetype() or DEFAULT_RDF_MIMETYPE
  188. try:
  189. imr = rsrc_api.get_version_info(uid)
  190. except exc.ResourceNotExistsError as e:
  191. return str(e), 404
  192. except exc.InvalidResourceError as e:
  193. return str(e), 409
  194. except exc.TombstoneError as e:
  195. return _tombstone_response(e, uid)
  196. else:
  197. with store.txn_ctx():
  198. return _negotiate_content(g.tbox.globalize_imr(imr), rdf_mimetype)
  199. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  200. def get_version(uid, ver_uid):
  201. """
  202. Get an individual resource version.
  203. :param str uid: Resource UID.
  204. :param str ver_uid: Version UID.
  205. """
  206. rdf_mimetype = _best_rdf_mimetype() or DEFAULT_RDF_MIMETYPE
  207. try:
  208. imr = rsrc_api.get_version(uid, ver_uid)
  209. except exc.ResourceNotExistsError as e:
  210. return str(e), 404
  211. except exc.InvalidResourceError as e:
  212. return str(e), 409
  213. except exc.TombstoneError as e:
  214. return _tombstone_response(e, uid)
  215. else:
  216. with store.txn_ctx():
  217. return _negotiate_content(g.tbox.globalize_imr(imr), rdf_mimetype)
  218. @ldp.route('/<path:parent_uid>', methods=['POST'], strict_slashes=False)
  219. @ldp.route('/', defaults={'parent_uid': '/'}, methods=['POST'],
  220. strict_slashes=False)
  221. def post_resource(parent_uid):
  222. """
  223. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  224. Add a new resource in a new URI.
  225. """
  226. rsp_headers = std_headers.copy()
  227. slug = request.headers.get('Slug')
  228. try:
  229. kwargs = _create_args_from_req(slug)
  230. rsrc = rsrc_api.create(parent_uid, slug, **kwargs)
  231. except exc.RdfParsingError as e:
  232. return str(e), 400
  233. except exc.IndigestibleError:
  234. return (
  235. f'Unable to parse digest header: {request.headers["digest"]}'
  236. ), 400
  237. except exc.ResourceNotExistsError as e:
  238. return str(e), 404
  239. except (exc.InvalidResourceError, exc.ChecksumValidationError) as e:
  240. return str(e), 409
  241. except exc.TombstoneError as e:
  242. return _tombstone_response(e, uid)
  243. except exc.ServerManagedTermError as e:
  244. rsp_headers['Link'] = (
  245. f'<{uri}>; rel="{nsc["ldp"].constrainedBy}"; '
  246. f'{g.webroot}/info/ldp_constraints"'
  247. )
  248. return str(e), 412
  249. uri = g.tbox.uid_to_uri(rsrc.uid)
  250. with store.txn_ctx():
  251. rsp_headers.update(_headers_from_metadata(rsrc))
  252. rsp_headers['Location'] = uri
  253. if kwargs.get('mimetype') and kwargs.get('rdf_fmt') is None:
  254. rsp_headers['Link'] = (
  255. f'<{uri}/fcr:metadata>; rel="describedby"; anchor="{uri}"'
  256. )
  257. return uri, 201, rsp_headers
  258. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  259. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  260. methods=['PUT'])
  261. def put_resource(uid):
  262. """
  263. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  264. Add or replace a new resource at a specified URI.
  265. """
  266. # Parse headers.
  267. logger.debug('Request headers: {}'.format(request.headers))
  268. cond_ret = _process_cond_headers(uid, request.headers, False)
  269. if cond_ret:
  270. return cond_ret
  271. try:
  272. kwargs = _create_args_from_req(uid)
  273. evt, rsrc = rsrc_api.create_or_replace(uid, **kwargs)
  274. except exc.RdfParsingError as e:
  275. return str(e), 400
  276. except exc.IndigestibleError:
  277. return (
  278. f'Unable to parse digest header: {request.headers["digest"]}',
  279. 400)
  280. except (
  281. exc.InvalidResourceError, exc.ChecksumValidationError,
  282. exc.ResourceExistsError) as e:
  283. return str(e), 409
  284. except (exc.ServerManagedTermError, exc.SingleSubjectError) as e:
  285. return str(e), 412
  286. except exc.IncompatibleLdpTypeError as e:
  287. return str(e), 415
  288. except exc.TombstoneError as e:
  289. return _tombstone_response(e, uid)
  290. with store.txn_ctx():
  291. rsp_headers = _headers_from_metadata(rsrc)
  292. rsp_headers['Content-Type'] = 'text/plain; charset=utf-8'
  293. uri = g.tbox.uid_to_uri(uid)
  294. if evt == RES_CREATED:
  295. rsp_code = 201
  296. rsp_headers['Location'] = rsp_body = uri
  297. if kwargs.get('mimetype') and not kwargs.get('rdf_data'):
  298. rsp_headers['Link'] = f'<{uri}/fcr:metadata>; rel="describedby"'
  299. else:
  300. rsp_code = 204
  301. rsp_body = ''
  302. return rsp_body, rsp_code, rsp_headers
  303. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  304. @ldp.route('/', defaults={'uid': '/'}, methods=['PATCH'],
  305. strict_slashes=False)
  306. def patch_resource(uid, is_metadata=False):
  307. """
  308. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  309. Update an existing resource with a SPARQL-UPDATE payload.
  310. """
  311. # Fist check if it's not a 404 or a 410.
  312. try:
  313. if not rsrc_api.exists(uid):
  314. return '', 404
  315. except exc.TombstoneError as e:
  316. return _tombstone_response(e, uid)
  317. # Then process the condition headers.
  318. cond_ret = _process_cond_headers(uid, request.headers, False)
  319. if cond_ret:
  320. return cond_ret
  321. handling, _ = _set_post_put_params()
  322. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  323. if request.mimetype != 'application/sparql-update':
  324. return 'Provided content type is not a valid parsable format: {}'\
  325. .format(request.mimetype), 415
  326. update_str = request.get_data().decode('utf-8')
  327. local_update_str = g.tbox.localize_ext_str(update_str, nsc['fcres'][uid])
  328. try:
  329. rsrc = rsrc_api.update(uid, local_update_str, is_metadata, handling)
  330. except (exc.ServerManagedTermError, exc.SingleSubjectError) as e:
  331. return str(e), 412
  332. except exc.InvalidResourceError as e:
  333. return str(e), 415
  334. else:
  335. with store.txn_ctx():
  336. rsp_headers.update(_headers_from_metadata(rsrc))
  337. return '', 204, rsp_headers
  338. @ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
  339. def patch_resource_metadata(uid):
  340. return patch_resource(uid, True)
  341. @ldp.route('/<path:uid>', methods=['DELETE'])
  342. def delete_resource(uid):
  343. """
  344. Delete a resource and optionally leave a tombstone.
  345. This behaves differently from FCREPO. A tombstone indicated that the
  346. resource is no longer available at its current location, but its historic
  347. snapshots still are. Also, deleting a resource with a tombstone creates
  348. one more version snapshot of the resource prior to being deleted.
  349. In order to completely wipe out all traces of a resource, the tombstone
  350. must be deleted as well, or the ``Prefer:no-tombstone`` header can be used.
  351. The latter will forget (completely delete) the resource immediately.
  352. """
  353. # Fist check if it's not a 404 or a 410.
  354. try:
  355. if not rsrc_api.exists(uid):
  356. return '', 404
  357. except exc.TombstoneError as e:
  358. return _tombstone_response(e, uid)
  359. # Then process the condition headers.
  360. cond_ret = _process_cond_headers(uid, request.headers, False)
  361. if cond_ret:
  362. return cond_ret
  363. headers = std_headers.copy()
  364. if 'prefer' in request.headers:
  365. prefer = toolbox.parse_rfc7240(request.headers['prefer'])
  366. leave_tstone = 'no-tombstone' not in prefer
  367. else:
  368. leave_tstone = True
  369. rsrc_api.delete(uid, leave_tstone)
  370. return '', 204, headers
  371. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  372. 'PATCH', 'DELETE'])
  373. def tombstone(uid):
  374. """
  375. Handle all tombstone operations.
  376. The only allowed methods are POST and DELETE; any other verb will return a
  377. 405.
  378. """
  379. try:
  380. rsrc_api.get(uid)
  381. except exc.TombstoneError as e:
  382. if request.method == 'DELETE':
  383. if e.uid == uid:
  384. rsrc_api.delete(uid, False)
  385. return '', 204
  386. else:
  387. return _tombstone_response(e, uid)
  388. elif request.method == 'POST':
  389. if e.uid == uid:
  390. rsrc_uri = rsrc_api.resurrect(uid)
  391. headers = {'Location' : rsrc_uri}
  392. return rsrc_uri, 201, headers
  393. else:
  394. return _tombstone_response(e, uid)
  395. else:
  396. return 'Method Not Allowed.', 405
  397. except exc.ResourceNotExistsError as e:
  398. return str(e), 404
  399. else:
  400. return '', 404
  401. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  402. def post_version(uid):
  403. """
  404. Create a new resource version.
  405. """
  406. if request.method == 'PUT':
  407. return 'Method not allowed.', 405
  408. ver_uid = request.headers.get('slug', None)
  409. try:
  410. ver_uid = rsrc_api.create_version(uid, ver_uid)
  411. except exc.ResourceNotExistsError as e:
  412. return str(e), 404
  413. except exc.InvalidResourceError as e:
  414. return str(e), 409
  415. except exc.TombstoneError as e:
  416. return _tombstone_response(e, uid)
  417. else:
  418. return '', 201, {'Location': g.tbox.uid_to_uri(ver_uid)}
  419. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  420. def patch_version(uid, ver_uid):
  421. """
  422. Revert to a previous version.
  423. NOTE: This creates a new version snapshot.
  424. :param str uid: Resource UID.
  425. :param str ver_uid: Version UID.
  426. """
  427. try:
  428. rsrc_api.revert_to_version(uid, ver_uid)
  429. except exc.ResourceNotExistsError as e:
  430. return str(e), 404
  431. except exc.InvalidResourceError as e:
  432. return str(e), 409
  433. except exc.TombstoneError as e:
  434. return _tombstone_response(e, uid)
  435. else:
  436. return '', 204
  437. ## PRIVATE METHODS ##
  438. def _best_rdf_mimetype():
  439. """
  440. Check if any of the 'Accept' header values provided is a RDF parsable
  441. format.
  442. """
  443. for accept in request.accept_mimetypes:
  444. mimetype = accept[0]
  445. if mimetype in rdf_parsable_mimetypes:
  446. return mimetype
  447. return None
  448. def _negotiate_content(gr, rdf_mimetype, headers=None, **vw_kwargs):
  449. """
  450. Return HTML or serialized RDF depending on accept headers.
  451. """
  452. if request.accept_mimetypes.best == 'text/html':
  453. rsp = render_template(
  454. 'resource.html', gr=gr, nsc=nsc, nsm=nsm,
  455. blacklist=vw_blacklist, arrow=arrow, **vw_kwargs)
  456. mimetype = 'text/html'
  457. else:
  458. for p in vw_blacklist:
  459. gr.remove((None, p, None))
  460. rsp = gr.serialize(format=rdf_mimetype)
  461. mimetype = rdf_mimetype
  462. return Response(rsp, 200, headers, mimetype=mimetype)
  463. def _create_args_from_req(uid):
  464. """
  465. Set API creation method arguments from request parameters.
  466. The ``kwargs`` variable returned has two keys: either ``rdf_data`` and
  467. ``rdf_fmt`` for LDP-RS or ``stream`` and ``mimetype`` for LDP-NR.
  468. :rtype: dict
  469. """
  470. #logger.debug('Content type: {}'.format(request.mimetype))
  471. #logger.debug('files: {}'.format(request.files))
  472. #logger.debug('stream: {}'.format(request.stream))
  473. #pdb.set_trace()
  474. handling, disposition = _set_post_put_params()
  475. kwargs = {'handling': handling}
  476. if disposition:
  477. kwargs['disposition'] = disposition
  478. link_hdr = request.headers.get('Link')
  479. if link_hdr:
  480. force_ldpnr = (
  481. nsc['ldp']['NonRDFSource'] in link_hdr
  482. and 'rel="type"' in link_hdr)
  483. else:
  484. force_ldpnr = False
  485. if request.mimetype == 'multipart/form-data':
  486. # This seems the "right" way to upload a binary file, with a
  487. # multipart/form-data MIME type and the file in the `file`
  488. # field. This however is not supported by FCREPO4.
  489. stream = request.files.get('file').stream
  490. mimetype = request.files.get('file').content_type
  491. # @TODO This will turn out useful to provide metadata
  492. # with the binary.
  493. #metadata = request.files.get('metadata').stream
  494. else:
  495. # This is a less clean way, with the file in the form body and
  496. # the request as application/x-www-form-urlencoded.
  497. # This is how FCREPO4 accepts binary uploads.
  498. stream = request.stream
  499. # @FIXME Must decide what to do with this.
  500. mimetype = request.mimetype
  501. if mimetype == 'application/x-www-form-urlencoded':
  502. mimetype = None
  503. if mimetype in rdf_parsable_mimetypes and not force_ldpnr:
  504. # If the content is RDF, localize in-repo URIs.
  505. global_rdf = stream.read()
  506. kwargs['rdf_data'] = g.tbox.localize_payload(global_rdf)
  507. kwargs['rdf_fmt'] = mimetype
  508. else:
  509. # Unspecified mimetype or force_ldpnr creates a LDP-NR.
  510. kwargs['stream'] = stream or BytesIO(b'')
  511. kwargs['mimetype'] = mimetype or 'application/octet-stream'
  512. # Check digest if requested.
  513. if 'digest' in request.headers:
  514. try:
  515. kwargs['prov_cksum_algo'], kwargs['prov_cksum'] = (
  516. request.headers['digest'].split('=')
  517. )
  518. except ValueError:
  519. raise exc.IndigestibleError(uid)
  520. return kwargs
  521. def _tombstone_response(e, uid):
  522. headers = {
  523. 'Link': '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  524. } if e.uid == uid else {}
  525. return str(e), 410, headers
  526. def _set_post_put_params():
  527. """
  528. Sets handling and content disposition for POST and PUT by parsing headers.
  529. """
  530. handling = 'strict'
  531. if 'prefer' in request.headers:
  532. prefer = toolbox.parse_rfc7240(request.headers['prefer'])
  533. logger.debug('Parsed Prefer header: {}'.format(prefer))
  534. if 'handling' in prefer:
  535. handling = prefer['handling']['value']
  536. try:
  537. disposition = toolbox.parse_rfc7240(
  538. request.headers['content-disposition'])
  539. except KeyError:
  540. disposition = None
  541. return handling, disposition
  542. def parse_repr_options(repr_options, out_headers):
  543. """
  544. Set options to retrieve IMR.
  545. Ideally, IMR retrieval is done once per request, so all the options
  546. are set once in the `imr()` property.
  547. Representation options include:
  548. - ``embed_children``: include full resource representation of all resource
  549. children in the resource graph.
  550. - ``incl_children``: TODO
  551. - ``incl_inbound``: include inbound triples (triples whose object is
  552. this resource).
  553. - ``incl_srv_mgd``: include server-managed triples.
  554. All options above are ``False`` by default except for ``incl_srv_mgd``
  555. which is only ``False`` if the ``return`` representation is ``minimal``.
  556. :param dict repr_options:: Options parsed from `Prefer` header.
  557. :param dict out_headers:: Response headers.
  558. """
  559. logger.debug('Parsing retrieval options: {}'.format(repr_options))
  560. if repr_options.get('value') == 'minimal':
  561. imr_options = {
  562. 'embed_children' : False,
  563. 'incl_children' : False,
  564. 'incl_inbound' : False,
  565. 'incl_srv_mgd' : False,
  566. }
  567. out_headers['Preference-Applied'] = 'return="minimal"'
  568. else:
  569. # Default.
  570. imr_options = {
  571. 'embed_children' : False,
  572. 'incl_children' : True,
  573. 'incl_inbound' : False,
  574. 'incl_srv_mgd' : True,
  575. }
  576. # Override defaults.
  577. if 'parameters' in repr_options:
  578. try:
  579. pref_imr_options = _valid_preferences(repr_options)
  580. include = list()
  581. omit = list()
  582. for k, v in pref_imr_options.items():
  583. # pref_imr_options only contains requested preferences,
  584. # override the defaults for those.
  585. imr_options[k] = v
  586. # This creates Preference-Applied headers.
  587. if v:
  588. list_holder = include
  589. else:
  590. list_holder = omit
  591. list_holder.append(str(option_to_uri[k]))
  592. header_output = ''
  593. if len(include) > 0:
  594. header_output += ' include="' + ' '.join(include) + '";'
  595. if len(omit) > 0:
  596. header_output += ' omit="' + ' '.join(omit) + '";'
  597. if len(header_output) > 0:
  598. out_headers['Preference-Applied'] = 'return=representation;'\
  599. + header_output
  600. except KeyError:
  601. # Invalid Prefer header so we disregard the entire thing.
  602. pass
  603. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  604. return imr_options
  605. def _preference_decision(include, omit, header):
  606. """
  607. Determine whether a header is in include or omit but not both.
  608. :param include:: list of include preference uris
  609. :param omit:: list of omit preference uris
  610. :param header:: the uri to look for
  611. :return: True if in include only or false if in omit only.
  612. """
  613. if str(header) in include or str(header) in omit:
  614. if str(header) in include and str(header) in omit:
  615. # You can't include and omit, so ignore it.
  616. raise KeyError('Can\'t include and omit same preference')
  617. else:
  618. return str(header) in include
  619. return None
  620. def _valid_preferences(repr_options):
  621. """
  622. Parse the Preference header to determine which we are applying.
  623. Re-used for response Preference-Applied header.
  624. :param repr_options: The incoming Preference header.
  625. :return: list of options being applied.
  626. """
  627. imr_options = dict()
  628. include = repr_options['parameters']['include'].split(' ') \
  629. if 'include' in repr_options['parameters'] else []
  630. omit = repr_options['parameters']['omit'].split(' ') \
  631. if 'omit' in repr_options['parameters'] else []
  632. logger.debug('Include: {}'.format(include))
  633. logger.debug('Omit: {}'.format(omit))
  634. distinct_representations = include.copy()
  635. distinct_representations.extend(omit)
  636. distinct_representations = set(distinct_representations)
  637. uri_to_option = {str(v): k for k, v in option_to_uri.items()}
  638. for uri in distinct_representations:
  639. # Throws KeyError if we don't support the header
  640. option = uri_to_option[uri]
  641. imr_options[option] = _preference_decision(include, omit, uri)
  642. return imr_options
  643. def _headers_from_metadata(rsrc, out_fmt='text/turtle'):
  644. """
  645. Create a dict of headers from a metadata graph.
  646. :param lakesuperior.model.ldp.ldpr.Ldpr rsrc: Resource to extract metadata
  647. from.
  648. """
  649. rsp_headers = defaultdict(list)
  650. digest_p = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
  651. # Only add ETag and digest if output is not RDF.
  652. if digest_p:
  653. rsp_headers['ETag'], rsp_headers['Digest'] = (
  654. _digest_headers(digest_p))
  655. last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
  656. if last_updated_term:
  657. rsp_headers['Last-Modified'] = arrow.get(last_updated_term)\
  658. .format('ddd, D MMM YYYY HH:mm:ss Z')
  659. for t in rsrc.ldp_types:
  660. rsp_headers['Link'].append('{};rel="type"'.format(t.n3()))
  661. if rsrc.mimetype:
  662. rsp_headers['Content-Type'] = rsrc.mimetype
  663. return rsp_headers
  664. def _digest_headers(digest):
  665. """
  666. Format ETag and Digest headers from resource checksum.
  667. :param str digest: Resource digest. For an extracted IMR, this is the
  668. value of the ``premis:hasMessageDigest`` property.
  669. """
  670. digest_components = digest.split(':')
  671. cksum_hex = digest_components[-1]
  672. cksum = bytearray.fromhex(cksum_hex)
  673. digest_algo = digest_components[-2]
  674. etag_str = cksum_hex
  675. digest_str = '{}={}'.format(
  676. digest_algo.upper(), b64encode(cksum).decode('ascii'))
  677. return etag_str, digest_str
  678. def _condition_hdr_match(uid, headers, safe=True):
  679. """
  680. Conditional header evaluation for HEAD, GET, PUT and DELETE requests.
  681. Determine whether any conditional headers, and which, is/are imposed in the
  682. request (``If-Match``, ``If-None-Match``, ``If-Modified-Since``,
  683. ``If-Unmodified-Since``, or none) and what the most relevant condition
  684. evaluates to (``True`` or ``False``).
  685. `RFC 7232 <https://tools.ietf.org/html/rfc7232#section-3.1>`__ does not
  686. indicate an exact condition precedence, except that the ETag
  687. matching conditions void the timestamp-based ones. This function
  688. adopts the following precedence:
  689. - ``If-Match`` is evaluated first if present;
  690. - Else, ``If-None-Match`` is evaluated if present;
  691. - Else, ``If-Modified-Since`` and ``If-Unmodified-Since``
  692. are evaluated if present. If both conditions are present they are
  693. both returned so they can be furher evaluated, e.g. using a logical AND
  694. to allow time-range conditions, where the two terms indicate the early
  695. and late boundary, respectively.
  696. Note that the above mentioned RFC mentions several cases in which these
  697. conditions are ignored, e.g. for a 404 in some cases, or for certain
  698. HTTP methods for ``If-Modified-Since``. This must be implemented by the
  699. calling function.
  700. :param str uid: UID of the resource requested.
  701. :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
  702. headers.
  703. :param bool safe: Whether a "safe" method is being processed. Defaults to
  704. True.
  705. :rtype: dict (str, bool)
  706. :return: Dictionary whose keys are the conditional header names that
  707. have been evaluated, and whose boolean values indicate whether each
  708. condition is met. If no valid conditional header is found, an empty
  709. dict is returned.
  710. """
  711. # ETag-based conditions.
  712. # This ignores headers with empty values.
  713. if headers.get('if-match') or headers.get('if-none-match'):
  714. cond_hdr = 'if-match' if headers.get('if-match') else 'if-none-match'
  715. # Wildcard matching for unsafe methods. Cannot be part of a list of
  716. # ETags nor be enclosed in quotes.
  717. if not safe and headers.get(cond_hdr) == '*':
  718. return {cond_hdr: (cond_hdr == 'if-match') == rsrc_api.exists(uid)}
  719. req_etags = [
  720. et.strip('\'" ') for et in headers.get(cond_hdr).split(',')]
  721. with store.txn_ctx():
  722. try:
  723. rsrc_meta = rsrc_api.get_metadata(uid)
  724. except exc.ResourceNotExistsError:
  725. rsrc_meta = Graph(uri=nsc['fcres'][uid])
  726. digest_prop = rsrc_meta.value(nsc['premis'].hasMessageDigest)
  727. if digest_prop:
  728. etag, _ = _digest_headers(digest_prop)
  729. if cond_hdr == 'if-match':
  730. is_match = etag in req_etags
  731. else:
  732. is_match = etag not in req_etags
  733. else:
  734. is_match = cond_hdr == 'if-none-match'
  735. return {cond_hdr: is_match}
  736. # Timestmp-based conditions.
  737. ret = {}
  738. if headers.get('if-modified-since') or headers.get('if-unmodified-since'):
  739. try:
  740. rsrc_meta = rsrc_api.get_metadata(uid)
  741. except exc.ResourceNotExistsError:
  742. return {
  743. 'if-modified-since': False,
  744. 'if-unmodified-since': False
  745. }
  746. with store.txn_ctx():
  747. lastmod_str = rsrc_meta.value(nsc['fcrepo'].lastModified)
  748. lastmod_ts = arrow.get(lastmod_str)
  749. # If date is not in a RFC 5322 format
  750. # (https://tools.ietf.org/html/rfc5322#section-3.3) parse_date
  751. # evaluates to None.
  752. mod_since_date = parse_date(headers.get('if-modified-since'))
  753. if mod_since_date:
  754. cond_hdr = 'if-modified-since'
  755. ret[cond_hdr] = lastmod_ts > arrow.get(mod_since_date)
  756. unmod_since_date = parse_date(headers.get('if-unmodified-since'))
  757. if unmod_since_date:
  758. cond_hdr = 'if-unmodified-since'
  759. ret[cond_hdr] = lastmod_ts < arrow.get(unmod_since_date)
  760. return ret
  761. def _process_cond_headers(uid, headers, safe=True):
  762. """
  763. Process the outcome of the evaluation of conditional headers.
  764. This yields different response between safe methods (``HEAD``, ``GET``,
  765. etc.) and unsafe ones (``PUT``, ``DELETE``, etc.
  766. :param str uid: Resource UID.
  767. :param werkzeug.datastructures.EnvironHeaders headers: Incoming request
  768. headers.
  769. :param bool safe: Whether a "safe" method is being processed. Defaults to
  770. True.
  771. """
  772. try:
  773. cond_match = _condition_hdr_match(uid, headers, safe)
  774. except exc.TombstoneError as e:
  775. return _tombstone_response(e, uid)
  776. if cond_match:
  777. if safe:
  778. if 'if-match' in cond_match or 'if-none-match' in cond_match:
  779. # If an expected list of tags is not matched, the response is
  780. # "Precondition Failed". For all other cases, it's "Not Modified".
  781. if not cond_match.get('if-match', True):
  782. return '', 412
  783. if not cond_match.get('if-none-match', True):
  784. return '', 304
  785. # The presence of an Etag-based condition, whether satisfied or not,
  786. # voids the timestamp-based conditions.
  787. elif (
  788. not cond_match.get('if-modified-since', True) or
  789. not cond_match.get('if-unmodified-since', True)):
  790. return '', 304
  791. else:
  792. # Note that If-Modified-Since is only evaluated for safe methods.
  793. if 'if-match' in cond_match or 'if-none-match' in cond_match:
  794. if (
  795. not cond_match.get('if-match', True) or
  796. not cond_match.get('if-none-match', True)):
  797. return '', 412
  798. # The presence of an Etag-based condition, whether satisfied or not,
  799. # voids the timestamp-based conditions.
  800. elif not cond_match.get('if-unmodified-since', True):
  801. return '', 412
  802. def _parse_range_header(ranges, rsrc, headers):
  803. """
  804. Parse a ``Range`` header and return the appropriate response.
  805. """
  806. if len(ranges) == 1:
  807. # Single range.
  808. rng = ranges[0]
  809. logger.debug('Streaming contiguous partial content.')
  810. with open(rsrc.local_path, 'rb') as fh:
  811. size = None if rng[1] is None else rng[1] - rng[0]
  812. hdr_endbyte = (
  813. rsrc.content_size - 1 if rng[1] is None else rng[1] - 1)
  814. fh.seek(rng[0])
  815. out = fh.read(size)
  816. headers['Content-Range'] = \
  817. f'bytes {rng[0]}-{hdr_endbyte} / {rsrc.content_size}'
  818. else:
  819. return make_response('Multiple ranges are not yet supported.', 501)
  820. # TODO Format the response as multipart/byteranges:
  821. # https://tools.ietf.org/html/rfc7233#section-4.1
  822. #out = []
  823. #with open(rsrc.local_path, 'rb') as fh:
  824. # for rng in rng_header.ranges:
  825. # fh.seek(rng[0])
  826. # size = None if rng[1] is None else rng[1] - rng[0]
  827. # out.extend(fh.read(size))
  828. return make_response(out, 206, headers)