ldp.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630
  1. import logging
  2. import pdb
  3. from collections import defaultdict
  4. from io import BytesIO
  5. from pprint import pformat
  6. from uuid import uuid4
  7. import arrow
  8. from flask import (
  9. Blueprint, g, make_response, render_template,
  10. request, send_file)
  11. from rdflib import Graph
  12. from lakesuperior.api import resource as rsrc_api
  13. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  14. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  15. from lakesuperior.exceptions import (ResourceNotExistsError, TombstoneError,
  16. ServerManagedTermError, InvalidResourceError, SingleSubjectError,
  17. ResourceExistsError, IncompatibleLdpTypeError)
  18. from lakesuperior.globals import RES_CREATED
  19. from lakesuperior.model.ldp_factory import LdpFactory
  20. from lakesuperior.model.ldp_nr import LdpNr
  21. from lakesuperior.model.ldp_rs import LdpRs
  22. from lakesuperior.model.ldpr import Ldpr
  23. from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
  24. from lakesuperior.toolbox import Toolbox
  25. logger = logging.getLogger(__name__)
  26. # Blueprint for LDP REST API. This is what is usually found under `/rest/` in
  27. # standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
  28. # for backward compatibility.
  29. ldp = Blueprint(
  30. 'ldp', __name__, template_folder='templates',
  31. static_url_path='/static', static_folder='templates/static')
  32. accept_patch = (
  33. 'application/sparql-update',
  34. )
  35. accept_rdf = (
  36. 'application/ld+json',
  37. 'application/n-triples',
  38. 'application/rdf+xml',
  39. #'application/x-turtle',
  40. #'application/xhtml+xml',
  41. #'application/xml',
  42. #'text/html',
  43. 'text/n3',
  44. #'text/plain',
  45. 'text/rdf+n3',
  46. 'text/turtle',
  47. )
  48. std_headers = {
  49. 'Accept-Patch' : ','.join(accept_patch),
  50. 'Accept-Post' : ','.join(accept_rdf),
  51. #'Allow' : ','.join(allow),
  52. }
  53. """Predicates excluded by view."""
  54. vw_blacklist = {
  55. }
  56. @ldp.url_defaults
  57. def bp_url_defaults(endpoint, values):
  58. url_prefix = getattr(g, 'url_prefix', None)
  59. if url_prefix is not None:
  60. values.setdefault('url_prefix', url_prefix)
  61. @ldp.url_value_preprocessor
  62. def bp_url_value_preprocessor(endpoint, values):
  63. g.url_prefix = values.pop('url_prefix')
  64. g.webroot = request.host_url + g.url_prefix
  65. # Normalize leading slashes for UID.
  66. if 'uid' in values:
  67. values['uid'] = '/' + values['uid'].lstrip('/')
  68. if 'parent_uid' in values:
  69. values['parent_uid'] = '/' + values['parent_uid'].lstrip('/')
  70. @ldp.before_request
  71. def log_request_start():
  72. logger.info('** Start {} {} **'.format(request.method, request.url))
  73. @ldp.before_request
  74. def instantiate_req_vars():
  75. g.tbox = Toolbox()
  76. @ldp.after_request
  77. def log_request_end(rsp):
  78. logger.info('** End {} {} **'.format(request.method, request.url))
  79. return rsp
  80. ## REST SERVICES ##
  81. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  82. @ldp.route('/', defaults={'uid': '/'}, methods=['GET'], strict_slashes=False)
  83. @ldp.route('/<path:uid>/fcr:metadata', defaults={'out_fmt' : 'rdf'},
  84. methods=['GET'])
  85. @ldp.route('/<path:uid>/fcr:content', defaults={'out_fmt' : 'non_rdf'},
  86. methods=['GET'])
  87. def get_resource(uid, out_fmt=None):
  88. r"""
  89. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  90. Retrieve RDF or binary content.
  91. :param str uid: UID of resource to retrieve. The repository root has
  92. an empty string for UID.
  93. :param str out_fmt: Force output to RDF or non-RDF if the resource is
  94. a LDP-NR. This is not available in the API but is used e.g. by the
  95. ``\*/fcr:metadata`` and ``\*/fcr:content`` endpoints. The default is
  96. False.
  97. """
  98. logger.info('UID: {}'.format(uid))
  99. out_headers = std_headers
  100. repr_options = defaultdict(dict)
  101. if 'prefer' in request.headers:
  102. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  103. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  104. if 'return' in prefer:
  105. repr_options = parse_repr_options(prefer['return'])
  106. try:
  107. rsrc = rsrc_api.get(uid, repr_options)
  108. except ResourceNotExistsError as e:
  109. return str(e), 404
  110. except TombstoneError as e:
  111. return _tombstone_response(e, uid)
  112. else:
  113. if out_fmt is None:
  114. out_fmt = (
  115. 'rdf'
  116. if isinstance(rsrc, LdpRs) or is_accept_hdr_rdf_parsable()
  117. else 'non_rdf')
  118. out_headers.update(_headers_from_metadata(rsrc))
  119. uri = g.tbox.uid_to_uri(uid)
  120. if out_fmt == 'rdf':
  121. ggr = g.tbox.globalize_graph(rsrc.out_graph)
  122. ggr.namespace_manager = nsm
  123. return _negotiate_content(ggr, out_headers, uid=uid, uri=uri)
  124. else:
  125. if not getattr(rsrc, 'local_path', False):
  126. return ('{} has no binary content.'.format(rsrc.uid), 404)
  127. logger.debug('Streaming out binary content.')
  128. rsp = make_response(send_file(
  129. rsrc.local_path, as_attachment=True,
  130. attachment_filename=rsrc.filename,
  131. mimetype=rsrc.mimetype))
  132. logger.debug('Out headers: {}'.format(out_headers))
  133. rsp.headers.add('Link',
  134. '<{}/fcr:metadata>; rel="describedby"'.format(uri))
  135. for link in out_headers['Link']:
  136. rsp.headers.add('Link', link)
  137. return rsp
  138. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  139. def get_version_info(uid):
  140. """
  141. Get version info (`fcr:versions`).
  142. :param str uid: UID of resource to retrieve versions for.
  143. """
  144. try:
  145. gr = rsrc_api.get_version_info(uid)
  146. except ResourceNotExistsError as e:
  147. return str(e), 404
  148. except InvalidResourceError as e:
  149. return str(e), 409
  150. except TombstoneError as e:
  151. return _tombstone_response(e, uid)
  152. else:
  153. return _negotiate_content(g.tbox.globalize_graph(gr))
  154. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  155. def get_version(uid, ver_uid):
  156. """
  157. Get an individual resource version.
  158. :param str uid: Resource UID.
  159. :param str ver_uid: Version UID.
  160. """
  161. try:
  162. gr = rsrc_api.get_version(uid, ver_uid)
  163. except ResourceNotExistsError as e:
  164. return str(e), 404
  165. except InvalidResourceError as e:
  166. return str(e), 409
  167. except TombstoneError as e:
  168. return _tombstone_response(e, uid)
  169. else:
  170. return _negotiate_content(g.tbox.globalize_graph(gr))
  171. @ldp.route('/<path:parent_uid>', methods=['POST'], strict_slashes=False)
  172. @ldp.route('/', defaults={'parent_uid': '/'}, methods=['POST'],
  173. strict_slashes=False)
  174. def post_resource(parent_uid):
  175. """
  176. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  177. Add a new resource in a new URI.
  178. """
  179. out_headers = std_headers
  180. try:
  181. slug = request.headers['Slug']
  182. logger.debug('Slug: {}'.format(slug))
  183. except KeyError:
  184. slug = None
  185. handling, disposition = set_post_put_params()
  186. stream, mimetype = _bistream_from_req()
  187. if LdpFactory.is_rdf_parsable(mimetype):
  188. # If the content is RDF, localize in-repo URIs.
  189. global_rdf = stream.read()
  190. local_rdf = g.tbox.localize_payload(global_rdf)
  191. stream = BytesIO(local_rdf)
  192. is_rdf = True
  193. else:
  194. is_rdf = False
  195. try:
  196. uid = rsrc_api.create(
  197. parent_uid, slug, stream=stream, mimetype=mimetype,
  198. handling=handling, disposition=disposition)
  199. except ResourceNotExistsError as e:
  200. return str(e), 404
  201. except InvalidResourceError as e:
  202. return str(e), 409
  203. except TombstoneError as e:
  204. return _tombstone_response(e, uid)
  205. except ServerManagedTermError as e:
  206. return str(e), 412
  207. uri = g.tbox.uid_to_uri(uid)
  208. hdr = {'Location' : uri}
  209. if mimetype and not is_rdf:
  210. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  211. .format(uri)
  212. out_headers.update(hdr)
  213. return uri, 201, out_headers
  214. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  215. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  216. methods=['PUT'])
  217. def put_resource(uid):
  218. """
  219. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  220. Add or replace a new resource at a specified URI.
  221. """
  222. # Parse headers.
  223. logger.debug('Request headers: {}'.format(request.headers))
  224. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  225. handling, disposition = set_post_put_params()
  226. stream, mimetype = _bistream_from_req()
  227. if LdpFactory.is_rdf_parsable(mimetype):
  228. # If the content is RDF, localize in-repo URIs.
  229. global_rdf = stream.read()
  230. local_rdf = g.tbox.localize_payload(global_rdf)
  231. graph = Graph().parse(
  232. data=local_rdf, format=mimetype, publicID=nsc['fcres'][uid])
  233. stream = mimetype = None
  234. else:
  235. graph = None
  236. try:
  237. evt = rsrc_api.create_or_replace(uid, stream=stream, mimetype=mimetype,
  238. graph=graph, handling=handling, disposition=disposition)
  239. except (InvalidResourceError, ResourceExistsError) as e:
  240. return str(e), 409
  241. except (ServerManagedTermError, SingleSubjectError) as e:
  242. return str(e), 412
  243. except IncompatibleLdpTypeError as e:
  244. return str(e), 415
  245. except TombstoneError as e:
  246. return _tombstone_response(e, uid)
  247. uri = g.tbox.uid_to_uri(uid)
  248. if evt == RES_CREATED:
  249. rsp_code = 201
  250. rsp_headers['Location'] = rsp_body = uri
  251. if mimetype and not graph:
  252. rsp_headers['Link'] = (
  253. '<{0}/fcr:metadata>; rel="describedby"'.format(uri))
  254. else:
  255. rsp_code = 204
  256. rsp_body = ''
  257. return rsp_body, rsp_code, rsp_headers
  258. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  259. @ldp.route('/', defaults={'uid': '/'}, methods=['PATCH'],
  260. strict_slashes=False)
  261. def patch_resource(uid, is_metadata=False):
  262. """
  263. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  264. Update an existing resource with a SPARQL-UPDATE payload.
  265. """
  266. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  267. if request.mimetype != 'application/sparql-update':
  268. return 'Provided content type is not a valid parsable format: {}'\
  269. .format(request.mimetype), 415
  270. update_str = request.get_data().decode('utf-8')
  271. local_update_str = g.tbox.localize_ext_str(update_str, nsc['fcres'][uid])
  272. try:
  273. rsrc = rsrc_api.update(uid, local_update_str, is_metadata)
  274. except ResourceNotExistsError as e:
  275. return str(e), 404
  276. except TombstoneError as e:
  277. return _tombstone_response(e, uid)
  278. except (ServerManagedTermError, SingleSubjectError) as e:
  279. return str(e), 412
  280. except InvalidResourceError as e:
  281. return str(e), 415
  282. else:
  283. rsp_headers.update(_headers_from_metadata(rsrc))
  284. return '', 204, rsp_headers
  285. @ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
  286. def patch_resource_metadata(uid):
  287. return patch_resource(uid, True)
  288. @ldp.route('/<path:uid>', methods=['DELETE'])
  289. def delete_resource(uid):
  290. """
  291. Delete a resource and optionally leave a tombstone.
  292. This behaves differently from FCREPO. A tombstone indicated that the
  293. resource is no longer available at its current location, but its historic
  294. snapshots still are. Also, deleting a resource with a tombstone creates
  295. one more version snapshot of the resource prior to being deleted.
  296. In order to completely wipe out all traces of a resource, the tombstone
  297. must be deleted as well, or the ``Prefer:no-tombstone`` header can be used.
  298. The latter will forget (completely delete) the resource immediately.
  299. """
  300. headers = std_headers
  301. if 'prefer' in request.headers:
  302. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  303. leave_tstone = 'no-tombstone' not in prefer
  304. else:
  305. leave_tstone = True
  306. try:
  307. rsrc_api.delete(uid, leave_tstone)
  308. except ResourceNotExistsError as e:
  309. return str(e), 404
  310. except TombstoneError as e:
  311. return _tombstone_response(e, uid)
  312. return '', 204, headers
  313. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  314. 'PATCH', 'DELETE'])
  315. def tombstone(uid):
  316. """
  317. Handle all tombstone operations.
  318. The only allowed methods are POST and DELETE; any other verb will return a
  319. 405.
  320. """
  321. try:
  322. rsrc = rsrc_api.get(uid)
  323. except TombstoneError as e:
  324. if request.method == 'DELETE':
  325. if e.uid == uid:
  326. rsrc_api.delete(uid, False)
  327. return '', 204
  328. else:
  329. return _tombstone_response(e, uid)
  330. elif request.method == 'POST':
  331. if e.uid == uid:
  332. rsrc_uri = rsrc_api.resurrect(uid)
  333. headers = {'Location' : rsrc_uri}
  334. return rsrc_uri, 201, headers
  335. else:
  336. return _tombstone_response(e, uid)
  337. else:
  338. return 'Method Not Allowed.', 405
  339. except ResourceNotExistsError as e:
  340. return str(e), 404
  341. else:
  342. return '', 404
  343. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  344. def post_version(uid):
  345. """
  346. Create a new resource version.
  347. """
  348. if request.method == 'PUT':
  349. return 'Method not allowed.', 405
  350. ver_uid = request.headers.get('slug', None)
  351. try:
  352. ver_uid = rsrc_api.create_version(uid, ver_uid)
  353. except ResourceNotExistsError as e:
  354. return str(e), 404
  355. except InvalidResourceError as e:
  356. return str(e), 409
  357. except TombstoneError as e:
  358. return _tombstone_response(e, uid)
  359. else:
  360. return '', 201, {'Location': g.tbox.uid_to_uri(ver_uid)}
  361. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  362. def patch_version(uid, ver_uid):
  363. """
  364. Revert to a previous version.
  365. NOTE: This creates a new version snapshot.
  366. :param str uid: Resource UID.
  367. :param str ver_uid: Version UID.
  368. """
  369. try:
  370. LdpFactory.from_stored(uid).revert_to_version(ver_uid)
  371. except ResourceNotExistsError as e:
  372. return str(e), 404
  373. except InvalidResourceError as e:
  374. return str(e), 409
  375. except TombstoneError as e:
  376. return _tombstone_response(e, uid)
  377. else:
  378. return '', 204
  379. ## PRIVATE METHODS ##
  380. def _negotiate_content(gr, headers=None, **vw_kwargs):
  381. """
  382. Return HTML or serialized RDF depending on accept headers.
  383. """
  384. if request.accept_mimetypes.best == 'text/html':
  385. return render_template(
  386. 'resource.html', gr=gr, nsc=nsc, nsm=nsm,
  387. blacklist=vw_blacklist, arrow=arrow, **vw_kwargs)
  388. else:
  389. for p in vw_blacklist:
  390. gr.remove((None, p, None))
  391. return (gr.serialize(format='turtle'), headers)
  392. def _bistream_from_req():
  393. """
  394. Find how a binary file and its MIMEtype were uploaded in the request.
  395. """
  396. #logger.debug('Content type: {}'.format(request.mimetype))
  397. #logger.debug('files: {}'.format(request.files))
  398. #logger.debug('stream: {}'.format(request.stream))
  399. if request.mimetype == 'multipart/form-data':
  400. # This seems the "right" way to upload a binary file, with a
  401. # multipart/form-data MIME type and the file in the `file`
  402. # field. This however is not supported by FCREPO4.
  403. stream = request.files.get('file').stream
  404. mimetype = request.files.get('file').content_type
  405. # @TODO This will turn out useful to provide metadata
  406. # with the binary.
  407. #metadata = request.files.get('metadata').stream
  408. else:
  409. # This is a less clean way, with the file in the form body and
  410. # the request as application/x-www-form-urlencoded.
  411. # This is how FCREPO4 accepts binary uploads.
  412. stream = request.stream
  413. # @FIXME Must decide what to do with this.
  414. mimetype = request.mimetype
  415. if mimetype == '' or mimetype == 'application/x-www-form-urlencoded':
  416. if getattr(stream, 'limit', 0) == 0:
  417. stream = mimetype = None
  418. else:
  419. mimetype = 'application/octet-stream'
  420. return stream, mimetype
  421. def _tombstone_response(e, uid):
  422. headers = {
  423. 'Link': '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  424. } if e.uid == uid else {}
  425. return str(e), 410, headers
  426. def set_post_put_params():
  427. """
  428. Sets handling and content disposition for POST and PUT by parsing headers.
  429. """
  430. handling = 'strict'
  431. if 'prefer' in request.headers:
  432. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  433. logger.debug('Parsed Prefer header: {}'.format(prefer))
  434. if 'handling' in prefer:
  435. handling = prefer['handling']['value']
  436. try:
  437. disposition = g.tbox.parse_rfc7240(
  438. request.headers['content-disposition'])
  439. except KeyError:
  440. disposition = None
  441. return handling, disposition
  442. def is_accept_hdr_rdf_parsable():
  443. """
  444. Check if any of the 'Accept' header values provided is a RDF parsable
  445. format.
  446. """
  447. for mimetype in request.accept_mimetypes.values():
  448. if LdpFactory.is_rdf_parsable(mimetype):
  449. return True
  450. return False
  451. def parse_repr_options(retr_opts):
  452. """
  453. Set options to retrieve IMR.
  454. Ideally, IMR retrieval is done once per request, so all the options
  455. are set once in the `imr()` property.
  456. :param dict retr_opts:: Options parsed from `Prefer` header.
  457. """
  458. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  459. imr_options = {}
  460. if retr_opts.get('value') == 'minimal':
  461. imr_options = {
  462. 'embed_children' : False,
  463. 'incl_children' : False,
  464. 'incl_inbound' : False,
  465. 'incl_srv_mgd' : False,
  466. }
  467. else:
  468. # Default.
  469. imr_options = {
  470. 'embed_children' : False,
  471. 'incl_children' : True,
  472. 'incl_inbound' : False,
  473. 'incl_srv_mgd' : True,
  474. }
  475. # Override defaults.
  476. if 'parameters' in retr_opts:
  477. include = retr_opts['parameters']['include'].split(' ') \
  478. if 'include' in retr_opts['parameters'] else []
  479. omit = retr_opts['parameters']['omit'].split(' ') \
  480. if 'omit' in retr_opts['parameters'] else []
  481. logger.debug('Include: {}'.format(include))
  482. logger.debug('Omit: {}'.format(omit))
  483. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  484. imr_options['embed_children'] = True
  485. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  486. imr_options['incl_children'] = False
  487. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  488. imr_options['incl_inbound'] = True
  489. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  490. imr_options['incl_srv_mgd'] = False
  491. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  492. return imr_options
  493. def _headers_from_metadata(rsrc):
  494. """
  495. Create a dict of headers from a metadata graph.
  496. :param lakesuperior.model.ldpr.Ldpr rsrc: Resource to extract metadata
  497. from.
  498. """
  499. out_headers = defaultdict(list)
  500. digest = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
  501. if digest:
  502. etag = digest.identifier.split(':')[-1]
  503. etag_str = (
  504. 'W/"{}"'.format(etag)
  505. if nsc['ldp'].RDFSource in rsrc.ldp_types
  506. else etag)
  507. out_headers['ETag'] = etag_str,
  508. last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
  509. if last_updated_term:
  510. out_headers['Last-Modified'] = arrow.get(last_updated_term)\
  511. .format('ddd, D MMM YYYY HH:mm:ss Z')
  512. for t in rsrc.ldp_types:
  513. out_headers['Link'].append(
  514. '{};rel="type"'.format(t.n3()))
  515. mimetype = rsrc.metadata.value(nsc['ebucore'].hasMimeType)
  516. if mimetype:
  517. out_headers['Content-Type'] = mimetype
  518. return out_headers