ldp.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from uuid import uuid4
  5. import arrow
  6. from flask import (
  7. Blueprint, current_app, g, make_response, render_template,
  8. request, send_file)
  9. from rdflib.namespace import RDF, XSD
  10. from rdflib.term import Literal
  11. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  12. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  13. from lakesuperior.exceptions import *
  14. from lakesuperior.model.ldp_factory import LdpFactory
  15. from lakesuperior.model.ldp_nr import LdpNr
  16. from lakesuperior.model.ldp_rs import LdpRs
  17. from lakesuperior.model.ldpr import Ldpr
  18. from lakesuperior.toolbox import Toolbox
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint for the LDP REST API. This is what is usually found under `/rest/`
# in standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be
# kept for backward compatibility.
# The blueprint is not bound to a URL prefix here; the prefix is handled per
# request by the `url_defaults` / `url_value_preprocessor` hooks in this module.
ldp = Blueprint(
        'ldp', __name__, template_folder='templates',
        static_url_path='/static', static_folder='../../static')
# Media types advertised in the `Accept-Patch` response header.
accept_patch = (
    'application/sparql-update',
)

# RDF media types advertised in the `Accept-Post` response header.
# The commented-out entries are currently not advertised.
accept_rdf = (
    'application/ld+json',
    'application/n-triples',
    'application/rdf+xml',
    #'application/x-turtle',
    #'application/xhtml+xml',
    #'application/xml',
    #'text/html',
    'text/n3',
    #'text/plain',
    'text/rdf+n3',
    'text/turtle',
)

# Default response headers used as a base by several views.
# NOTE: this dict is shared by all requests; views should copy it before
# adding per-response entries.
std_headers = {
    'Accept-Patch' : ','.join(accept_patch),
    'Accept-Post' : ','.join(accept_rdf),
    #'Allow' : ','.join(allow),
}
  47. @ldp.url_defaults
  48. def bp_url_defaults(endpoint, values):
  49. url_prefix = getattr(g, 'url_prefix', None)
  50. if url_prefix is not None:
  51. values.setdefault('url_prefix', url_prefix)
  52. @ldp.url_value_preprocessor
  53. def bp_url_value_preprocessor(endpoint, values):
  54. g.url_prefix = values.pop('url_prefix')
  55. g.webroot = request.host_url + g.url_prefix
  56. @ldp.before_request
  57. def log_request_start():
  58. logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
  59. @ldp.before_request
  60. def instantiate_toolbox():
  61. g.tbox = Toolbox()
  62. @ldp.before_request
  63. def request_timestamp():
  64. g.timestamp = arrow.utcnow()
  65. g.timestamp_term = Literal(g.timestamp, datatype=XSD.dateTime)
  66. @ldp.after_request
  67. def log_request_end(rsp):
  68. logger.info('** End {} {} **\n\n'.format(request.method, request.url))
  69. return rsp
  70. ## REST SERVICES ##
  71. @ldp.route('/<path:uuid>', methods=['GET'], strict_slashes=False)
  72. @ldp.route('/', defaults={'uuid': None}, methods=['GET'], strict_slashes=False)
  73. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  74. methods=['GET'])
  75. def get_resource(uuid, force_rdf=False):
  76. '''
  77. Retrieve RDF or binary content.
  78. @param uuid (string) UUID of resource to retrieve.
  79. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  80. a LDP-NR. This is not available in the API but is used e.g. by the
  81. `*/fcr:metadata` endpoint. The default is False.
  82. '''
  83. out_headers = std_headers
  84. repr_options = defaultdict(dict)
  85. if 'prefer' in request.headers:
  86. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  87. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  88. if 'return' in prefer:
  89. repr_options = parse_repr_options(prefer['return'])
  90. try:
  91. rsrc = LdpFactory.from_stored(uuid, repr_options)
  92. except ResourceNotExistsError as e:
  93. return str(e), 404
  94. except TombstoneError as e:
  95. return _tombstone_response(e, uuid)
  96. else:
  97. out_headers.update(rsrc.head())
  98. if isinstance(rsrc, LdpRs) \
  99. or is_accept_hdr_rdf_parsable() \
  100. or force_rdf:
  101. resp = rsrc.get()
  102. if request.accept_mimetypes.best == 'text/html':
  103. rsrc = resp.resource(request.path)
  104. return render_template('resource.html', rsrc=rsrc, nsm=nsm)
  105. else:
  106. return (resp.serialize(format='turtle'), out_headers)
  107. else:
  108. logger.info('Streaming out binary content.')
  109. rsp = make_response(send_file(rsrc.local_path, as_attachment=True,
  110. attachment_filename=rsrc.filename))
  111. rsp.headers['Link'] = '<{}/fcr:metadata>; rel="describedby"'\
  112. .format(rsrc.uri)
  113. return rsp
  114. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  115. @ldp.route('/', defaults={'parent': None}, methods=['POST'],
  116. strict_slashes=False)
  117. def post_resource(parent):
  118. '''
  119. Add a new resource in a new URI.
  120. '''
  121. out_headers = std_headers
  122. try:
  123. slug = request.headers['Slug']
  124. logger.info('Slug: {}'.format(slug))
  125. except KeyError:
  126. slug = None
  127. handling, disposition = set_post_put_params()
  128. stream, mimetype = bitstream_from_req()
  129. try:
  130. uuid = uuid_for_post(parent, slug)
  131. logger.debug('Generated UUID for POST: {}'.format(uuid))
  132. rsrc = LdpFactory.from_provided(uuid, content_length=request.content_length,
  133. stream=stream, mimetype=mimetype, handling=handling,
  134. disposition=disposition)
  135. except ResourceNotExistsError as e:
  136. return str(e), 404
  137. except InvalidResourceError as e:
  138. return str(e), 409
  139. except TombstoneError as e:
  140. return _tombstone_response(e, uuid)
  141. try:
  142. rsrc.post()
  143. except ServerManagedTermError as e:
  144. return str(e), 412
  145. hdr = {
  146. 'Location' : rsrc.uri,
  147. }
  148. if isinstance(rsrc, LdpNr):
  149. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  150. .format(rsrc.uri)
  151. out_headers.update(hdr)
  152. return rsrc.uri, 201, out_headers
  153. @ldp.route('/<path:uuid>/fcr:versions', methods=['GET'])
  154. def get_version_info(uuid):
  155. '''
  156. Get version info (`fcr:versions`).
  157. '''
  158. try:
  159. rsp = Ldpr(uuid).get_version_info()
  160. except ResourceNotExistsError as e:
  161. return str(e), 404
  162. except InvalidResourceError as e:
  163. return str(e), 409
  164. except TombstoneError as e:
  165. return _tombstone_response(e, uuid)
  166. else:
  167. return rsp.serialize(format='turtle'), 200
  168. @ldp.route('/<path:uuid>/fcr:versions/<ver_uid>', methods=['GET'])
  169. def get_version(uuid, ver_uid):
  170. '''
  171. Get an individual resource version.
  172. @param uuid (string) Resource UUID.
  173. @param ver_uid (string) Version UID.
  174. '''
  175. try:
  176. rsp = Ldpr(uuid).get_version(ver_uid)
  177. except ResourceNotExistsError as e:
  178. return str(e), 404
  179. except InvalidResourceError as e:
  180. return str(e), 409
  181. except TombstoneError as e:
  182. return _tombstone_response(e, uuid)
  183. else:
  184. return rsp.serialize(format='turtle'), 200
  185. @ldp.route('/<path:uuid>/fcr:versions', methods=['POST'])
  186. def post_version(uuid):
  187. '''
  188. Create a new resource version.
  189. '''
  190. ver_uid = request.headers.get('slug', None)
  191. try:
  192. ver_uri = LdpFactory.from_stored(uuid).create_version(ver_uid)
  193. except ResourceNotExistsError as e:
  194. return str(e), 404
  195. except InvalidResourceError as e:
  196. return str(e), 409
  197. except TombstoneError as e:
  198. return _tombstone_response(e, uuid)
  199. else:
  200. return '', 201, {'Location': ver_uri}
  201. @ldp.route('/<path:uuid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  202. def patch_version(uuid, ver_uid):
  203. '''
  204. Revert to a previous version.
  205. NOTE: This creates a new version snapshot.
  206. @param uuid (string) Resource UUID.
  207. @param ver_uid (string) Version UID.
  208. '''
  209. try:
  210. LdpFactory.from_stored(uuid).revert_to_version(ver_uid)
  211. except ResourceNotExistsError as e:
  212. return str(e), 404
  213. except InvalidResourceError as e:
  214. return str(e), 409
  215. except TombstoneError as e:
  216. return _tombstone_response(e, uuid)
  217. else:
  218. return '', 204
  219. @ldp.route('/<path:uuid>', methods=['PUT'], strict_slashes=False)
  220. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  221. methods=['PUT'])
  222. def put_resource(uuid):
  223. '''
  224. Add a new resource at a specified URI.
  225. '''
  226. # Parse headers.
  227. logger.info('Request headers: {}'.format(request.headers))
  228. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  229. handling, disposition = set_post_put_params()
  230. stream, mimetype = bitstream_from_req()
  231. try:
  232. rsrc = LdpFactory.from_provided(uuid, content_length=request.content_length,
  233. stream=stream, mimetype=mimetype, handling=handling,
  234. disposition=disposition)
  235. if not request.content_length and rsrc.is_stored:
  236. raise InvalidResourceError(
  237. rsrc.uuid, 'Resource already exists and no data was provided.')
  238. except InvalidResourceError as e:
  239. return str(e), 409
  240. except (ServerManagedTermError, SingleSubjectError) as e:
  241. return str(e), 412
  242. except IncompatibleLdpTypeError as e:
  243. return str(e), 415
  244. try:
  245. ret = rsrc.put()
  246. except (InvalidResourceError, ResourceExistsError) as e:
  247. return str(e), 409
  248. except TombstoneError as e:
  249. return _tombstone_response(e, uuid)
  250. rsp_headers.update(rsrc.head())
  251. if ret == Ldpr.RES_CREATED:
  252. rsp_code = 201
  253. rsp_headers['Location'] = rsp_body = rsrc.uri
  254. if isinstance(rsrc, LdpNr):
  255. rsp_headers['Link'] = '<{0}/fcr:metadata>; rel="describedby"'\
  256. .format(rsrc.uri)
  257. else:
  258. rsp_code = 204
  259. rsp_body = ''
  260. return rsp_body, rsp_code, rsp_headers
  261. @ldp.route('/<path:uuid>', methods=['PATCH'], strict_slashes=False)
  262. def patch_resource(uuid):
  263. '''
  264. Update an existing resource with a SPARQL-UPDATE payload.
  265. '''
  266. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  267. rsrc = LdpRs(uuid)
  268. if request.mimetype != 'application/sparql-update':
  269. return 'Provided content type is not a valid parsable format: {}'\
  270. .format(request.mimetype), 415
  271. try:
  272. rsrc.patch(request.get_data().decode('utf-8'))
  273. except ResourceNotExistsError as e:
  274. return str(e), 404
  275. except TombstoneError as e:
  276. return _tombstone_response(e, uuid)
  277. except (ServerManagedTermError, SingleSubjectError) as e:
  278. return str(e), 412
  279. else:
  280. rsp_headers.update(rsrc.head())
  281. return '', 204, rsp_headers
  282. @ldp.route('/<path:uuid>/fcr:metadata', methods=['PATCH'])
  283. def patch_resource_metadata(uuid):
  284. return patch_resource(uuid)
  285. @ldp.route('/<path:uuid>', methods=['DELETE'])
  286. def delete_resource(uuid):
  287. '''
  288. Delete a resource and optionally leave a tombstone.
  289. This behaves differently from FCREPO. A tombstone indicated that the
  290. resource is no longer available at its current location, but its historic
  291. snapshots still are. Also, deleting a resource with a tombstone creates
  292. one more version snapshot of the resource prior to being deleted.
  293. In order to completely wipe out all traces of a resource, the tombstone
  294. must be deleted as well, or the `Prefer:no-tombstone` header can be used.
  295. The latter will purge the resource immediately.
  296. '''
  297. headers = std_headers
  298. # If referential integrity is enforced, grab all inbound relationships
  299. # to break them.
  300. repr_opts = {'incl_inbound' : True} \
  301. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  302. else {}
  303. if 'prefer' in request.headers:
  304. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  305. leave_tstone = 'no-tombstone' not in prefer
  306. else:
  307. leave_tstone = True
  308. try:
  309. LdpFactory.from_stored(uuid, repr_opts).delete(leave_tstone=leave_tstone)
  310. except ResourceNotExistsError as e:
  311. return str(e), 404
  312. except TombstoneError as e:
  313. return _tombstone_response(e, uuid)
  314. return '', 204, headers
  315. @ldp.route('/<path:uuid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  316. 'PATCH', 'DELETE'])
  317. def tombstone(uuid):
  318. '''
  319. Handle all tombstone operations.
  320. The only allowed methods are POST and DELETE; any other verb will return a
  321. 405.
  322. '''
  323. logger.debug('Deleting tombstone for {}.'.format(uuid))
  324. rsrc = Ldpr(uuid)
  325. try:
  326. imr = rsrc.imr
  327. except TombstoneError as e:
  328. if request.method == 'DELETE':
  329. if e.uuid == uuid:
  330. rsrc.purge()
  331. return '', 204
  332. else:
  333. return _tombstone_response(e, uuid)
  334. elif request.method == 'POST':
  335. if e.uuid == uuid:
  336. rsrc_uri = rsrc.resurrect()
  337. headers = {'Location' : rsrc_uri}
  338. return rsrc_uri, 201, headers
  339. else:
  340. return _tombstone_response(e, uuid)
  341. else:
  342. return 'Method Not Allowed.', 405
  343. except ResourceNotExistsError as e:
  344. return str(e), 404
  345. else:
  346. return '', 404
  347. def uuid_for_post(parent_uuid=None, slug=None):
  348. '''
  349. Validate conditions to perform a POST and return an LDP resource
  350. UUID for using with the `post` method.
  351. This may raise an exception resulting in a 404 if the parent is not
  352. found or a 409 if the parent is not a valid container.
  353. '''
  354. def split_if_legacy(uuid):
  355. if current_app.config['store']['ldp_rs']['legacy_ptree_split']:
  356. uuid = g.tbox.split_uuid(uuid)
  357. return uuid
  358. # Shortcut!
  359. if not slug and not parent_uuid:
  360. uuid = split_if_legacy(str(uuid4()))
  361. return uuid
  362. parent = LdpFactory.from_stored(parent_uuid, repr_opts={'incl_children' : False})
  363. if nsc['fcrepo'].Pairtree in parent.types:
  364. raise InvalidResourceError(parent.uuid,
  365. 'Resources cannot be created under a pairtree.')
  366. # Set prefix.
  367. if parent_uuid:
  368. parent_types = { t.identifier for t in \
  369. parent.imr.objects(RDF.type) }
  370. logger.debug('Parent types: {}'.format(pformat(parent_types)))
  371. if nsc['ldp'].Container not in parent_types:
  372. raise InvalidResourceError('Parent {} is not a container.'
  373. .format(parent_uuid))
  374. pfx = parent_uuid + '/'
  375. else:
  376. pfx = ''
  377. # Create candidate UUID and validate.
  378. if slug:
  379. cnd_uuid = pfx + slug
  380. if current_app.rdfly.ask_rsrc_exists(nsc['fcres'][cnd_uuid]):
  381. uuid = pfx + split_if_legacy(str(uuid4()))
  382. else:
  383. uuid = cnd_uuid
  384. else:
  385. uuid = pfx + split_if_legacy(str(uuid4()))
  386. return uuid
  387. def bitstream_from_req():
  388. '''
  389. Find how a binary file and its MIMEtype were uploaded in the request.
  390. '''
  391. logger.debug('Content type: {}'.format(request.mimetype))
  392. logger.debug('files: {}'.format(request.files))
  393. logger.debug('stream: {}'.format(request.stream))
  394. if request.mimetype == 'multipart/form-data':
  395. # This seems the "right" way to upload a binary file, with a
  396. # multipart/form-data MIME type and the file in the `file`
  397. # field. This however is not supported by FCREPO4.
  398. stream = request.files.get('file').stream
  399. mimetype = request.files.get('file').content_type
  400. # @TODO This will turn out useful to provide metadata
  401. # with the binary.
  402. #metadata = request.files.get('metadata').stream
  403. #provided_imr = [parse RDF here...]
  404. else:
  405. # This is a less clean way, with the file in the form body and
  406. # the request as application/x-www-form-urlencoded.
  407. # This is how FCREPO4 accepts binary uploads.
  408. stream = request.stream
  409. mimetype = request.mimetype
  410. return stream, mimetype
  411. def _get_bitstream(rsrc):
  412. out_headers = std_headers
  413. # @TODO This may change in favor of more low-level handling if the file
  414. # system is not local.
  415. return send_file(rsrc.local_path, as_attachment=True,
  416. attachment_filename=rsrc.filename)
  417. def _tombstone_response(e, uuid):
  418. headers = {
  419. 'Link' : '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  420. } if e.uuid == uuid else {}
  421. return str(e), 410, headers
  422. def set_post_put_params():
  423. '''
  424. Sets handling and content disposition for POST and PUT by parsing headers.
  425. '''
  426. handling = None
  427. if 'prefer' in request.headers:
  428. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  429. logger.debug('Parsed Prefer header: {}'.format(prefer))
  430. if 'handling' in prefer:
  431. handling = prefer['handling']['value']
  432. try:
  433. disposition = g.tbox.parse_rfc7240(
  434. request.headers['content-disposition'])
  435. except KeyError:
  436. disposition = None
  437. return handling, disposition
  438. def is_accept_hdr_rdf_parsable():
  439. '''
  440. Check if any of the 'Accept' header values provided is a RDF parsable
  441. format.
  442. '''
  443. for mimetype in request.accept_mimetypes.values():
  444. if LdpFactory.is_rdf_parsable(mimetype):
  445. return True
  446. return False
  447. def parse_repr_options(retr_opts):
  448. '''
  449. Set options to retrieve IMR.
  450. Ideally, IMR retrieval is done once per request, so all the options
  451. are set once in the `imr()` property.
  452. @param retr_opts (dict): Options parsed from `Prefer` header.
  453. '''
  454. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  455. imr_options = {}
  456. if retr_opts.get('value') == 'minimal':
  457. imr_options = {
  458. 'embed_children' : False,
  459. 'incl_children' : False,
  460. 'incl_inbound' : False,
  461. 'incl_srv_mgd' : False,
  462. }
  463. else:
  464. # Default.
  465. imr_options = {
  466. 'embed_children' : False,
  467. 'incl_children' : True,
  468. 'incl_inbound' : False,
  469. 'incl_srv_mgd' : True,
  470. }
  471. # Override defaults.
  472. if 'parameters' in retr_opts:
  473. include = retr_opts['parameters']['include'].split(' ') \
  474. if 'include' in retr_opts['parameters'] else []
  475. omit = retr_opts['parameters']['omit'].split(' ') \
  476. if 'omit' in retr_opts['parameters'] else []
  477. logger.debug('Include: {}'.format(include))
  478. logger.debug('Omit: {}'.format(omit))
  479. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  480. imr_options['embed_children'] = True
  481. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  482. imr_options['incl_children'] = False
  483. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  484. imr_options['incl_inbound'] = True
  485. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  486. imr_options['incl_srv_mgd'] = False
  487. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  488. return imr_options