ldp.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from uuid import uuid4
  5. import arrow
  6. from flask import (
  7. Blueprint, current_app, g, make_response, render_template,
  8. request, send_file)
  9. from rdflib.namespace import XSD
  10. from rdflib.term import Literal
  11. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  12. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  13. from lakesuperior.exceptions import (ResourceNotExistsError, TombstoneError,
  14. ServerManagedTermError, InvalidResourceError, SingleSubjectError,
  15. ResourceExistsError, IncompatibleLdpTypeError)
  16. from lakesuperior.model.ldp_factory import LdpFactory
  17. from lakesuperior.model.ldp_nr import LdpNr
  18. from lakesuperior.model.ldp_rs import LdpRs
  19. from lakesuperior.model.ldpr import Ldpr
  20. from lakesuperior.toolbox import Toolbox
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint for LDP REST API. This is what is usually found under `/rest/` in
# standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
# for backward compatibility.
ldp = Blueprint(
        'ldp', __name__, template_folder='templates',
        static_url_path='/static', static_folder='../../static')

# MIME types advertised in the `Accept-Patch` response header.
accept_patch = (
    'application/sparql-update',
)

# MIME types advertised in the `Accept-Post` response header. Commented-out
# entries are kept as they were found; they are currently not advertised.
accept_rdf = (
    'application/ld+json',
    'application/n-triples',
    'application/rdf+xml',
    #'application/x-turtle',
    #'application/xhtml+xml',
    #'application/xml',
    #'text/html',
    'text/n3',
    #'text/plain',
    'text/rdf+n3',
    'text/turtle',
)

# Default response headers used by the request handlers in this module.
# This dict is module-level and mutable: a handler that needs to add
# per-response headers should work on a copy, not on this object.
std_headers = {
    'Accept-Patch' : ','.join(accept_patch),
    'Accept-Post' : ','.join(accept_rdf),
    #'Allow' : ','.join(allow),
}

# Predicates that `negotiate_content` strips from serialized RDF output and
# passes to the HTML template as `blacklist`.
'''Predicates excluded by view.'''
vw_blacklist = {
    nsc['fcrepo'].contains,
}
  53. @ldp.url_defaults
  54. def bp_url_defaults(endpoint, values):
  55. url_prefix = getattr(g, 'url_prefix', None)
  56. if url_prefix is not None:
  57. values.setdefault('url_prefix', url_prefix)
  58. @ldp.url_value_preprocessor
  59. def bp_url_value_preprocessor(endpoint, values):
  60. g.url_prefix = values.pop('url_prefix')
  61. g.webroot = request.host_url + g.url_prefix
  62. @ldp.before_request
  63. def log_request_start():
  64. logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
  65. @ldp.before_request
  66. def instantiate_toolbox():
  67. g.tbox = Toolbox()
  68. @ldp.before_request
  69. def request_timestamp():
  70. g.timestamp = arrow.utcnow()
  71. g.timestamp_term = Literal(g.timestamp, datatype=XSD.dateTime)
  72. @ldp.after_request
  73. def log_request_end(rsp):
  74. logger.info('** End {} {} **\n\n'.format(request.method, request.url))
  75. return rsp
  76. ## REST SERVICES ##
  77. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  78. @ldp.route('/', defaults={'uid': ''}, methods=['GET'], strict_slashes=False)
  79. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  80. methods=['GET'])
  81. def get_resource(uid, force_rdf=False):
  82. '''
  83. Retrieve RDF or binary content.
  84. @param uid (string) UID of resource to retrieve. The repository root has
  85. an empty string for UID.
  86. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  87. a LDP-NR. This is not available in the API but is used e.g. by the
  88. `*/fcr:metadata` endpoint. The default is False.
  89. '''
  90. out_headers = std_headers
  91. repr_options = defaultdict(dict)
  92. if 'prefer' in request.headers:
  93. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  94. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  95. if 'return' in prefer:
  96. repr_options = parse_repr_options(prefer['return'])
  97. try:
  98. rsrc = LdpFactory.from_stored(uid, repr_options)
  99. except ResourceNotExistsError as e:
  100. return str(e), 404
  101. except TombstoneError as e:
  102. return _tombstone_response(e, uid)
  103. else:
  104. out_headers.update(rsrc.head())
  105. if isinstance(rsrc, LdpRs) \
  106. or is_accept_hdr_rdf_parsable() \
  107. or force_rdf:
  108. rsp = rsrc.get()
  109. return negotiate_content(rsp, out_headers)
  110. else:
  111. logger.info('Streaming out binary content.')
  112. rsp = make_response(send_file(rsrc.local_path, as_attachment=True,
  113. attachment_filename=rsrc.filename))
  114. rsp.headers['Link'] = '<{}/fcr:metadata>; rel="describedby"'\
  115. .format(rsrc.uri)
  116. return rsp
  117. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  118. @ldp.route('/', defaults={'parent': ''}, methods=['POST'],
  119. strict_slashes=False)
  120. def post_resource(parent):
  121. '''
  122. Add a new resource in a new URI.
  123. '''
  124. out_headers = std_headers
  125. try:
  126. slug = request.headers['Slug']
  127. logger.info('Slug: {}'.format(slug))
  128. except KeyError:
  129. slug = None
  130. handling, disposition = set_post_put_params()
  131. stream, mimetype = bitstream_from_req()
  132. try:
  133. uid = uuid_for_post(parent, slug)
  134. logger.debug('Generated UID for POST: {}'.format(uid))
  135. rsrc = LdpFactory.from_provided(uid, content_length=request.content_length,
  136. stream=stream, mimetype=mimetype, handling=handling,
  137. disposition=disposition)
  138. except ResourceNotExistsError as e:
  139. return str(e), 404
  140. except InvalidResourceError as e:
  141. return str(e), 409
  142. except TombstoneError as e:
  143. return _tombstone_response(e, uid)
  144. try:
  145. rsrc.post()
  146. except ServerManagedTermError as e:
  147. return str(e), 412
  148. hdr = {
  149. 'Location' : rsrc.uri,
  150. }
  151. if isinstance(rsrc, LdpNr):
  152. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  153. .format(rsrc.uri)
  154. out_headers.update(hdr)
  155. return rsrc.uri, 201, out_headers
  156. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  157. def get_version_info(uid):
  158. '''
  159. Get version info (`fcr:versions`).
  160. '''
  161. try:
  162. rsp = Ldpr(uid).get_version_info()
  163. except ResourceNotExistsError as e:
  164. return str(e), 404
  165. except InvalidResourceError as e:
  166. return str(e), 409
  167. except TombstoneError as e:
  168. return _tombstone_response(e, uid)
  169. else:
  170. return negotiate_content(rsp)
  171. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  172. def get_version(uid, ver_uid):
  173. '''
  174. Get an individual resource version.
  175. @param uid (string) Resource UID.
  176. @param ver_uid (string) Version UID.
  177. '''
  178. try:
  179. rsp = Ldpr(uid).get_version(ver_uid)
  180. except ResourceNotExistsError as e:
  181. return str(e), 404
  182. except InvalidResourceError as e:
  183. return str(e), 409
  184. except TombstoneError as e:
  185. return _tombstone_response(e, uid)
  186. else:
  187. return negotiate_content(rsp)
  188. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  189. def post_version(uid):
  190. '''
  191. Create a new resource version.
  192. '''
  193. if request.method == 'PUT':
  194. return 'Method not allowed.', 405
  195. ver_uid = request.headers.get('slug', None)
  196. try:
  197. ver_uri = LdpFactory.from_stored(uid).create_version(ver_uid)
  198. except ResourceNotExistsError as e:
  199. return str(e), 404
  200. except InvalidResourceError as e:
  201. return str(e), 409
  202. except TombstoneError as e:
  203. return _tombstone_response(e, uid)
  204. else:
  205. return '', 201, {'Location': ver_uri}
  206. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  207. def patch_version(uid, ver_uid):
  208. '''
  209. Revert to a previous version.
  210. NOTE: This creates a new version snapshot.
  211. @param uid (string) Resource UID.
  212. @param ver_uid (string) Version UID.
  213. '''
  214. try:
  215. LdpFactory.from_stored(uid).revert_to_version(ver_uid)
  216. except ResourceNotExistsError as e:
  217. return str(e), 404
  218. except InvalidResourceError as e:
  219. return str(e), 409
  220. except TombstoneError as e:
  221. return _tombstone_response(e, uid)
  222. else:
  223. return '', 204
  224. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  225. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  226. methods=['PUT'])
  227. def put_resource(uid):
  228. '''
  229. Add a new resource at a specified URI.
  230. '''
  231. # Parse headers.
  232. logger.info('Request headers: {}'.format(request.headers))
  233. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  234. handling, disposition = set_post_put_params()
  235. stream, mimetype = bitstream_from_req()
  236. try:
  237. rsrc = LdpFactory.from_provided(uid, content_length=request.content_length,
  238. stream=stream, mimetype=mimetype, handling=handling,
  239. disposition=disposition)
  240. if not request.content_length and rsrc.is_stored:
  241. raise InvalidResourceError(rsrc.uid,
  242. 'Resource {} already exists and no data set was provided.')
  243. except InvalidResourceError as e:
  244. return str(e), 409
  245. except (ServerManagedTermError, SingleSubjectError) as e:
  246. return str(e), 412
  247. except IncompatibleLdpTypeError as e:
  248. return str(e), 415
  249. try:
  250. ret = rsrc.put()
  251. rsp_headers.update(rsrc.head())
  252. except (InvalidResourceError, ResourceExistsError) as e:
  253. return str(e), 409
  254. except TombstoneError as e:
  255. return _tombstone_response(e, uid)
  256. if ret == Ldpr.RES_CREATED:
  257. rsp_code = 201
  258. rsp_headers['Location'] = rsp_body = rsrc.uri
  259. if isinstance(rsrc, LdpNr):
  260. rsp_headers['Link'] = '<{0}/fcr:metadata>; rel="describedby"'\
  261. .format(rsrc.uri)
  262. else:
  263. rsp_code = 204
  264. rsp_body = ''
  265. return rsp_body, rsp_code, rsp_headers
  266. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  267. def patch_resource(uid):
  268. '''
  269. Update an existing resource with a SPARQL-UPDATE payload.
  270. '''
  271. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  272. rsrc = LdpRs(uid)
  273. if request.mimetype != 'application/sparql-update':
  274. return 'Provided content type is not a valid parsable format: {}'\
  275. .format(request.mimetype), 415
  276. try:
  277. rsrc.patch(request.get_data().decode('utf-8'))
  278. except ResourceNotExistsError as e:
  279. return str(e), 404
  280. except TombstoneError as e:
  281. return _tombstone_response(e, uid)
  282. except (ServerManagedTermError, SingleSubjectError) as e:
  283. return str(e), 412
  284. else:
  285. rsp_headers.update(rsrc.head())
  286. return '', 204, rsp_headers
  287. @ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
  288. def patch_resource_metadata(uid):
  289. return patch_resource(uid)
  290. @ldp.route('/<path:uid>', methods=['DELETE'])
  291. def delete_resource(uid):
  292. '''
  293. Delete a resource and optionally leave a tombstone.
  294. This behaves differently from FCREPO. A tombstone indicated that the
  295. resource is no longer available at its current location, but its historic
  296. snapshots still are. Also, deleting a resource with a tombstone creates
  297. one more version snapshot of the resource prior to being deleted.
  298. In order to completely wipe out all traces of a resource, the tombstone
  299. must be deleted as well, or the `Prefer:no-tombstone` header can be used.
  300. The latter will purge the resource immediately.
  301. '''
  302. headers = std_headers
  303. # If referential integrity is enforced, grab all inbound relationships
  304. # to break them.
  305. repr_opts = {'incl_inbound' : True} \
  306. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  307. else {}
  308. if 'prefer' in request.headers:
  309. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  310. leave_tstone = 'no-tombstone' not in prefer
  311. else:
  312. leave_tstone = True
  313. try:
  314. LdpFactory.from_stored(uid, repr_opts).delete(
  315. leave_tstone=leave_tstone)
  316. except ResourceNotExistsError as e:
  317. return str(e), 404
  318. except TombstoneError as e:
  319. return _tombstone_response(e, uid)
  320. return '', 204, headers
  321. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  322. 'PATCH', 'DELETE'])
  323. def tombstone(uid):
  324. '''
  325. Handle all tombstone operations.
  326. The only allowed methods are POST and DELETE; any other verb will return a
  327. 405.
  328. '''
  329. logger.debug('Deleting tombstone for {}.'.format(uid))
  330. rsrc = Ldpr(uid)
  331. try:
  332. rsrc.metadata
  333. except TombstoneError as e:
  334. if request.method == 'DELETE':
  335. if e.uid == uid:
  336. rsrc.purge()
  337. return '', 204
  338. else:
  339. return _tombstone_response(e, uid)
  340. elif request.method == 'POST':
  341. if e.uid == uid:
  342. rsrc_uri = rsrc.resurrect()
  343. headers = {'Location' : rsrc_uri}
  344. return rsrc_uri, 201, headers
  345. else:
  346. return _tombstone_response(e, uid)
  347. else:
  348. return 'Method Not Allowed.', 405
  349. except ResourceNotExistsError as e:
  350. return str(e), 404
  351. else:
  352. return '', 404
  353. def negotiate_content(rsp, headers=None):
  354. '''
  355. Return HTML or serialized RDF depending on accept headers.
  356. '''
  357. if request.accept_mimetypes.best == 'text/html':
  358. rsrc = rsp.resource(request.path)
  359. return render_template(
  360. 'resource.html', rsrc=rsrc, nsm=nsm,
  361. blacklist = vw_blacklist)
  362. else:
  363. for p in vw_blacklist:
  364. rsp.remove((None, p, None))
  365. return (rsp.serialize(format='turtle'), headers)
  366. def uuid_for_post(parent_uid, slug=None):
  367. '''
  368. Validate conditions to perform a POST and return an LDP resource
  369. UID for using with the `post` method.
  370. This may raise an exception resulting in a 404 if the parent is not
  371. found or a 409 if the parent is not a valid container.
  372. '''
  373. def split_if_legacy(uid):
  374. if current_app.config['store']['ldp_rs']['legacy_ptree_split']:
  375. uid = g.tbox.split_uuid(uid)
  376. return uid
  377. # Shortcut!
  378. if not slug and parent_uid == '':
  379. uid = split_if_legacy(str(uuid4()))
  380. return uid
  381. parent = LdpFactory.from_stored(parent_uid,
  382. repr_opts={'incl_children' : False})
  383. if nsc['fcrepo'].Pairtree in parent.types:
  384. raise InvalidResourceError(parent.uid,
  385. 'Resource {} cannot be created under a pairtree.')
  386. # Set prefix.
  387. if parent_uid:
  388. logger.debug('Parent types: {}'.format(pformat(parent.types)))
  389. if nsc['ldp'].Container not in parent.types:
  390. raise InvalidResourceError(parent_uid,
  391. 'Parent {} is not a container.')
  392. pfx = parent_uid + '/'
  393. else:
  394. pfx = ''
  395. # Create candidate UID and validate.
  396. if slug:
  397. cnd_uid = pfx + slug
  398. if current_app.rdfly.ask_rsrc_exists(cnd_uid):
  399. uid = pfx + split_if_legacy(str(uuid4()))
  400. else:
  401. uid = cnd_uid
  402. else:
  403. uid = pfx + split_if_legacy(str(uuid4()))
  404. return uid
  405. def bitstream_from_req():
  406. '''
  407. Find how a binary file and its MIMEtype were uploaded in the request.
  408. '''
  409. logger.debug('Content type: {}'.format(request.mimetype))
  410. logger.debug('files: {}'.format(request.files))
  411. logger.debug('stream: {}'.format(request.stream))
  412. if request.mimetype == 'multipart/form-data':
  413. # This seems the "right" way to upload a binary file, with a
  414. # multipart/form-data MIME type and the file in the `file`
  415. # field. This however is not supported by FCREPO4.
  416. stream = request.files.get('file').stream
  417. mimetype = request.files.get('file').content_type
  418. # @TODO This will turn out useful to provide metadata
  419. # with the binary.
  420. #metadata = request.files.get('metadata').stream
  421. #provided_imr = [parse RDF here...]
  422. else:
  423. # This is a less clean way, with the file in the form body and
  424. # the request as application/x-www-form-urlencoded.
  425. # This is how FCREPO4 accepts binary uploads.
  426. stream = request.stream
  427. mimetype = request.mimetype
  428. return stream, mimetype
  429. def _get_bitstream(rsrc):
  430. # @TODO This may change in favor of more low-level handling if the file
  431. # system is not local.
  432. return send_file(rsrc.local_path, as_attachment=True,
  433. attachment_filename=rsrc.filename)
  434. def _tombstone_response(e, uid):
  435. headers = {
  436. 'Link' : '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  437. } if e.uid == uid else {}
  438. return str(e), 410, headers
  439. def set_post_put_params():
  440. '''
  441. Sets handling and content disposition for POST and PUT by parsing headers.
  442. '''
  443. handling = None
  444. if 'prefer' in request.headers:
  445. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  446. logger.debug('Parsed Prefer header: {}'.format(prefer))
  447. if 'handling' in prefer:
  448. handling = prefer['handling']['value']
  449. try:
  450. disposition = g.tbox.parse_rfc7240(
  451. request.headers['content-disposition'])
  452. except KeyError:
  453. disposition = None
  454. return handling, disposition
  455. def is_accept_hdr_rdf_parsable():
  456. '''
  457. Check if any of the 'Accept' header values provided is a RDF parsable
  458. format.
  459. '''
  460. for mimetype in request.accept_mimetypes.values():
  461. if LdpFactory.is_rdf_parsable(mimetype):
  462. return True
  463. return False
  464. def parse_repr_options(retr_opts):
  465. '''
  466. Set options to retrieve IMR.
  467. Ideally, IMR retrieval is done once per request, so all the options
  468. are set once in the `imr()` property.
  469. @param retr_opts (dict): Options parsed from `Prefer` header.
  470. '''
  471. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  472. imr_options = {}
  473. if retr_opts.get('value') == 'minimal':
  474. imr_options = {
  475. 'embed_children' : False,
  476. 'incl_children' : False,
  477. 'incl_inbound' : False,
  478. 'incl_srv_mgd' : False,
  479. }
  480. else:
  481. # Default.
  482. imr_options = {
  483. 'embed_children' : False,
  484. 'incl_children' : True,
  485. 'incl_inbound' : False,
  486. 'incl_srv_mgd' : True,
  487. }
  488. # Override defaults.
  489. if 'parameters' in retr_opts:
  490. include = retr_opts['parameters']['include'].split(' ') \
  491. if 'include' in retr_opts['parameters'] else []
  492. omit = retr_opts['parameters']['omit'].split(' ') \
  493. if 'omit' in retr_opts['parameters'] else []
  494. logger.debug('Include: {}'.format(include))
  495. logger.debug('Omit: {}'.format(omit))
  496. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  497. imr_options['embed_children'] = True
  498. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  499. imr_options['incl_children'] = False
  500. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  501. imr_options['incl_inbound'] = True
  502. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  503. imr_options['incl_srv_mgd'] = False
  504. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  505. return imr_options