# ldp.py
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from uuid import uuid4
  5. import arrow
  6. from flask import (
  7. Blueprint, current_app, g, make_response, render_template,
  8. request, send_file)
  9. from rdflib.namespace import RDF, XSD
  10. from rdflib.term import Literal
  11. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  12. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  13. from lakesuperior.exceptions import *
  14. from lakesuperior.model.ldpr import Ldpr
  15. from lakesuperior.model.ldp_nr import LdpNr
  16. from lakesuperior.model.ldp_rs import LdpRs
  17. from lakesuperior.toolbox import Toolbox
  18. logger = logging.getLogger(__name__)
  19. # Blueprint for LDP REST API. This is what is usually found under `/rest/` in
  20. # standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
  21. # for backward compatibility.
  22. ldp = Blueprint(
  23. 'ldp', __name__, template_folder='templates',
  24. static_url_path='/static', static_folder='../../static')
  25. accept_patch = (
  26. 'application/sparql-update',
  27. )
  28. accept_rdf = (
  29. 'application/ld+json',
  30. 'application/n-triples',
  31. 'application/rdf+xml',
  32. #'application/x-turtle',
  33. #'application/xhtml+xml',
  34. #'application/xml',
  35. #'text/html',
  36. 'text/n3',
  37. #'text/plain',
  38. 'text/rdf+n3',
  39. 'text/turtle',
  40. )
  41. std_headers = {
  42. 'Accept-Patch' : ','.join(accept_patch),
  43. 'Accept-Post' : ','.join(accept_rdf),
  44. #'Allow' : ','.join(allow),
  45. }
  46. @ldp.url_defaults
  47. def bp_url_defaults(endpoint, values):
  48. url_prefix = getattr(g, 'url_prefix', None)
  49. if url_prefix is not None:
  50. values.setdefault('url_prefix', url_prefix)
  51. @ldp.url_value_preprocessor
  52. def bp_url_value_preprocessor(endpoint, values):
  53. g.url_prefix = values.pop('url_prefix')
  54. g.webroot = request.host_url + g.url_prefix
  55. @ldp.before_request
  56. def log_request_start():
  57. logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
  58. @ldp.before_request
  59. def instantiate_toolbox():
  60. g.tbox = Toolbox()
  61. @ldp.before_request
  62. def request_timestamp():
  63. g.timestamp = arrow.utcnow()
  64. g.timestamp_term = Literal(g.timestamp, datatype=XSD.dateTime)
  65. @ldp.after_request
  66. def log_request_end(rsp):
  67. logger.info('** End {} {} **\n\n'.format(request.method, request.url))
  68. return rsp
  69. ## REST SERVICES ##
  70. @ldp.route('/<path:uuid>', methods=['GET'], strict_slashes=False)
  71. @ldp.route('/', defaults={'uuid': None}, methods=['GET'], strict_slashes=False)
  72. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  73. methods=['GET'])
  74. def get_resource(uuid, force_rdf=False):
  75. '''
  76. Retrieve RDF or binary content.
  77. @param uuid (string) UUID of resource to retrieve.
  78. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  79. a LDP-NR. This is not available in the API but is used e.g. by the
  80. `*/fcr:metadata` endpoint. The default is False.
  81. '''
  82. out_headers = std_headers
  83. repr_options = defaultdict(dict)
  84. if 'prefer' in request.headers:
  85. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  86. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  87. if 'return' in prefer:
  88. repr_options = parse_repr_options(prefer['return'])
  89. try:
  90. rsrc = Ldpr.outbound_inst(uuid, repr_options)
  91. except ResourceNotExistsError as e:
  92. return str(e), 404
  93. except TombstoneError as e:
  94. return _tombstone_response(e, uuid)
  95. else:
  96. out_headers.update(rsrc.head())
  97. if isinstance(rsrc, LdpRs) \
  98. or is_accept_hdr_rdf_parsable() \
  99. or force_rdf:
  100. resp = rsrc.get()
  101. if request.accept_mimetypes.best == 'text/html':
  102. rsrc = resp.resource(request.path)
  103. return render_template('resource.html', rsrc=rsrc, nsm=nsm)
  104. else:
  105. return (resp.serialize(format='turtle'), out_headers)
  106. else:
  107. logger.info('Streaming out binary content.')
  108. rsp = make_response(send_file(rsrc.local_path, as_attachment=True,
  109. attachment_filename=rsrc.filename))
  110. rsp.headers['Link'] = '<{}/fcr:metadata>; rel="describedby"'\
  111. .format(rsrc.uri)
  112. return rsp
  113. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  114. @ldp.route('/', defaults={'parent': None}, methods=['POST'],
  115. strict_slashes=False)
  116. def post_resource(parent):
  117. '''
  118. Add a new resource in a new URI.
  119. '''
  120. out_headers = std_headers
  121. try:
  122. slug = request.headers['Slug']
  123. logger.info('Slug: {}'.format(slug))
  124. except KeyError:
  125. slug = None
  126. handling, disposition = set_post_put_params()
  127. stream, mimetype = bitstream_from_req()
  128. try:
  129. uuid = uuid_for_post(parent, slug)
  130. logger.debug('Generated UUID for POST: {}'.format(uuid))
  131. rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
  132. stream=stream, mimetype=mimetype, handling=handling,
  133. disposition=disposition)
  134. except ResourceNotExistsError as e:
  135. return str(e), 404
  136. except InvalidResourceError as e:
  137. return str(e), 409
  138. except TombstoneError as e:
  139. return _tombstone_response(e, uuid)
  140. try:
  141. rsrc.post()
  142. except ServerManagedTermError as e:
  143. return str(e), 412
  144. hdr = {
  145. 'Location' : rsrc.uri,
  146. }
  147. if isinstance(rsrc, LdpNr):
  148. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  149. .format(rsrc.uri)
  150. out_headers.update(hdr)
  151. return rsrc.uri, 201, out_headers
  152. @ldp.route('/<path:uuid>/fcr:versions', methods=['GET'])
  153. def get_version_info(uuid):
  154. '''
  155. Get version info (`fcr:versions`).
  156. '''
  157. try:
  158. rsp = Ldpr(uuid).version_info
  159. except ResourceNotExistsError as e:
  160. return str(e), 404
  161. except InvalidResourceError as e:
  162. return str(e), 409
  163. except TombstoneError as e:
  164. return _tombstone_response(e, uuid)
  165. else:
  166. return rsp.serialize(format='turtle'), 200
  167. @ldp.route('/<path:uuid>/fcr:versions/<ver_uid>', methods=['GET'])
  168. def get_version(uuid, ver_uid):
  169. '''
  170. Get an individual resource version.
  171. @param uuid (string) Resource UUID.
  172. @param ver_uid (string) Version UID.
  173. '''
  174. try:
  175. rsp = Ldpr(uuid).get_version(ver_uid)
  176. except ResourceNotExistsError as e:
  177. return str(e), 404
  178. except InvalidResourceError as e:
  179. return str(e), 409
  180. except TombstoneError as e:
  181. return _tombstone_response(e, uuid)
  182. else:
  183. return rsp.serialize(format='turtle'), 200
  184. @ldp.route('/<path:uuid>/fcr:versions', methods=['POST'])
  185. def post_version(uuid):
  186. '''
  187. Create a new resource version.
  188. '''
  189. ver_uid = request.headers.get('slug', None)
  190. if not ver_uid:
  191. ver_uid = str(uuid4())
  192. try:
  193. ver_uri = Ldpr.outbound_inst(uuid).create_version(ver_uid)
  194. except ResourceNotExistsError as e:
  195. return str(e), 404
  196. except InvalidResourceError as e:
  197. return str(e), 409
  198. except TombstoneError as e:
  199. return _tombstone_response(e, uuid)
  200. else:
  201. return '', 201, {'Location': ver_uri}
  202. @ldp.route('/<path:uuid>', methods=['PUT'], strict_slashes=False)
  203. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  204. methods=['PUT'])
  205. def put_resource(uuid):
  206. '''
  207. Add a new resource at a specified URI.
  208. '''
  209. # Parse headers.
  210. logger.info('Request headers: {}'.format(request.headers))
  211. rsp_headers = std_headers
  212. handling, disposition = set_post_put_params()
  213. stream, mimetype = bitstream_from_req()
  214. try:
  215. rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
  216. stream=stream, mimetype=mimetype, handling=handling,
  217. disposition=disposition)
  218. except InvalidResourceError as e:
  219. return str(e), 409
  220. except ServerManagedTermError as e:
  221. return str(e), 412
  222. except IncompatibleLdpTypeError as e:
  223. return str(e), 415
  224. try:
  225. ret = rsrc.put()
  226. except (InvalidResourceError, ResourceExistsError ) as e:
  227. return str(e), 409
  228. except TombstoneError as e:
  229. return _tombstone_response(e, uuid)
  230. if ret == Ldpr.RES_CREATED:
  231. rsp_code = 201
  232. rsp_headers['Location'] = rsp_body = rsrc.uri
  233. if isinstance(rsrc, LdpNr):
  234. rsp_headers['Link'] = '<{0}/fcr:metadata>; rel="describedby"'\
  235. .format(rsrc.uri)
  236. else:
  237. rsp_code = 204
  238. rsp_body = ''
  239. return rsp_body, rsp_code, rsp_headers
  240. @ldp.route('/<path:uuid>', methods=['PATCH'], strict_slashes=False)
  241. def patch_resource(uuid):
  242. '''
  243. Update an existing resource with a SPARQL-UPDATE payload.
  244. '''
  245. headers = std_headers
  246. rsrc = LdpRs(uuid)
  247. if request.mimetype != 'application/sparql-update':
  248. return 'Provided content type is not a valid parsable format: {}'\
  249. .format(request.mimetype), 415
  250. try:
  251. rsrc.patch(request.get_data().decode('utf-8'))
  252. except ResourceNotExistsError as e:
  253. return str(e), 404
  254. except TombstoneError as e:
  255. return _tombstone_response(e, uuid)
  256. except ServerManagedTermError as e:
  257. return str(e), 412
  258. return '', 204, headers
  259. @ldp.route('/<path:uuid>/fcr:metadata', methods=['PATCH'])
  260. def patch_resource_metadata(uuid):
  261. return patch_resource(uuid)
  262. @ldp.route('/<path:uuid>', methods=['DELETE'])
  263. def delete_resource(uuid):
  264. '''
  265. Delete a resource.
  266. '''
  267. headers = std_headers
  268. # If referential integrity is enforced, grab all inbound relationships
  269. # to break them.
  270. repr_opts = {'incl_inbound' : True} \
  271. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  272. else {}
  273. if 'prefer' in request.headers:
  274. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  275. leave_tstone = 'no-tombstone' not in prefer
  276. else:
  277. leave_tstone = True
  278. try:
  279. Ldpr.outbound_inst(uuid, repr_opts).delete(leave_tstone=leave_tstone)
  280. except ResourceNotExistsError as e:
  281. return str(e), 404
  282. except TombstoneError as e:
  283. return _tombstone_response(e, uuid)
  284. return '', 204, headers
  285. @ldp.route('/<path:uuid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  286. 'PATCH', 'DELETE'])
  287. def tombstone(uuid):
  288. '''
  289. Handle all tombstone operations.
  290. The only allowed method is DELETE; any other verb will return a 405.
  291. '''
  292. logger.debug('Deleting tombstone for {}.'.format(uuid))
  293. rsrc = Ldpr(uuid)
  294. try:
  295. imr = rsrc.imr
  296. except TombstoneError as e:
  297. if request.method == 'DELETE':
  298. if e.uuid == uuid:
  299. rsrc.delete_tombstone()
  300. return '', 204
  301. else:
  302. return _tombstone_response(e, uuid)
  303. else:
  304. return 'Method Not Allowed.', 405
  305. except ResourceNotExistsError as e:
  306. return str(e), 404
  307. else:
  308. return '', 404
  309. def uuid_for_post(parent_uuid=None, slug=None):
  310. '''
  311. Validate conditions to perform a POST and return an LDP resource
  312. UUID for using with the `post` method.
  313. This may raise an exception resulting in a 404 if the parent is not
  314. found or a 409 if the parent is not a valid container.
  315. '''
  316. def split_if_legacy(uuid):
  317. if current_app.config['store']['ldp_rs']['legacy_ptree_split']:
  318. uuid = g.tbox.split_uuid(uuid)
  319. return uuid
  320. # Shortcut!
  321. if not slug and not parent_uuid:
  322. uuid = split_if_legacy(str(uuid4()))
  323. return uuid
  324. parent = Ldpr.outbound_inst(parent_uuid, repr_opts={'incl_children' : False})
  325. if nsc['fcrepo'].Pairtree in parent.types:
  326. raise InvalidResourceError(parent.uuid,
  327. 'Resources cannot be created under a pairtree.')
  328. # Set prefix.
  329. if parent_uuid:
  330. parent_types = { t.identifier for t in \
  331. parent.imr.objects(RDF.type) }
  332. logger.debug('Parent types: {}'.format(pformat(parent_types)))
  333. if nsc['ldp'].Container not in parent_types:
  334. raise InvalidResourceError('Parent {} is not a container.'
  335. .format(parent_uuid))
  336. pfx = parent_uuid + '/'
  337. else:
  338. pfx = ''
  339. # Create candidate UUID and validate.
  340. if slug:
  341. cnd_uuid = pfx + slug
  342. if current_app.rdfly.ask_rsrc_exists(nsc['fcres'][cnd_uuid]):
  343. uuid = pfx + split_if_legacy(str(uuid4()))
  344. else:
  345. uuid = cnd_uuid
  346. else:
  347. uuid = pfx + split_if_legacy(str(uuid4()))
  348. return uuid
  349. def bitstream_from_req():
  350. '''
  351. Find how a binary file and its MIMEtype were uploaded in the request.
  352. '''
  353. logger.debug('Content type: {}'.format(request.mimetype))
  354. logger.debug('files: {}'.format(request.files))
  355. logger.debug('stream: {}'.format(request.stream))
  356. if request.mimetype == 'multipart/form-data':
  357. # This seems the "right" way to upload a binary file, with a
  358. # multipart/form-data MIME type and the file in the `file`
  359. # field. This however is not supported by FCREPO4.
  360. stream = request.files.get('file').stream
  361. mimetype = request.files.get('file').content_type
  362. # @TODO This will turn out useful to provide metadata
  363. # with the binary.
  364. #metadata = request.files.get('metadata').stream
  365. #provided_imr = [parse RDF here...]
  366. else:
  367. # This is a less clean way, with the file in the form body and
  368. # the request as application/x-www-form-urlencoded.
  369. # This is how FCREPO4 accepts binary uploads.
  370. stream = request.stream
  371. mimetype = request.mimetype
  372. return stream, mimetype
  373. def _get_bitstream(rsrc):
  374. out_headers = std_headers
  375. # @TODO This may change in favor of more low-level handling if the file
  376. # system is not local.
  377. return send_file(rsrc.local_path, as_attachment=True,
  378. attachment_filename=rsrc.filename)
  379. def _tombstone_response(e, uuid):
  380. headers = {
  381. 'Link' : '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  382. } if e.uuid == uuid else {}
  383. return str(e), 410, headers
  384. def set_post_put_params():
  385. '''
  386. Sets handling and content disposition for POST and PUT by parsing headers.
  387. '''
  388. handling = None
  389. if 'prefer' in request.headers:
  390. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  391. logger.debug('Parsed Prefer header: {}'.format(prefer))
  392. if 'handling' in prefer:
  393. handling = prefer['handling']['value']
  394. try:
  395. disposition = g.tbox.parse_rfc7240(
  396. request.headers['content-disposition'])
  397. except KeyError:
  398. disposition = None
  399. return handling, disposition
  400. def is_accept_hdr_rdf_parsable():
  401. '''
  402. Check if any of the 'Accept' header values provided is a RDF parsable
  403. format.
  404. '''
  405. for mimetype in request.accept_mimetypes.values():
  406. if Ldpr.is_rdf_parsable(mimetype):
  407. return True
  408. return False
  409. def parse_repr_options(retr_opts):
  410. '''
  411. Set options to retrieve IMR.
  412. Ideally, IMR retrieval is done once per request, so all the options
  413. are set once in the `imr()` property.
  414. @param retr_opts (dict): Options parsed from `Prefer` header.
  415. '''
  416. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  417. imr_options = {}
  418. if retr_opts.get('value') == 'minimal':
  419. imr_options = {
  420. 'embed_children' : False,
  421. 'incl_children' : False,
  422. 'incl_inbound' : False,
  423. 'incl_srv_mgd' : False,
  424. }
  425. else:
  426. # Default.
  427. imr_options = {
  428. 'embed_children' : False,
  429. 'incl_children' : True,
  430. 'incl_inbound' : False,
  431. 'incl_srv_mgd' : True,
  432. }
  433. # Override defaults.
  434. if 'parameters' in retr_opts:
  435. include = retr_opts['parameters']['include'].split(' ') \
  436. if 'include' in retr_opts['parameters'] else []
  437. omit = retr_opts['parameters']['omit'].split(' ') \
  438. if 'omit' in retr_opts['parameters'] else []
  439. logger.debug('Include: {}'.format(include))
  440. logger.debug('Omit: {}'.format(omit))
  441. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  442. imr_options['embed_children'] = True
  443. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  444. imr_options['incl_children'] = False
  445. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  446. imr_options['incl_inbound'] = True
  447. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  448. imr_options['incl_srv_mgd'] = False
  449. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  450. return imr_options