ldp.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from uuid import uuid4
  5. import arrow
  6. from flask import (
  7. Blueprint, current_app, g, make_response, render_template,
  8. request, send_file)
  9. from rdflib.namespace import XSD
  10. from rdflib.term import Literal
  11. from lakesuperior.api.resource import transaction
  12. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  13. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  14. from lakesuperior.exceptions import (ResourceNotExistsError, TombstoneError,
  15. ServerManagedTermError, InvalidResourceError, SingleSubjectError,
  16. ResourceExistsError, IncompatibleLdpTypeError)
  17. from lakesuperior.model.ldp_factory import LdpFactory
  18. from lakesuperior.model.ldp_nr import LdpNr
  19. from lakesuperior.model.ldp_rs import LdpRs
  20. from lakesuperior.model.ldpr import Ldpr
  21. from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
  22. from lakesuperior.toolbox import Toolbox
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint for LDP REST API. This is what is usually found under `/rest/` in
# standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
# for backward compatibility.
# All the hooks and routes defined below in this module are registered on
# this blueprint.
ldp = Blueprint(
        'ldp', __name__, template_folder='templates',
        static_url_path='/static', static_folder='../../static')
  30. accept_patch = (
  31. 'application/sparql-update',
  32. )
  33. accept_rdf = (
  34. 'application/ld+json',
  35. 'application/n-triples',
  36. 'application/rdf+xml',
  37. #'application/x-turtle',
  38. #'application/xhtml+xml',
  39. #'application/xml',
  40. #'text/html',
  41. 'text/n3',
  42. #'text/plain',
  43. 'text/rdf+n3',
  44. 'text/turtle',
  45. )
  46. std_headers = {
  47. 'Accept-Patch' : ','.join(accept_patch),
  48. 'Accept-Post' : ','.join(accept_rdf),
  49. #'Allow' : ','.join(allow),
  50. }
  51. '''Predicates excluded by view.'''
  52. vw_blacklist = {
  53. }
  54. @ldp.url_defaults
  55. def bp_url_defaults(endpoint, values):
  56. url_prefix = getattr(g, 'url_prefix', None)
  57. if url_prefix is not None:
  58. values.setdefault('url_prefix', url_prefix)
  59. @ldp.url_value_preprocessor
  60. def bp_url_value_preprocessor(endpoint, values):
  61. g.url_prefix = values.pop('url_prefix')
  62. g.webroot = request.host_url + g.url_prefix
  63. @ldp.before_request
  64. def log_request_start():
  65. logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
  66. @ldp.before_request
  67. def instantiate_req_vars():
  68. g.store = current_app.rdfly.store
  69. g.tbox = Toolbox()
  70. @ldp.before_request
  71. def request_timestamp():
  72. g.timestamp = arrow.utcnow()
  73. g.timestamp_term = Literal(g.timestamp, datatype=XSD.dateTime)
  74. @ldp.after_request
  75. def log_request_end(rsp):
  76. logger.info('** End {} {} **\n\n'.format(request.method, request.url))
  77. return rsp
  78. ## REST SERVICES ##
  79. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  80. @ldp.route('/', defaults={'uid': ''}, methods=['GET'], strict_slashes=False)
  81. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  82. methods=['GET'])
  83. @transaction()
  84. def get_resource(uid, force_rdf=False):
  85. '''
  86. Retrieve RDF or binary content.
  87. @param uid (string) UID of resource to retrieve. The repository root has
  88. an empty string for UID.
  89. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  90. a LDP-NR. This is not available in the API but is used e.g. by the
  91. `*/fcr:metadata` endpoint. The default is False.
  92. '''
  93. out_headers = std_headers
  94. repr_options = defaultdict(dict)
  95. if 'prefer' in request.headers:
  96. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  97. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  98. if 'return' in prefer:
  99. repr_options = parse_repr_options(prefer['return'])
  100. try:
  101. rsrc = LdpFactory.from_stored(uid, repr_options)
  102. except ResourceNotExistsError as e:
  103. return str(e), 404
  104. except TombstoneError as e:
  105. return _tombstone_response(e, uid)
  106. else:
  107. out_headers.update(rsrc.head())
  108. if (
  109. isinstance(rsrc, LdpRs)
  110. or is_accept_hdr_rdf_parsable()
  111. or force_rdf):
  112. rsp = rsrc.get()
  113. return negotiate_content(rsp, out_headers)
  114. else:
  115. logger.info('Streaming out binary content.')
  116. rsp = make_response(send_file(rsrc.local_path, as_attachment=True,
  117. attachment_filename=rsrc.filename, mimetype=rsrc.mimetype))
  118. rsp.headers['Link'] = '<{}/fcr:metadata>; rel="describedby"'\
  119. .format(rsrc.uri)
  120. return rsp
  121. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  122. @ldp.route('/', defaults={'parent': ''}, methods=['POST'],
  123. strict_slashes=False)
  124. def post_resource(parent):
  125. '''
  126. Add a new resource in a new URI.
  127. '''
  128. out_headers = std_headers
  129. try:
  130. slug = request.headers['Slug']
  131. logger.info('Slug: {}'.format(slug))
  132. except KeyError:
  133. slug = None
  134. handling, disposition = set_post_put_params()
  135. stream, mimetype = bitstream_from_req()
  136. try:
  137. with TxnManager(g.store, True):
  138. uid = LdpFactory.mint_uid(parent, slug)
  139. logger.debug('Generated UID for POST: {}'.format(uid))
  140. rsrc = LdpFactory.from_provided(
  141. uid, content_length=request.content_length,
  142. stream=stream, mimetype=mimetype, handling=handling,
  143. disposition=disposition)
  144. rsrc.post()
  145. except ResourceNotExistsError as e:
  146. return str(e), 404
  147. except InvalidResourceError as e:
  148. return str(e), 409
  149. except TombstoneError as e:
  150. return _tombstone_response(e, uid)
  151. except ServerManagedTermError as e:
  152. return str(e), 412
  153. hdr = {
  154. 'Location' : rsrc.uri,
  155. }
  156. if isinstance(rsrc, LdpNr):
  157. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  158. .format(rsrc.uri)
  159. out_headers.update(hdr)
  160. return rsrc.uri, 201, out_headers
  161. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  162. @transaction()
  163. def get_version_info(uid):
  164. '''
  165. Get version info (`fcr:versions`).
  166. '''
  167. try:
  168. rsp = Ldpr(uid).get_version_info()
  169. except ResourceNotExistsError as e:
  170. return str(e), 404
  171. except InvalidResourceError as e:
  172. return str(e), 409
  173. except TombstoneError as e:
  174. return _tombstone_response(e, uid)
  175. else:
  176. return negotiate_content(rsp)
  177. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  178. @transaction()
  179. def get_version(uid, ver_uid):
  180. '''
  181. Get an individual resource version.
  182. @param uid (string) Resource UID.
  183. @param ver_uid (string) Version UID.
  184. '''
  185. try:
  186. rsp = Ldpr(uid).get_version(ver_uid)
  187. except ResourceNotExistsError as e:
  188. return str(e), 404
  189. except InvalidResourceError as e:
  190. return str(e), 409
  191. except TombstoneError as e:
  192. return _tombstone_response(e, uid)
  193. else:
  194. return negotiate_content(rsp)
  195. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  196. @transaction(True)
  197. def post_version(uid):
  198. '''
  199. Create a new resource version.
  200. '''
  201. if request.method == 'PUT':
  202. return 'Method not allowed.', 405
  203. ver_uid = request.headers.get('slug', None)
  204. try:
  205. ver_uri = LdpFactory.from_stored(uid).create_version(ver_uid)
  206. except ResourceNotExistsError as e:
  207. return str(e), 404
  208. except InvalidResourceError as e:
  209. return str(e), 409
  210. except TombstoneError as e:
  211. return _tombstone_response(e, uid)
  212. else:
  213. return '', 201, {'Location': ver_uri}
  214. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  215. @transaction(True)
  216. def patch_version(uid, ver_uid):
  217. '''
  218. Revert to a previous version.
  219. NOTE: This creates a new version snapshot.
  220. @param uid (string) Resource UID.
  221. @param ver_uid (string) Version UID.
  222. '''
  223. try:
  224. LdpFactory.from_stored(uid).revert_to_version(ver_uid)
  225. except ResourceNotExistsError as e:
  226. return str(e), 404
  227. except InvalidResourceError as e:
  228. return str(e), 409
  229. except TombstoneError as e:
  230. return _tombstone_response(e, uid)
  231. else:
  232. return '', 204
  233. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  234. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  235. methods=['PUT'])
  236. @transaction(True)
  237. def put_resource(uid):
  238. '''
  239. Add a new resource at a specified URI.
  240. '''
  241. # Parse headers.
  242. logger.info('Request headers: {}'.format(request.headers))
  243. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  244. handling, disposition = set_post_put_params()
  245. stream, mimetype = bitstream_from_req()
  246. try:
  247. rsrc = LdpFactory.from_provided(
  248. uid, content_length=request.content_length,
  249. stream=stream, mimetype=mimetype, handling=handling,
  250. disposition=disposition)
  251. if not request.content_length and rsrc.is_stored:
  252. raise InvalidResourceError(rsrc.uid,
  253. 'Resource {} already exists and no data set was provided.')
  254. except InvalidResourceError as e:
  255. return str(e), 409
  256. except (ServerManagedTermError, SingleSubjectError) as e:
  257. return str(e), 412
  258. except IncompatibleLdpTypeError as e:
  259. return str(e), 415
  260. try:
  261. ret = rsrc.put()
  262. rsp_headers.update(rsrc.head())
  263. except (InvalidResourceError, ResourceExistsError) as e:
  264. return str(e), 409
  265. except TombstoneError as e:
  266. return _tombstone_response(e, uid)
  267. if ret == Ldpr.RES_CREATED:
  268. rsp_code = 201
  269. rsp_headers['Location'] = rsp_body = rsrc.uri
  270. if isinstance(rsrc, LdpNr):
  271. rsp_headers['Link'] = '<{0}/fcr:metadata>; rel="describedby"'\
  272. .format(rsrc.uri)
  273. else:
  274. rsp_code = 204
  275. rsp_body = ''
  276. return rsp_body, rsp_code, rsp_headers
  277. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  278. @transaction(True)
  279. def patch_resource(uid):
  280. '''
  281. Update an existing resource with a SPARQL-UPDATE payload.
  282. '''
  283. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  284. rsrc = LdpRs(uid)
  285. if request.mimetype != 'application/sparql-update':
  286. return 'Provided content type is not a valid parsable format: {}'\
  287. .format(request.mimetype), 415
  288. try:
  289. rsrc.patch(request.get_data().decode('utf-8'))
  290. except ResourceNotExistsError as e:
  291. return str(e), 404
  292. except TombstoneError as e:
  293. return _tombstone_response(e, uid)
  294. except (ServerManagedTermError, SingleSubjectError) as e:
  295. return str(e), 412
  296. else:
  297. rsp_headers.update(rsrc.head())
  298. return '', 204, rsp_headers
@ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
@transaction(True)
def patch_resource_metadata(uid):
    '''
    Handle PATCH on `*/fcr:metadata` by delegating to `patch_resource`.

    @param uid (string) Resource UID.

    @return Whatever `patch_resource` returns for the same UID.
    '''
    return patch_resource(uid)
  303. @ldp.route('/<path:uid>', methods=['DELETE'])
  304. @transaction(True)
  305. def delete_resource(uid):
  306. '''
  307. Delete a resource and optionally leave a tombstone.
  308. This behaves differently from FCREPO. A tombstone indicated that the
  309. resource is no longer available at its current location, but its historic
  310. snapshots still are. Also, deleting a resource with a tombstone creates
  311. one more version snapshot of the resource prior to being deleted.
  312. In order to completely wipe out all traces of a resource, the tombstone
  313. must be deleted as well, or the `Prefer:no-tombstone` header can be used.
  314. The latter will purge the resource immediately.
  315. '''
  316. headers = std_headers
  317. # If referential integrity is enforced, grab all inbound relationships
  318. # to break them.
  319. repr_opts = {'incl_inbound' : True} \
  320. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  321. else {}
  322. if 'prefer' in request.headers:
  323. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  324. leave_tstone = 'no-tombstone' not in prefer
  325. else:
  326. leave_tstone = True
  327. try:
  328. LdpFactory.from_stored(uid, repr_opts).delete(
  329. leave_tstone=leave_tstone)
  330. except ResourceNotExistsError as e:
  331. return str(e), 404
  332. except TombstoneError as e:
  333. return _tombstone_response(e, uid)
  334. return '', 204, headers
  335. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  336. 'PATCH', 'DELETE'])
  337. @transaction(True)
  338. def tombstone(uid):
  339. '''
  340. Handle all tombstone operations.
  341. The only allowed methods are POST and DELETE; any other verb will return a
  342. 405.
  343. '''
  344. logger.debug('Deleting tombstone for {}.'.format(uid))
  345. rsrc = Ldpr(uid)
  346. try:
  347. rsrc.metadata
  348. except TombstoneError as e:
  349. if request.method == 'DELETE':
  350. if e.uid == uid:
  351. rsrc.purge()
  352. return '', 204
  353. else:
  354. return _tombstone_response(e, uid)
  355. elif request.method == 'POST':
  356. if e.uid == uid:
  357. rsrc_uri = rsrc.resurrect()
  358. headers = {'Location' : rsrc_uri}
  359. return rsrc_uri, 201, headers
  360. else:
  361. return _tombstone_response(e, uid)
  362. else:
  363. return 'Method Not Allowed.', 405
  364. except ResourceNotExistsError as e:
  365. return str(e), 404
  366. else:
  367. return '', 404
  368. def negotiate_content(rsp, headers=None):
  369. '''
  370. Return HTML or serialized RDF depending on accept headers.
  371. '''
  372. if request.accept_mimetypes.best == 'text/html':
  373. rsrc = rsp.resource(request.path)
  374. return render_template(
  375. 'resource.html', rsrc=rsrc, nsm=nsm,
  376. blacklist = vw_blacklist)
  377. else:
  378. for p in vw_blacklist:
  379. rsp.remove((None, p, None))
  380. return (rsp.serialize(format='turtle'), headers)
  381. def bitstream_from_req():
  382. '''
  383. Find how a binary file and its MIMEtype were uploaded in the request.
  384. '''
  385. logger.debug('Content type: {}'.format(request.mimetype))
  386. logger.debug('files: {}'.format(request.files))
  387. logger.debug('stream: {}'.format(request.stream))
  388. if request.mimetype == 'multipart/form-data':
  389. # This seems the "right" way to upload a binary file, with a
  390. # multipart/form-data MIME type and the file in the `file`
  391. # field. This however is not supported by FCREPO4.
  392. stream = request.files.get('file').stream
  393. mimetype = request.files.get('file').content_type
  394. # @TODO This will turn out useful to provide metadata
  395. # with the binary.
  396. #metadata = request.files.get('metadata').stream
  397. #provided_imr = [parse RDF here...]
  398. else:
  399. # This is a less clean way, with the file in the form body and
  400. # the request as application/x-www-form-urlencoded.
  401. # This is how FCREPO4 accepts binary uploads.
  402. stream = request.stream
  403. mimetype = request.mimetype
  404. return stream, mimetype
def _get_bitstream(rsrc):
    '''
    Send the file of a resource as an attachment.

    @param rsrc Resource providing the `local_path` and `filename`
    attributes used to build the response.

    @return The result of Flask `send_file`.
    '''
    # @TODO This may change in favor of more low-level handling if the file
    # system is not local.
    return send_file(rsrc.local_path, as_attachment=True,
            attachment_filename=rsrc.filename)
  410. def _tombstone_response(e, uid):
  411. headers = {
  412. 'Link': '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  413. } if e.uid == uid else {}
  414. return str(e), 410, headers
  415. def set_post_put_params():
  416. '''
  417. Sets handling and content disposition for POST and PUT by parsing headers.
  418. '''
  419. handling = 'strict'
  420. if 'prefer' in request.headers:
  421. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  422. logger.debug('Parsed Prefer header: {}'.format(prefer))
  423. if 'handling' in prefer:
  424. handling = prefer['handling']['value']
  425. try:
  426. disposition = g.tbox.parse_rfc7240(
  427. request.headers['content-disposition'])
  428. except KeyError:
  429. disposition = None
  430. return handling, disposition
  431. def is_accept_hdr_rdf_parsable():
  432. '''
  433. Check if any of the 'Accept' header values provided is a RDF parsable
  434. format.
  435. '''
  436. for mimetype in request.accept_mimetypes.values():
  437. if LdpFactory.is_rdf_parsable(mimetype):
  438. return True
  439. return False
  440. def parse_repr_options(retr_opts):
  441. '''
  442. Set options to retrieve IMR.
  443. Ideally, IMR retrieval is done once per request, so all the options
  444. are set once in the `imr()` property.
  445. @param retr_opts (dict): Options parsed from `Prefer` header.
  446. '''
  447. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  448. imr_options = {}
  449. if retr_opts.get('value') == 'minimal':
  450. imr_options = {
  451. 'embed_children' : False,
  452. 'incl_children' : False,
  453. 'incl_inbound' : False,
  454. 'incl_srv_mgd' : False,
  455. }
  456. else:
  457. # Default.
  458. imr_options = {
  459. 'embed_children' : False,
  460. 'incl_children' : True,
  461. 'incl_inbound' : False,
  462. 'incl_srv_mgd' : True,
  463. }
  464. # Override defaults.
  465. if 'parameters' in retr_opts:
  466. include = retr_opts['parameters']['include'].split(' ') \
  467. if 'include' in retr_opts['parameters'] else []
  468. omit = retr_opts['parameters']['omit'].split(' ') \
  469. if 'omit' in retr_opts['parameters'] else []
  470. logger.debug('Include: {}'.format(include))
  471. logger.debug('Omit: {}'.format(omit))
  472. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  473. imr_options['embed_children'] = True
  474. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  475. imr_options['incl_children'] = False
  476. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  477. imr_options['incl_inbound'] = True
  478. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  479. imr_options['incl_srv_mgd'] = False
  480. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  481. return imr_options