ldp.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. import logging
  2. import pdb
  3. from collections import defaultdict
  4. from io import BytesIO
  5. from pprint import pformat
  6. from uuid import uuid4
  7. import arrow
  8. from flask import (
  9. Blueprint, g, make_response, render_template,
  10. request, send_file)
  11. from rdflib.namespace import XSD
  12. from rdflib.term import Literal
  13. from lakesuperior.api import resource as rsrc_api
  14. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  15. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  16. from lakesuperior.exceptions import (ResourceNotExistsError, TombstoneError,
  17. ServerManagedTermError, InvalidResourceError, SingleSubjectError,
  18. ResourceExistsError, IncompatibleLdpTypeError)
  19. from lakesuperior.globals import RES_CREATED
  20. from lakesuperior.model.ldp_factory import LdpFactory
  21. from lakesuperior.model.ldp_nr import LdpNr
  22. from lakesuperior.model.ldp_rs import LdpRs
  23. from lakesuperior.model.ldpr import Ldpr
  24. from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
  25. from lakesuperior.toolbox import Toolbox
  26. logger = logging.getLogger(__name__)
  27. # Blueprint for LDP REST API. This is what is usually found under `/rest/` in
  28. # standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
  29. # for backward compatibility.
  30. ldp = Blueprint(
  31. 'ldp', __name__, template_folder='templates',
  32. static_url_path='/static', static_folder='../../static')
  33. accept_patch = (
  34. 'application/sparql-update',
  35. )
  36. accept_rdf = (
  37. 'application/ld+json',
  38. 'application/n-triples',
  39. 'application/rdf+xml',
  40. #'application/x-turtle',
  41. #'application/xhtml+xml',
  42. #'application/xml',
  43. #'text/html',
  44. 'text/n3',
  45. #'text/plain',
  46. 'text/rdf+n3',
  47. 'text/turtle',
  48. )
  49. std_headers = {
  50. 'Accept-Patch' : ','.join(accept_patch),
  51. 'Accept-Post' : ','.join(accept_rdf),
  52. #'Allow' : ','.join(allow),
  53. }
  54. '''Predicates excluded by view.'''
  55. vw_blacklist = {
  56. }
  57. @ldp.url_defaults
  58. def bp_url_defaults(endpoint, values):
  59. url_prefix = getattr(g, 'url_prefix', None)
  60. if url_prefix is not None:
  61. values.setdefault('url_prefix', url_prefix)
  62. @ldp.url_value_preprocessor
  63. def bp_url_value_preprocessor(endpoint, values):
  64. g.url_prefix = values.pop('url_prefix')
  65. g.webroot = request.host_url + g.url_prefix
  66. @ldp.before_request
  67. def log_request_start():
  68. logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
  69. @ldp.before_request
  70. def instantiate_req_vars():
  71. g.tbox = Toolbox()
  72. @ldp.after_request
  73. def log_request_end(rsp):
  74. logger.info('** End {} {} **\n\n'.format(request.method, request.url))
  75. return rsp
  76. ## REST SERVICES ##
  77. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  78. @ldp.route('/', defaults={'uid': ''}, methods=['GET'], strict_slashes=False)
  79. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  80. methods=['GET'])
  81. def get_resource(uid, force_rdf=False):
  82. '''
  83. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  84. Retrieve RDF or binary content.
  85. @param uid (string) UID of resource to retrieve. The repository root has
  86. an empty string for UID.
  87. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  88. a LDP-NR. This is not available in the API but is used e.g. by the
  89. `*/fcr:metadata` endpoint. The default is False.
  90. '''
  91. out_headers = std_headers
  92. repr_options = defaultdict(dict)
  93. if 'prefer' in request.headers:
  94. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  95. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  96. if 'return' in prefer:
  97. repr_options = parse_repr_options(prefer['return'])
  98. try:
  99. rsrc = rsrc_api.get(uid, repr_options)
  100. except ResourceNotExistsError as e:
  101. return str(e), 404
  102. except TombstoneError as e:
  103. return _tombstone_response(e, uid)
  104. else:
  105. out_headers.update(_headers_from_metadata(rsrc))
  106. if (
  107. isinstance(rsrc, LdpRs)
  108. or is_accept_hdr_rdf_parsable()
  109. or force_rdf):
  110. gr = g.tbox.globalize_graph(rsrc.out_graph)
  111. gr.namespace_manager = nsm
  112. return _negotiate_content(gr, out_headers)
  113. else:
  114. logger.info('Streaming out binary content.')
  115. rsp = make_response(send_file(
  116. rsrc.local_path, as_attachment=True,
  117. attachment_filename=rsrc.filename,
  118. mimetype=rsrc.mimetype))
  119. rsp.headers = out_headers
  120. rsp.headers['Link'] = (
  121. '<{}/fcr:metadata>; rel="describedby"'.format(rsrc.uri))
  122. return rsp
  123. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  124. def get_version_info(uid):
  125. '''
  126. Get version info (`fcr:versions`).
  127. '''
  128. try:
  129. gr = rsrc_api.get_version_info(uid)
  130. except ResourceNotExistsError as e:
  131. return str(e), 404
  132. except InvalidResourceError as e:
  133. return str(e), 409
  134. except TombstoneError as e:
  135. return _tombstone_response(e, uid)
  136. else:
  137. return _negotiate_content(g.tbox.globalize_graph(gr))
  138. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  139. def get_version(uid, ver_uid):
  140. '''
  141. Get an individual resource version.
  142. @param uid (string) Resource UID.
  143. @param ver_uid (string) Version UID.
  144. '''
  145. try:
  146. gr = rsrc_api.get_version(uid, ver_uid)
  147. except ResourceNotExistsError as e:
  148. return str(e), 404
  149. except InvalidResourceError as e:
  150. return str(e), 409
  151. except TombstoneError as e:
  152. return _tombstone_response(e, uid)
  153. else:
  154. return _negotiate_content(g.tbox.globalize_graph(gr))
  155. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  156. @ldp.route('/', defaults={'parent': ''}, methods=['POST'],
  157. strict_slashes=False)
  158. def post_resource(parent):
  159. '''
  160. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  161. Add a new resource in a new URI.
  162. '''
  163. out_headers = std_headers
  164. try:
  165. slug = request.headers['Slug']
  166. logger.debug('Slug: {}'.format(slug))
  167. except KeyError:
  168. slug = None
  169. handling, disposition = set_post_put_params()
  170. stream, mimetype = _bistream_from_req()
  171. if LdpFactory.is_rdf_parsable(mimetype):
  172. # If the content is RDF, localize in-repo URIs.
  173. global_rdf = stream.read()
  174. local_rdf = global_rdf.replace(
  175. g.webroot.encode('utf-8'), nsc['fcres'].encode('utf-8'))
  176. stream = BytesIO(local_rdf)
  177. is_rdf = True
  178. else:
  179. is_rdf = False
  180. try:
  181. uid = rsrc_api.create(
  182. parent, slug, stream=stream, mimetype=mimetype,
  183. handling=handling, disposition=disposition)
  184. except ResourceNotExistsError as e:
  185. return str(e), 404
  186. except InvalidResourceError as e:
  187. return str(e), 409
  188. except TombstoneError as e:
  189. return _tombstone_response(e, uid)
  190. except ServerManagedTermError as e:
  191. return str(e), 412
  192. uri = g.tbox.uid_to_uri(uid)
  193. hdr = {'Location' : uri}
  194. if mimetype and not is_rdf:
  195. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  196. .format(uri)
  197. out_headers.update(hdr)
  198. return uri, 201, out_headers
  199. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  200. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  201. methods=['PUT'])
  202. def put_resource(uid):
  203. '''
  204. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  205. Add or replace a new resource at a specified URI.
  206. '''
  207. # Parse headers.
  208. logger.debug('Request headers: {}'.format(request.headers))
  209. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  210. handling, disposition = set_post_put_params()
  211. #import pdb; pdb.set_trace()
  212. stream, mimetype = _bistream_from_req()
  213. if LdpFactory.is_rdf_parsable(mimetype):
  214. # If the content is RDF, localize in-repo URIs.
  215. global_rdf = stream.read()
  216. local_rdf = global_rdf.replace(
  217. (g.webroot + '/').encode('utf-8'),
  218. nsc['fcres'].encode('utf-8')
  219. ).replace(
  220. g.webroot.encode('utf-8'),
  221. nsc['fcres'].encode('utf-8')
  222. )
  223. stream = BytesIO(local_rdf)
  224. is_rdf = True
  225. else:
  226. is_rdf = False
  227. try:
  228. evt = rsrc_api.create_or_replace(uid, stream=stream, mimetype=mimetype,
  229. handling=handling, disposition=disposition)
  230. except (InvalidResourceError, ResourceExistsError) as e:
  231. return str(e), 409
  232. except (ServerManagedTermError, SingleSubjectError) as e:
  233. return str(e), 412
  234. except IncompatibleLdpTypeError as e:
  235. return str(e), 415
  236. except TombstoneError as e:
  237. return _tombstone_response(e, uid)
  238. uri = g.tbox.uid_to_uri(uid)
  239. if evt == RES_CREATED:
  240. rsp_code = 201
  241. rsp_headers['Location'] = rsp_body = uri
  242. if mimetype and not is_rdf:
  243. rsp_headers['Link'] = (
  244. '<{0}/fcr:metadata>; rel="describedby"'.format(uri))
  245. else:
  246. rsp_code = 204
  247. rsp_body = ''
  248. return rsp_body, rsp_code, rsp_headers
  249. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  250. def patch_resource(uid, is_metadata=False):
  251. '''
  252. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  253. Update an existing resource with a SPARQL-UPDATE payload.
  254. '''
  255. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  256. if request.mimetype != 'application/sparql-update':
  257. return 'Provided content type is not a valid parsable format: {}'\
  258. .format(request.mimetype), 415
  259. update_str = request.get_data().decode('utf-8')
  260. local_update_str = g.tbox.localize_ext_str(update_str, nsc['fcres'][uid])
  261. try:
  262. rsrc = rsrc_api.update(uid, local_update_str, is_metadata)
  263. except ResourceNotExistsError as e:
  264. return str(e), 404
  265. except TombstoneError as e:
  266. return _tombstone_response(e, uid)
  267. except (ServerManagedTermError, SingleSubjectError) as e:
  268. return str(e), 412
  269. except InvalidResourceError as e:
  270. return str(e), 415
  271. else:
  272. rsp_headers.update(_headers_from_metadata(rsrc))
  273. return '', 204, rsp_headers
  274. @ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
  275. def patch_resource_metadata(uid):
  276. return patch_resource(uid, True)
  277. @ldp.route('/<path:uid>', methods=['DELETE'])
  278. def delete_resource(uid):
  279. '''
  280. Delete a resource and optionally leave a tombstone.
  281. This behaves differently from FCREPO. A tombstone indicated that the
  282. resource is no longer available at its current location, but its historic
  283. snapshots still are. Also, deleting a resource with a tombstone creates
  284. one more version snapshot of the resource prior to being deleted.
  285. In order to completely wipe out all traces of a resource, the tombstone
  286. must be deleted as well, or the `Prefer:no-tombstone` header can be used.
  287. The latter will forget (completely delete) the resource immediately.
  288. '''
  289. headers = std_headers
  290. if 'prefer' in request.headers:
  291. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  292. leave_tstone = 'no-tombstone' not in prefer
  293. else:
  294. leave_tstone = True
  295. try:
  296. rsrc_api.delete(uid, leave_tstone)
  297. except ResourceNotExistsError as e:
  298. return str(e), 404
  299. except TombstoneError as e:
  300. return _tombstone_response(e, uid)
  301. return '', 204, headers
  302. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  303. 'PATCH', 'DELETE'])
  304. def tombstone(uid):
  305. '''
  306. Handle all tombstone operations.
  307. The only allowed methods are POST and DELETE; any other verb will return a
  308. 405.
  309. '''
  310. try:
  311. rsrc = rsrc_api.get(uid)
  312. except TombstoneError as e:
  313. if request.method == 'DELETE':
  314. if e.uid == uid:
  315. rsrc_api.forget(uid)
  316. return '', 204
  317. else:
  318. return _tombstone_response(e, uid)
  319. elif request.method == 'POST':
  320. if e.uid == uid:
  321. rsrc_uri = rsrc_api.resurrect(uid)
  322. headers = {'Location' : rsrc_uri}
  323. return rsrc_uri, 201, headers
  324. else:
  325. return _tombstone_response(e, uid)
  326. else:
  327. return 'Method Not Allowed.', 405
  328. except ResourceNotExistsError as e:
  329. return str(e), 404
  330. else:
  331. return '', 404
  332. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  333. def post_version(uid):
  334. '''
  335. Create a new resource version.
  336. '''
  337. if request.method == 'PUT':
  338. return 'Method not allowed.', 405
  339. ver_uid = request.headers.get('slug', None)
  340. try:
  341. ver_uid = rsrc_api.create_version(uid, ver_uid)
  342. except ResourceNotExistsError as e:
  343. return str(e), 404
  344. except InvalidResourceError as e:
  345. return str(e), 409
  346. except TombstoneError as e:
  347. return _tombstone_response(e, uid)
  348. else:
  349. return '', 201, {'Location': g.tbox.uid_to_uri(ver_uid)}
  350. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  351. def patch_version(uid, ver_uid):
  352. '''
  353. Revert to a previous version.
  354. NOTE: This creates a new version snapshot.
  355. @param uid (string) Resource UID.
  356. @param ver_uid (string) Version UID.
  357. '''
  358. try:
  359. LdpFactory.from_stored(uid).revert_to_version(ver_uid)
  360. except ResourceNotExistsError as e:
  361. return str(e), 404
  362. except InvalidResourceError as e:
  363. return str(e), 409
  364. except TombstoneError as e:
  365. return _tombstone_response(e, uid)
  366. else:
  367. return '', 204
  368. ## PRIVATE METHODS ##
  369. def _negotiate_content(rsp, headers=None):
  370. '''
  371. Return HTML or serialized RDF depending on accept headers.
  372. '''
  373. if request.accept_mimetypes.best == 'text/html':
  374. rsrc = rsp.resource(request.path)
  375. return render_template(
  376. 'resource.html', rsrc=rsrc, nsm=nsm,
  377. blacklist = vw_blacklist)
  378. else:
  379. for p in vw_blacklist:
  380. rsp.remove((None, p, None))
  381. return (rsp.serialize(format='turtle'), headers)
  382. def _bistream_from_req():
  383. '''
  384. Find how a binary file and its MIMEtype were uploaded in the request.
  385. '''
  386. #logger.debug('Content type: {}'.format(request.mimetype))
  387. #logger.debug('files: {}'.format(request.files))
  388. #logger.debug('stream: {}'.format(request.stream))
  389. if request.mimetype == 'multipart/form-data':
  390. # This seems the "right" way to upload a binary file, with a
  391. # multipart/form-data MIME type and the file in the `file`
  392. # field. This however is not supported by FCREPO4.
  393. stream = request.files.get('file').stream
  394. mimetype = request.files.get('file').content_type
  395. # @TODO This will turn out useful to provide metadata
  396. # with the binary.
  397. #metadata = request.files.get('metadata').stream
  398. else:
  399. # This is a less clean way, with the file in the form body and
  400. # the request as application/x-www-form-urlencoded.
  401. # This is how FCREPO4 accepts binary uploads.
  402. stream = request.stream
  403. # @FIXME Must decide what to do with this.
  404. mimetype = request.mimetype
  405. if mimetype == '' or mimetype == 'application/x-www-form-urlencoded':
  406. if stream.limit == 0:
  407. stream = mimetype = None
  408. else:
  409. mimetype = 'application/octet-stream'
  410. return stream, mimetype
  411. def _get_bitstream(rsrc):
  412. # @TODO This may change in favor of more low-level handling if the file
  413. # system is not local.
  414. return send_file(rsrc.local_path, as_attachment=True,
  415. attachment_filename=rsrc.filename)
  416. def _tombstone_response(e, uid):
  417. headers = {
  418. 'Link': '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  419. } if e.uid == uid else {}
  420. return str(e), 410, headers
  421. def set_post_put_params():
  422. '''
  423. Sets handling and content disposition for POST and PUT by parsing headers.
  424. '''
  425. handling = 'strict'
  426. if 'prefer' in request.headers:
  427. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  428. logger.debug('Parsed Prefer header: {}'.format(prefer))
  429. if 'handling' in prefer:
  430. handling = prefer['handling']['value']
  431. try:
  432. disposition = g.tbox.parse_rfc7240(
  433. request.headers['content-disposition'])
  434. except KeyError:
  435. disposition = None
  436. return handling, disposition
  437. def is_accept_hdr_rdf_parsable():
  438. '''
  439. Check if any of the 'Accept' header values provided is a RDF parsable
  440. format.
  441. '''
  442. for mimetype in request.accept_mimetypes.values():
  443. if LdpFactory.is_rdf_parsable(mimetype):
  444. return True
  445. return False
  446. def parse_repr_options(retr_opts):
  447. '''
  448. Set options to retrieve IMR.
  449. Ideally, IMR retrieval is done once per request, so all the options
  450. are set once in the `imr()` property.
  451. @param retr_opts (dict): Options parsed from `Prefer` header.
  452. '''
  453. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  454. imr_options = {}
  455. if retr_opts.get('value') == 'minimal':
  456. imr_options = {
  457. 'embed_children' : False,
  458. 'incl_children' : False,
  459. 'incl_inbound' : False,
  460. 'incl_srv_mgd' : False,
  461. }
  462. else:
  463. # Default.
  464. imr_options = {
  465. 'embed_children' : False,
  466. 'incl_children' : True,
  467. 'incl_inbound' : False,
  468. 'incl_srv_mgd' : True,
  469. }
  470. # Override defaults.
  471. if 'parameters' in retr_opts:
  472. include = retr_opts['parameters']['include'].split(' ') \
  473. if 'include' in retr_opts['parameters'] else []
  474. omit = retr_opts['parameters']['omit'].split(' ') \
  475. if 'omit' in retr_opts['parameters'] else []
  476. logger.debug('Include: {}'.format(include))
  477. logger.debug('Omit: {}'.format(omit))
  478. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  479. imr_options['embed_children'] = True
  480. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  481. imr_options['incl_children'] = False
  482. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  483. imr_options['incl_inbound'] = True
  484. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  485. imr_options['incl_srv_mgd'] = False
  486. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  487. return imr_options
  488. def _headers_from_metadata(rsrc):
  489. '''
  490. Create a dict of headers from a metadata graph.
  491. @param rsrc (lakesuperior.model.ldpr.Ldpr) Resource to extract metadata
  492. from.
  493. '''
  494. out_headers = defaultdict(list)
  495. digest = rsrc.metadata.value(nsc['premis'].hasMessageDigest)
  496. if digest:
  497. etag = digest.identifier.split(':')[-1]
  498. etag_str = (
  499. 'W/"{}"'.format(etag)
  500. if nsc['ldp'].RDFSource in rsrc.ldp_types
  501. else etag)
  502. out_headers['ETag'] = etag_str,
  503. last_updated_term = rsrc.metadata.value(nsc['fcrepo'].lastModified)
  504. if last_updated_term:
  505. out_headers['Last-Modified'] = arrow.get(last_updated_term)\
  506. .format('ddd, D MMM YYYY HH:mm:ss Z')
  507. for t in rsrc.ldp_types:
  508. out_headers['Link'].append(
  509. '{};rel="type"'.format(t.n3()))
  510. mimetype = rsrc.metadata.value(nsc['ebucore'].hasMimeType)
  511. if mimetype:
  512. out_headers['Content-Type'] = mimetype
  513. return out_headers