ldp.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from functools import wraps
  5. from uuid import uuid4
  6. import arrow
  7. from flask import (
  8. Blueprint, current_app, g, make_response, render_template,
  9. request, send_file)
  10. from rdflib.namespace import XSD
  11. from rdflib.term import Literal
  12. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  13. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  14. from lakesuperior.exceptions import (ResourceNotExistsError, TombstoneError,
  15. ServerManagedTermError, InvalidResourceError, SingleSubjectError,
  16. ResourceExistsError, IncompatibleLdpTypeError)
  17. from lakesuperior.model.generic_resource import PathSegment
  18. from lakesuperior.model.ldp_factory import LdpFactory
  19. from lakesuperior.model.ldp_nr import LdpNr
  20. from lakesuperior.model.ldp_rs import LdpRs
  21. from lakesuperior.model.ldpr import Ldpr
  22. from lakesuperior.store_layouts.ldp_rs.lmdb_store import LmdbStore, TxnManager
  23. from lakesuperior.toolbox import Toolbox
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint for LDP REST API. This is what is usually found under `/rest/` in
# standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
# for backward compatibility.
ldp = Blueprint(
        'ldp', __name__, template_folder='templates',
        static_url_path='/static', static_folder='../../static')

# Content types advertised in the `Accept-Patch` response header.
accept_patch = (
    'application/sparql-update',
)

# Content types advertised in the `Accept-Post` response header. The
# commented-out entries are currently not advertised.
accept_rdf = (
    'application/ld+json',
    'application/n-triples',
    'application/rdf+xml',
    #'application/x-turtle',
    #'application/xhtml+xml',
    #'application/xml',
    #'text/html',
    'text/n3',
    #'text/plain',
    'text/rdf+n3',
    'text/turtle',
)

# Headers used as a base for responses of several endpoints in this module.
std_headers = {
    'Accept-Patch' : ','.join(accept_patch),
    'Accept-Post' : ','.join(accept_rdf),
    #'Allow' : ','.join(allow),
}

'''Predicates excluded by view.'''
# Iterated by `negotiate_content` to strip predicates from serialized RDF and
# passed to the HTML template as `blacklist`. Currently empty.
vw_blacklist = {
}
  55. @ldp.url_defaults
  56. def bp_url_defaults(endpoint, values):
  57. url_prefix = getattr(g, 'url_prefix', None)
  58. if url_prefix is not None:
  59. values.setdefault('url_prefix', url_prefix)
  60. @ldp.url_value_preprocessor
  61. def bp_url_value_preprocessor(endpoint, values):
  62. g.url_prefix = values.pop('url_prefix')
  63. g.webroot = request.host_url + g.url_prefix
  64. @ldp.before_request
  65. def log_request_start():
  66. logger.info('\n\n** Start {} {} **'.format(request.method, request.url))
  67. @ldp.before_request
  68. def instantiate_req_vars():
  69. g.store = current_app.rdfly.store
  70. g.tbox = Toolbox()
  71. @ldp.before_request
  72. def request_timestamp():
  73. g.timestamp = arrow.utcnow()
  74. g.timestamp_term = Literal(g.timestamp, datatype=XSD.dateTime)
  75. @ldp.after_request
  76. def log_request_end(rsp):
  77. logger.info('** End {} {} **\n\n'.format(request.method, request.url))
  78. return rsp
  79. def transaction(write=False):
  80. '''
  81. Handle atomic operations in a store.
  82. This wrapper ensures that a write operation is performed atomically. It
  83. also takes care of sending a message for each resource changed in the
  84. transaction.
  85. '''
  86. def _transaction_deco(fn):
  87. @wraps(fn)
  88. def _wrapper(*args, **kwargs):
  89. g.changelog = []
  90. store = current_app.rdfly.store
  91. if isinstance(store, LmdbStore):
  92. with TxnManager(store, write=write) as txn:
  93. ret = fn(*args, **kwargs)
  94. return ret
  95. else:
  96. try:
  97. ret = fn(*args, **kwargs)
  98. except:
  99. logger.warn('Rolling back transaction.')
  100. store.rollback()
  101. raise
  102. else:
  103. logger.info('Committing transaction.')
  104. #if hasattr(store, '_edits'):
  105. # # @FIXME ugly.
  106. # self.rdfly._conn.optimize_edits()
  107. store.commit()
  108. return ret
  109. # @TODO re-enable, maybe leave out the delta part
  110. #for ev in g.changelog:
  111. # #self._logger.info('Message: {}'.format(pformat(ev)))
  112. # send_event_msg(*ev)
  113. return _wrapper
  114. return _transaction_deco
  115. def send_msg(self, ev_type, remove_trp=None, add_trp=None):
  116. '''
  117. Sent a message about a changed (created, modified, deleted) resource.
  118. '''
  119. try:
  120. type = self.types
  121. actor = self.metadata.value(nsc['fcrepo'].createdBy)
  122. except (ResourceNotExistsError, TombstoneError):
  123. type = set()
  124. actor = None
  125. for t in add_trp:
  126. if t[1] == RDF.type:
  127. type.add(t[2])
  128. elif actor is None and t[1] == nsc['fcrepo'].createdBy:
  129. actor = t[2]
  130. g.changelog.append((set(remove_trp), set(add_trp), {
  131. 'ev_type' : ev_type,
  132. 'time' : g.timestamp,
  133. 'type' : type,
  134. 'actor' : actor,
  135. }))
  136. ## REST SERVICES ##
  137. @ldp.route('/<path:uid>', methods=['GET'], strict_slashes=False)
  138. @ldp.route('/', defaults={'uid': ''}, methods=['GET'], strict_slashes=False)
  139. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  140. methods=['GET'])
  141. @transaction()
  142. def get_resource(uid, force_rdf=False):
  143. '''
  144. Retrieve RDF or binary content.
  145. @param uid (string) UID of resource to retrieve. The repository root has
  146. an empty string for UID.
  147. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  148. a LDP-NR. This is not available in the API but is used e.g. by the
  149. `*/fcr:metadata` endpoint. The default is False.
  150. '''
  151. out_headers = std_headers
  152. repr_options = defaultdict(dict)
  153. if 'prefer' in request.headers:
  154. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  155. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  156. if 'return' in prefer:
  157. repr_options = parse_repr_options(prefer['return'])
  158. try:
  159. rsrc = LdpFactory.from_stored(uid, repr_options)
  160. except ResourceNotExistsError as e:
  161. return str(e), 404
  162. except TombstoneError as e:
  163. return _tombstone_response(e, uid)
  164. else:
  165. out_headers.update(rsrc.head())
  166. if (
  167. isinstance(rsrc, LdpRs)
  168. or isinstance(rsrc, PathSegment)
  169. or is_accept_hdr_rdf_parsable()
  170. or force_rdf):
  171. rsp = rsrc.get()
  172. return negotiate_content(rsp, out_headers)
  173. else:
  174. logger.info('Streaming out binary content.')
  175. rsp = make_response(send_file(rsrc.local_path, as_attachment=True,
  176. attachment_filename=rsrc.filename, mimetype=rsrc.mimetype))
  177. rsp.headers['Link'] = '<{}/fcr:metadata>; rel="describedby"'\
  178. .format(rsrc.uri)
  179. return rsp
  180. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  181. @ldp.route('/', defaults={'parent': ''}, methods=['POST'],
  182. strict_slashes=False)
  183. def post_resource(parent):
  184. '''
  185. Add a new resource in a new URI.
  186. '''
  187. out_headers = std_headers
  188. try:
  189. slug = request.headers['Slug']
  190. logger.info('Slug: {}'.format(slug))
  191. except KeyError:
  192. slug = None
  193. handling, disposition = set_post_put_params()
  194. stream, mimetype = bitstream_from_req()
  195. try:
  196. with TxnManager(g.store, True):
  197. uid = LdpFactory.mint_uid(parent, slug)
  198. logger.debug('Generated UID for POST: {}'.format(uid))
  199. rsrc = LdpFactory.from_provided(
  200. uid, content_length=request.content_length,
  201. stream=stream, mimetype=mimetype, handling=handling,
  202. disposition=disposition)
  203. rsrc.post()
  204. except ResourceNotExistsError as e:
  205. return str(e), 404
  206. except InvalidResourceError as e:
  207. return str(e), 409
  208. except TombstoneError as e:
  209. return _tombstone_response(e, uid)
  210. except ServerManagedTermError as e:
  211. return str(e), 412
  212. hdr = {
  213. 'Location' : rsrc.uri,
  214. }
  215. if isinstance(rsrc, LdpNr):
  216. hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
  217. .format(rsrc.uri)
  218. out_headers.update(hdr)
  219. return rsrc.uri, 201, out_headers
  220. @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
  221. @transaction()
  222. def get_version_info(uid):
  223. '''
  224. Get version info (`fcr:versions`).
  225. '''
  226. try:
  227. rsp = Ldpr(uid).get_version_info()
  228. except ResourceNotExistsError as e:
  229. return str(e), 404
  230. except InvalidResourceError as e:
  231. return str(e), 409
  232. except TombstoneError as e:
  233. return _tombstone_response(e, uid)
  234. else:
  235. return negotiate_content(rsp)
  236. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
  237. @transaction()
  238. def get_version(uid, ver_uid):
  239. '''
  240. Get an individual resource version.
  241. @param uid (string) Resource UID.
  242. @param ver_uid (string) Version UID.
  243. '''
  244. try:
  245. rsp = Ldpr(uid).get_version(ver_uid)
  246. except ResourceNotExistsError as e:
  247. return str(e), 404
  248. except InvalidResourceError as e:
  249. return str(e), 409
  250. except TombstoneError as e:
  251. return _tombstone_response(e, uid)
  252. else:
  253. return negotiate_content(rsp)
  254. @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
  255. @transaction(True)
  256. def post_version(uid):
  257. '''
  258. Create a new resource version.
  259. '''
  260. if request.method == 'PUT':
  261. return 'Method not allowed.', 405
  262. ver_uid = request.headers.get('slug', None)
  263. try:
  264. ver_uri = LdpFactory.from_stored(uid).create_version(ver_uid)
  265. except ResourceNotExistsError as e:
  266. return str(e), 404
  267. except InvalidResourceError as e:
  268. return str(e), 409
  269. except TombstoneError as e:
  270. return _tombstone_response(e, uid)
  271. else:
  272. return '', 201, {'Location': ver_uri}
  273. @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
  274. @transaction(True)
  275. def patch_version(uid, ver_uid):
  276. '''
  277. Revert to a previous version.
  278. NOTE: This creates a new version snapshot.
  279. @param uid (string) Resource UID.
  280. @param ver_uid (string) Version UID.
  281. '''
  282. try:
  283. LdpFactory.from_stored(uid).revert_to_version(ver_uid)
  284. except ResourceNotExistsError as e:
  285. return str(e), 404
  286. except InvalidResourceError as e:
  287. return str(e), 409
  288. except TombstoneError as e:
  289. return _tombstone_response(e, uid)
  290. else:
  291. return '', 204
  292. @ldp.route('/<path:uid>', methods=['PUT'], strict_slashes=False)
  293. @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
  294. methods=['PUT'])
  295. @transaction(True)
  296. def put_resource(uid):
  297. '''
  298. Add a new resource at a specified URI.
  299. '''
  300. # Parse headers.
  301. logger.info('Request headers: {}'.format(request.headers))
  302. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  303. handling, disposition = set_post_put_params()
  304. stream, mimetype = bitstream_from_req()
  305. try:
  306. rsrc = LdpFactory.from_provided(
  307. uid, content_length=request.content_length,
  308. stream=stream, mimetype=mimetype, handling=handling,
  309. disposition=disposition)
  310. if not request.content_length and rsrc.is_stored:
  311. raise InvalidResourceError(rsrc.uid,
  312. 'Resource {} already exists and no data set was provided.')
  313. except InvalidResourceError as e:
  314. return str(e), 409
  315. except (ServerManagedTermError, SingleSubjectError) as e:
  316. return str(e), 412
  317. except IncompatibleLdpTypeError as e:
  318. return str(e), 415
  319. try:
  320. ret = rsrc.put()
  321. rsp_headers.update(rsrc.head())
  322. except (InvalidResourceError, ResourceExistsError) as e:
  323. return str(e), 409
  324. except TombstoneError as e:
  325. return _tombstone_response(e, uid)
  326. if ret == Ldpr.RES_CREATED:
  327. rsp_code = 201
  328. rsp_headers['Location'] = rsp_body = rsrc.uri
  329. if isinstance(rsrc, LdpNr):
  330. rsp_headers['Link'] = '<{0}/fcr:metadata>; rel="describedby"'\
  331. .format(rsrc.uri)
  332. else:
  333. rsp_code = 204
  334. rsp_body = ''
  335. return rsp_body, rsp_code, rsp_headers
  336. @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
  337. @transaction(True)
  338. def patch_resource(uid):
  339. '''
  340. Update an existing resource with a SPARQL-UPDATE payload.
  341. '''
  342. rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
  343. rsrc = LdpRs(uid)
  344. if request.mimetype != 'application/sparql-update':
  345. return 'Provided content type is not a valid parsable format: {}'\
  346. .format(request.mimetype), 415
  347. try:
  348. rsrc.patch(request.get_data().decode('utf-8'))
  349. except ResourceNotExistsError as e:
  350. return str(e), 404
  351. except TombstoneError as e:
  352. return _tombstone_response(e, uid)
  353. except (ServerManagedTermError, SingleSubjectError) as e:
  354. return str(e), 412
  355. else:
  356. rsp_headers.update(rsrc.head())
  357. return '', 204, rsp_headers
@ldp.route('/<path:uid>/fcr:metadata', methods=['PATCH'])
@transaction(True)
def patch_resource_metadata(uid):
    '''
    Handle PATCH requests on the `fcr:metadata` endpoint of a resource.

    This delegates to `patch_resource` with the same UID.

    @param uid (string) Resource UID.
    '''
    return patch_resource(uid)
  362. @ldp.route('/<path:uid>', methods=['DELETE'])
  363. @transaction(True)
  364. def delete_resource(uid):
  365. '''
  366. Delete a resource and optionally leave a tombstone.
  367. This behaves differently from FCREPO. A tombstone indicated that the
  368. resource is no longer available at its current location, but its historic
  369. snapshots still are. Also, deleting a resource with a tombstone creates
  370. one more version snapshot of the resource prior to being deleted.
  371. In order to completely wipe out all traces of a resource, the tombstone
  372. must be deleted as well, or the `Prefer:no-tombstone` header can be used.
  373. The latter will purge the resource immediately.
  374. '''
  375. headers = std_headers
  376. # If referential integrity is enforced, grab all inbound relationships
  377. # to break them.
  378. repr_opts = {'incl_inbound' : True} \
  379. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  380. else {}
  381. if 'prefer' in request.headers:
  382. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  383. leave_tstone = 'no-tombstone' not in prefer
  384. else:
  385. leave_tstone = True
  386. try:
  387. LdpFactory.from_stored(uid, repr_opts).delete(
  388. leave_tstone=leave_tstone)
  389. except ResourceNotExistsError as e:
  390. return str(e), 404
  391. except TombstoneError as e:
  392. return _tombstone_response(e, uid)
  393. return '', 204, headers
  394. @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  395. 'PATCH', 'DELETE'])
  396. @transaction(True)
  397. def tombstone(uid):
  398. '''
  399. Handle all tombstone operations.
  400. The only allowed methods are POST and DELETE; any other verb will return a
  401. 405.
  402. '''
  403. logger.debug('Deleting tombstone for {}.'.format(uid))
  404. rsrc = Ldpr(uid)
  405. try:
  406. rsrc.metadata
  407. except TombstoneError as e:
  408. if request.method == 'DELETE':
  409. if e.uid == uid:
  410. rsrc.purge()
  411. return '', 204
  412. else:
  413. return _tombstone_response(e, uid)
  414. elif request.method == 'POST':
  415. if e.uid == uid:
  416. rsrc_uri = rsrc.resurrect()
  417. headers = {'Location' : rsrc_uri}
  418. return rsrc_uri, 201, headers
  419. else:
  420. return _tombstone_response(e, uid)
  421. else:
  422. return 'Method Not Allowed.', 405
  423. except ResourceNotExistsError as e:
  424. return str(e), 404
  425. else:
  426. return '', 404
  427. def negotiate_content(rsp, headers=None):
  428. '''
  429. Return HTML or serialized RDF depending on accept headers.
  430. '''
  431. if request.accept_mimetypes.best == 'text/html':
  432. rsrc = rsp.resource(request.path)
  433. return render_template(
  434. 'resource.html', rsrc=rsrc, nsm=nsm,
  435. blacklist = vw_blacklist)
  436. else:
  437. for p in vw_blacklist:
  438. rsp.remove((None, p, None))
  439. return (rsp.serialize(format='turtle'), headers)
  440. def bitstream_from_req():
  441. '''
  442. Find how a binary file and its MIMEtype were uploaded in the request.
  443. '''
  444. logger.debug('Content type: {}'.format(request.mimetype))
  445. logger.debug('files: {}'.format(request.files))
  446. logger.debug('stream: {}'.format(request.stream))
  447. if request.mimetype == 'multipart/form-data':
  448. # This seems the "right" way to upload a binary file, with a
  449. # multipart/form-data MIME type and the file in the `file`
  450. # field. This however is not supported by FCREPO4.
  451. stream = request.files.get('file').stream
  452. mimetype = request.files.get('file').content_type
  453. # @TODO This will turn out useful to provide metadata
  454. # with the binary.
  455. #metadata = request.files.get('metadata').stream
  456. #provided_imr = [parse RDF here...]
  457. else:
  458. # This is a less clean way, with the file in the form body and
  459. # the request as application/x-www-form-urlencoded.
  460. # This is how FCREPO4 accepts binary uploads.
  461. stream = request.stream
  462. mimetype = request.mimetype
  463. return stream, mimetype
  464. def _get_bitstream(rsrc):
  465. # @TODO This may change in favor of more low-level handling if the file
  466. # system is not local.
  467. return send_file(rsrc.local_path, as_attachment=True,
  468. attachment_filename=rsrc.filename)
  469. def _tombstone_response(e, uid):
  470. headers = {
  471. 'Link': '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  472. } if e.uid == uid else {}
  473. return str(e), 410, headers
  474. def set_post_put_params():
  475. '''
  476. Sets handling and content disposition for POST and PUT by parsing headers.
  477. '''
  478. handling = 'strict'
  479. if 'prefer' in request.headers:
  480. prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
  481. logger.debug('Parsed Prefer header: {}'.format(prefer))
  482. if 'handling' in prefer:
  483. handling = prefer['handling']['value']
  484. try:
  485. disposition = g.tbox.parse_rfc7240(
  486. request.headers['content-disposition'])
  487. except KeyError:
  488. disposition = None
  489. return handling, disposition
  490. def is_accept_hdr_rdf_parsable():
  491. '''
  492. Check if any of the 'Accept' header values provided is a RDF parsable
  493. format.
  494. '''
  495. for mimetype in request.accept_mimetypes.values():
  496. if LdpFactory.is_rdf_parsable(mimetype):
  497. return True
  498. return False
  499. def parse_repr_options(retr_opts):
  500. '''
  501. Set options to retrieve IMR.
  502. Ideally, IMR retrieval is done once per request, so all the options
  503. are set once in the `imr()` property.
  504. @param retr_opts (dict): Options parsed from `Prefer` header.
  505. '''
  506. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  507. imr_options = {}
  508. if retr_opts.get('value') == 'minimal':
  509. imr_options = {
  510. 'embed_children' : False,
  511. 'incl_children' : False,
  512. 'incl_inbound' : False,
  513. 'incl_srv_mgd' : False,
  514. }
  515. else:
  516. # Default.
  517. imr_options = {
  518. 'embed_children' : False,
  519. 'incl_children' : True,
  520. 'incl_inbound' : False,
  521. 'incl_srv_mgd' : True,
  522. }
  523. # Override defaults.
  524. if 'parameters' in retr_opts:
  525. include = retr_opts['parameters']['include'].split(' ') \
  526. if 'include' in retr_opts['parameters'] else []
  527. omit = retr_opts['parameters']['omit'].split(' ') \
  528. if 'omit' in retr_opts['parameters'] else []
  529. logger.debug('Include: {}'.format(include))
  530. logger.debug('Omit: {}'.format(omit))
  531. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  532. imr_options['embed_children'] = True
  533. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  534. imr_options['incl_children'] = False
  535. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  536. imr_options['incl_inbound'] = True
  537. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  538. imr_options['incl_srv_mgd'] = False
  539. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  540. return imr_options