ldp.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from uuid import uuid4
  5. import arrow
  6. from flask import (Blueprint, current_app, g, render_template, request,
  7. send_file, url_for)
  8. from rdflib import Graph
  9. from rdflib.namespace import RDF, XSD
  10. from rdflib.term import Literal
  11. from werkzeug.datastructures import FileStorage
  12. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  13. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  14. from lakesuperior.exceptions import *
  15. from lakesuperior.model.ldpr import Ldpr
  16. from lakesuperior.model.ldp_nr import LdpNr
  17. from lakesuperior.model.ldp_rs import Ldpc, LdpDc, LdpIc, LdpRs
  # Module-level logger, named after this module.
  logger = logging.getLogger(__name__)

  # Flask blueprint for the LDP REST API. Standard fcrepo4 usually exposes this
  # under `/rest/`; here it lives under `/ldp`, although `/rest` can be kept
  # initially for backward compatibility.
  ldp = Blueprint(
      'ldp',
      __name__,
      static_folder='../../static',
      static_url_path='/static',
      template_folder='templates',
  )
  # Media types advertised in the `Accept-Patch` response header.
  accept_patch = ('application/sparql-update',)

  # RDF media types advertised in the `Accept-Post` response header. Entries
  # that are commented out are currently not advertised.
  accept_rdf = (
      'application/ld+json',
      'application/n-triples',
      'application/rdf+xml',
      #'application/x-turtle',
      #'application/xhtml+xml',
      #'application/xml',
      #'text/html',
      'text/n3',
      #'text/plain',
      'text/rdf+n3',
      'text/turtle',
  )

  # Headers used as the starting point for responses of the REST endpoints.
  _header_separator = ','
  std_headers = {
      'Accept-Patch': _header_separator.join(accept_patch),
      'Accept-Post': _header_separator.join(accept_rdf),
      #'Allow' : ','.join(allow),
  }
  @ldp.url_defaults
  def bp_url_defaults(endpoint, values):
      '''
      Supply the current URL prefix as a default when building URLs.

      If `g.url_prefix` is set (and not None) and `values` has no `url_prefix`
      entry yet, the prefix is added to `values`.

      @param endpoint (string) Endpoint the URL is built for. Unused here.
      @param values (dict) URL values; may be modified in place.
      '''
      prefix = getattr(g, 'url_prefix', None)
      if prefix is None:
          return
      values.setdefault('url_prefix', prefix)
  @ldp.url_value_preprocessor
  def bp_url_value_preprocessor(endpoint, values):
      '''
      Move the URL prefix from the matched URL values onto `g`.

      Removes `url_prefix` from `values` and stores it as `g.url_prefix`; also
      sets `g.webroot` to the request host URL followed by that prefix.

      @param endpoint (string) Matched endpoint. Unused here.
      @param values (dict) Matched URL values; `url_prefix` is popped from it.
      '''
      prefix = values.pop('url_prefix')
      g.url_prefix = prefix
      g.webroot = request.host_url + prefix
  @ldp.before_request
  def log_request_start():
      '''
      Log the HTTP method and URL of the request that is about to be handled.
      '''
      start_msg = '\n\n** Start {} {} **'.format(request.method, request.url)
      logger.info(start_msg)
  @ldp.before_request
  def instantiate_toolbox():
      '''
      Create a fresh `Toolbox` instance and store it as `g.tbox`.
      '''
      # NOTE(review): `Toolbox` is not among the imports visible at the top of
      # this file; confirm that it is actually in scope at request time.
      toolbox = Toolbox()
      g.tbox = toolbox
  @ldp.before_request
  def request_timestamp():
      '''
      Record the current UTC time on `g` for use during the request.

      `g.timestamp` holds the value returned by `arrow.utcnow()`;
      `g.timestamp_term` holds the same value wrapped in a RDF literal typed
      as `xsd:dateTime`.
      '''
      now = arrow.utcnow()
      g.timestamp = now
      g.timestamp_term = Literal(now, datatype=XSD.dateTime)
  @ldp.after_request
  def log_request_end(rsp):
      '''
      Log the HTTP method and URL of the request that was just handled.

      @param rsp Response object passed in by Flask.

      @return The same response object, untouched.
      '''
      end_msg = '** End {} {} **\n\n'.format(request.method, request.url)
      logger.info(end_msg)
      return rsp
  68. ## REST SERVICES ##
  @ldp.route('/<path:uuid>', methods=['GET'], strict_slashes=False)
  @ldp.route('/', defaults={'uuid': None}, methods=['GET'], strict_slashes=False)
  @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
          methods=['GET'])
  def get_resource(uuid, force_rdf=False):
      '''
      Retrieve RDF or binary content.

      @param uuid (string) UUID of resource to retrieve.
      @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
      a LDP-NR. This is not available in the API but is used e.g. by the
      `*/fcr:metadata` endpoint. The default is False.

      @return A Flask response value: rendered HTML, a `(body, headers)` tuple
      with the Turtle serialization, a file download, or a `(message, status)`
      style tuple when the resource is missing or is a tombstone.
      '''
      # Work on a per-request copy. `std_headers` is module-level state shared
      # by all requests and must not accumulate resource-specific headers.
      out_headers = dict(std_headers)

      repr_options = defaultdict(dict)
      if 'prefer' in request.headers:
          prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
          logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
          if 'return' in prefer:
              repr_options = parse_repr_options(prefer['return'])

      try:
          rsrc = Ldpr.outbound_inst(uuid, repr_options)
      except ResourceNotExistsError as e:
          return str(e), 404
      except TombstoneError as e:
          return _tombstone_response(e, uuid)

      out_headers.update(rsrc.head())

      if isinstance(rsrc, LdpRs) or is_accept_hdr_rdf_parsable() or force_rdf:
          resp = rsrc.get()
          if request.accept_mimetypes.best == 'text/html':
              rsrc = resp.resource(request.path)
              return render_template('resource.html', rsrc=rsrc, nsm=nsm)
          return (resp.serialize(format='turtle'), out_headers)

      # Log before returning; a statement placed after the `return` would
      # never run.
      logger.info('Streaming out binary content.')
      return send_file(rsrc.local_path, as_attachment=True,
              attachment_filename=rsrc.filename)
  @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  @ldp.route('/', defaults={'parent': None}, methods=['POST'],
          strict_slashes=False)
  def post_resource(parent):
      '''
      Add a new resource in a new URI.

      @param parent (string) UUID of the resource to create the new resource
      under, or None when posting to the root route.

      @return A Flask response tuple: `(uri, 201, headers)` on success, or a
      message with a 404, 409, 410 or 412 status on failure.
      '''
      # Per-request copy: adding `Location` to the shared module-level
      # `std_headers` would leak it into every later response.
      out_headers = dict(std_headers)

      slug = request.headers.get('Slug')
      if slug is not None:
          logger.info('Slug: {}'.format(slug))

      handling, disposition = set_post_put_params()
      stream, mimetype = bitstream_from_req()

      # `uuid` must be bound before the `try`: if `uuid_for_post` raises
      # TombstoneError, the handler below would otherwise hit an unbound
      # local. Until a new UUID is generated, the resource addressed by the
      # request URL is the parent, so that is what the tombstone is compared
      # against.
      uuid = parent
      try:
          uuid = uuid_for_post(parent, slug)
          logger.debug('Generated UUID for POST: {}'.format(uuid))
          rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
                  stream=stream, mimetype=mimetype, handling=handling,
                  disposition=disposition)
      except ResourceNotExistsError as e:
          return str(e), 404
      except InvalidResourceError as e:
          return str(e), 409
      except TombstoneError as e:
          return _tombstone_response(e, uuid)

      try:
          rsrc.post()
      except ServerManagedTermError as e:
          return str(e), 412

      out_headers['Location'] = rsrc.uri

      return rsrc.uri, 201, out_headers
  @ldp.route('/<path:uuid>', methods=['PUT'], strict_slashes=False)
  @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
          methods=['PUT'])
  def put_resource(uuid, force_rdf=False):
      '''
      Add a new resource at a specified URI.

      @param uuid (string) UUID of the resource to create or replace.
      @param force_rdf (boolean) Set to True by the `*/fcr:metadata` route
      defaults. It is accepted so that route can dispatch to this view; it is
      not otherwise used here. The default is False.

      @return A Flask response tuple: `(uri, 201, headers)` if the resource
      was created, `('', 204, headers)` otherwise, or a message with a 409,
      410, 412 or 415 status on failure.
      '''
      # Parse headers.
      logger.info('Request headers: {}'.format(request.headers))
      # Per-request copy: writing `Location` into the shared module-level
      # `std_headers` would leak it into every later response.
      rsp_headers = dict(std_headers)

      handling, disposition = set_post_put_params()
      stream, mimetype = bitstream_from_req()

      try:
          rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
                  stream=stream, mimetype=mimetype, handling=handling,
                  disposition=disposition)
      except InvalidResourceError as e:
          return str(e), 409
      except ServerManagedTermError as e:
          return str(e), 412
      except IncompatibleLdpTypeError as e:
          return str(e), 415

      try:
          ret = rsrc.put()
      except (InvalidResourceError, ResourceExistsError) as e:
          return str(e), 409
      except TombstoneError as e:
          return _tombstone_response(e, uuid)

      if ret == Ldpr.RES_CREATED:
          rsp_code = 201
          rsp_body = rsrc.uri
          rsp_headers['Location'] = rsp_body
      else:
          rsp_code = 204
          rsp_body = ''

      return rsp_body, rsp_code, rsp_headers
  @ldp.route('/<path:uuid>', methods=['PATCH'], strict_slashes=False)
  def patch_resource(uuid):
      '''
      Update an existing resource with a SPARQL-UPDATE payload.

      @param uuid (string) UUID of the resource to update.

      @return A Flask response tuple: `('', 204, headers)` on success, or a
      message with a 404, 410, 412 or 415 status on failure.
      '''
      headers = std_headers
      rsrc = LdpRs(uuid)

      # Only SPARQL-Update request bodies are handled.
      content_type = request.mimetype
      if content_type != 'application/sparql-update':
          reason = 'Provided content type is not a valid parsable format: {}'\
                  .format(content_type)
          return reason, 415

      try:
          payload = request.get_data()
          rsrc.patch(payload.decode('utf-8'))
      except ResourceNotExistsError as e:
          return str(e), 404
      except TombstoneError as e:
          return _tombstone_response(e, uuid)
      except ServerManagedTermError as e:
          return str(e), 412

      return '', 204, headers
  @ldp.route('/<path:uuid>/fcr:metadata', methods=['PATCH'])
  def patch_resource_metadata(uuid):
      '''
      Handle PATCH on the `fcr:metadata` endpoint of a resource.

      This simply delegates to `patch_resource` with the same UUID.

      @param uuid (string) UUID of the resource to update.
      '''
      response = patch_resource(uuid)
      return response
  @ldp.route('/<path:uuid>', methods=['DELETE'])
  def delete_resource(uuid):
      '''
      Delete a resource.

      A tombstone is left behind unless the `Prefer` request header, once
      parsed, contains `no-tombstone`.

      @param uuid (string) UUID of the resource to delete.

      @return A Flask response tuple: `('', 204, headers)` on success, or a
      message with a 404 or 410 status on failure.
      '''
      headers = std_headers

      # If referential integrity is enforced, grab all inbound relationships
      # to break them.
      if current_app.config['store']['ldp_rs']['referential_integrity']:
          repr_opts = {'incl_inbound' : True}
      else:
          repr_opts = {}

      leave_tstone = True
      if 'prefer' in request.headers:
          prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
          leave_tstone = 'no-tombstone' not in prefer

      try:
          target = Ldpr.outbound_inst(uuid, repr_opts)
          target.delete(leave_tstone=leave_tstone)
      except ResourceNotExistsError as e:
          return str(e), 404
      except TombstoneError as e:
          return _tombstone_response(e, uuid)

      return '', 204, headers
  @ldp.route('/<path:uuid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
          'PATCH', 'DELETE'])
  def tombstone(uuid):
      '''
      Handle all tombstone operations.

      Accessing the resource's `imr` is expected to raise `TombstoneError` if
      the resource is a tombstone. In that case DELETE removes the tombstone
      (204) when the error refers to this very UUID, or returns the tombstone
      response otherwise; any other verb gets a 405. If no tombstone is found
      the response is a 404.

      @param uuid (string) UUID of the resource whose tombstone is addressed.
      '''
      logger.debug('Deleting tombstone for {}.'.format(uuid))

      rsrc = Ldpr(uuid)
      try:
          # Only accessed for the exceptions it may raise.
          _ = rsrc.imr
      except TombstoneError as e:
          if request.method != 'DELETE':
              return 'Method Not Allowed.', 405
          if e.uuid == uuid:
              rsrc.delete_tombstone()
              return '', 204
          return _tombstone_response(e, uuid)
      except ResourceNotExistsError as e:
          return str(e), 404

      # The resource exists and is not a tombstone.
      return '', 404
  def uuid_for_post(parent_uuid=None, slug=None):
      '''
      Validate conditions to perform a POST and return an LDP resource
      UUID for using with the `post` method.

      This may raise an exception resulting in a 404 if the parent is not
      found or a 409 if the parent is not a valid container.

      @param parent_uuid (string) UUID of the parent resource, if any.
      @param slug (string) Suggested last path segment for the new UUID. It is
      used as is if no resource exists at the resulting UUID; otherwise a
      random UUID is generated instead.

      @return (string) UUID for the new resource, prefixed with the parent
      UUID and a slash if a parent was given.

      @raise InvalidResourceError If the parent is a pairtree or is not a
      container.
      '''
      def split_if_legacy(uuid):
          if current_app.config['store']['ldp_rs']['legacy_ptree_split']:
              uuid = g.tbox.split_uuid(uuid)
          return uuid

      def mint_uuid():
          # Random UUID, split according to the legacy pairtree setting.
          return split_if_legacy(str(uuid4()))

      # Shortcut!
      if not slug and not parent_uuid:
          return mint_uuid()

      parent = Ldpr.outbound_inst(parent_uuid, repr_opts={'incl_children' : False})

      if nsc['fcrepo'].Pairtree in parent.types:
          raise InvalidResourceError(parent.uuid,
                  'Resources cannot be created under a pairtree.')

      # Set prefix.
      if parent_uuid:
          parent_types = {t.identifier for t in parent.imr.objects(RDF.type)}
          logger.debug('Parent types: {}'.format(pformat(parent_types)))
          if nsc['ldp'].Container not in parent_types:
              # Pass the UUID first and the message second, matching the
              # pairtree check above; the message used to be passed as the
              # first positional argument.
              raise InvalidResourceError(parent_uuid,
                      'Parent {} is not a container.'.format(parent_uuid))
          pfx = parent_uuid + '/'
      else:
          pfx = ''

      # Create candidate UUID and validate.
      if slug:
          cnd_uuid = pfx + slug
          if not current_app.rdfly.ask_rsrc_exists(nsc['fcres'][cnd_uuid]):
              return cnd_uuid

      # No slug was given, or the slug is already taken.
      return pfx + mint_uuid()
  def bitstream_from_req():
      '''
      Find how a binary file and its MIMEtype were uploaded in the request.

      @return (tuple) The content stream and its MIME type.
      '''
      logger.debug('Content type: {}'.format(request.mimetype))
      logger.debug('files: {}'.format(request.files))
      logger.debug('stream: {}'.format(request.stream))

      if request.mimetype != 'multipart/form-data':
          # This is a less clean way, with the file in the form body and
          # the request as application/x-www-form-urlencoded.
          # This is how FCREPO4 accepts binary uploads.
          return request.stream, request.mimetype

      # This seems the "right" way to upload a binary file, with a
      # multipart/form-data MIME type and the file in the `file`
      # field. This however is not supported by FCREPO4.
      # NOTE(review): a multipart request without a `file` field fails here
      # with an AttributeError on None.
      upload = request.files.get('file')
      # @TODO This will turn out useful to provide metadata
      # with the binary.
      #metadata = request.files.get('metadata').stream
      #provided_imr = [parse RDF here...]
      return upload.stream, upload.content_type
  def _get_bitstream(rsrc):
      '''
      Send the binary content of a resource as a file attachment.

      @param rsrc Resource exposing `local_path` and `filename`.

      @return The response produced by `send_file`.
      '''
      # @TODO This may change in favor of more low-level handling if the file
      # system is not local.
      path, name = rsrc.local_path, rsrc.filename
      return send_file(path, as_attachment=True, attachment_filename=name)
  318. def _tombstone_response(e, uuid):
  319. headers = {
  320. 'Link' : '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  321. } if e.uuid == uuid else {}
  322. return str(e), 410, headers
  def set_post_put_params():
      '''
      Sets handling and content disposition for POST and PUT by parsing headers.

      @return (tuple) The `handling` value from the `Prefer` header (None if
      not provided) and the parsed `Content-Disposition` header (None if the
      header is missing).
      '''
      req_headers = request.headers

      handling = None
      if 'prefer' in req_headers:
          prefer = g.tbox.parse_rfc7240(req_headers['prefer'])
          logger.debug('Parsed Prefer header: {}'.format(prefer))
          if 'handling' in prefer:
              handling = prefer['handling']['value']

      try:
          disposition = g.tbox.parse_rfc7240(req_headers['content-disposition'])
      except KeyError:
          disposition = None

      return handling, disposition
  def is_accept_hdr_rdf_parsable():
      '''
      Check if any of the 'Accept' header values provided is a RDF parsable
      format.

      @return (boolean) True as soon as one accepted MIME type is RDF
      parsable, False if none is.
      '''
      return any(
          Ldpr.is_rdf_parsable(accepted)
          for accepted in request.accept_mimetypes.values()
      )
  def parse_repr_options(retr_opts):
      '''
      Set options to retrieve IMR.

      Ideally, IMR retrieval is done once per request, so all the options
      are set once in the `imr()` property.

      @param retr_opts (dict): Options parsed from `Prefer` header. It is only
      read, never modified.

      @return (dict) Boolean flags `embed_children`, `incl_children`,
      `incl_inbound` and `incl_srv_mgd`.
      '''
      logger.debug('Parsing retrieval options: {}'.format(retr_opts))

      # Read-only lookup: `setdefault` would insert a `'value': None` entry
      # into the caller's dict as a side effect.
      minimal = retr_opts.get('value') == 'minimal'

      # Defaults. The minimal representation leaves out children and
      # server-managed triples; the default one includes both.
      imr_options = {
          'embed_children' : False,
          'incl_children' : not minimal,
          'incl_inbound' : False,
          'incl_srv_mgd' : not minimal,
      }

      # Override defaults.
      if 'parameters' in retr_opts:
          params = retr_opts['parameters']
          include = params['include'].split(' ') if 'include' in params else []
          omit = params['omit'].split(' ') if 'omit' in params else []

          logger.debug('Include: {}'.format(include))
          logger.debug('Omit: {}'.format(omit))

          if str(Ldpr.EMBED_CHILD_RES_URI) in include:
              imr_options['embed_children'] = True
          if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
              imr_options['incl_children'] = False
          if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
              imr_options['incl_inbound'] = True
          if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
              imr_options['incl_srv_mgd'] = False

      logger.debug('Retrieval options: {}'.format(pformat(imr_options)))

      return imr_options