ldp.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. import logging
  2. from collections import defaultdict
  3. from uuid import uuid4
  4. from flask import Blueprint, current_app, g, request, send_file, url_for
  5. from rdflib import Graph
  6. from rdflib.namespace import RDF, XSD
  7. from werkzeug.datastructures import FileStorage
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.exceptions import *
  10. from lakesuperior.model.ldpr import Ldpr
  11. from lakesuperior.model.ldp_nr import LdpNr
  12. from lakesuperior.model.ldp_rs import Ldpc, LdpDc, LdpIc, LdpRs
  13. from lakesuperior.toolbox import Toolbox
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Blueprint for LDP REST API. This is what is usually found under `/rest/` in
# standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
# for backward compatibility.
# All the view functions below are registered on this blueprint.
ldp = Blueprint('ldp', __name__)
  19. accept_patch = (
  20. 'application/sparql-update',
  21. )
  22. accept_rdf = (
  23. 'application/ld+json',
  24. 'application/n-triples',
  25. 'application/rdf+xml',
  26. #'application/x-turtle',
  27. #'application/xhtml+xml',
  28. #'application/xml',
  29. #'text/html',
  30. 'text/n3',
  31. #'text/plain',
  32. 'text/rdf+n3',
  33. 'text/turtle',
  34. )
  35. #allow = (
  36. # 'COPY',
  37. # 'DELETE',
  38. # 'GET',
  39. # 'HEAD',
  40. # 'MOVE',
  41. # 'OPTIONS',
  42. # 'PATCH',
  43. # 'POST',
  44. # 'PUT',
  45. #)
  46. std_headers = {
  47. 'Accept-Patch' : ','.join(accept_patch),
  48. 'Accept-Post' : ','.join(accept_rdf),
  49. #'Allow' : ','.join(allow),
  50. }
  51. @ldp.url_defaults
  52. def bp_url_defaults(endpoint, values):
  53. url_prefix = getattr(g, 'url_prefix', None)
  54. if url_prefix is not None:
  55. values.setdefault('url_prefix', url_prefix)
  56. @ldp.url_value_preprocessor
  57. def bp_url_value_preprocessor(endpoint, values):
  58. g.url_prefix = values.pop('url_prefix')
  59. ## REST SERVICES ##
  60. @ldp.route('/<path:uuid>', methods=['GET'])
  61. @ldp.route('/', defaults={'uuid': None}, methods=['GET'], strict_slashes=False)
  62. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  63. methods=['GET'])
  64. def get_resource(uuid, force_rdf=False):
  65. '''
  66. Retrieve RDF or binary content.
  67. @param uuid (string) UUID of resource to retrieve.
  68. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  69. a LDP-NR. This is not available in the API but is used e.g. by the
  70. `*/fcr:metadata` endpoint. The default is False.
  71. '''
  72. out_headers = std_headers
  73. repr_options = defaultdict(dict)
  74. if 'prefer' in request.headers:
  75. prefer = Toolbox().parse_rfc7240(request.headers['prefer'])
  76. logger.debug('Parsed Prefer header: {}'.format(prefer))
  77. if 'return' in prefer:
  78. repr_options = parse_repr_options(prefer['return'])
  79. try:
  80. rsrc = Ldpr.outbound_inst(uuid, repr_options)
  81. except ResourceNotExistsError as e:
  82. return str(e), 404
  83. except TombstoneError as e:
  84. return _tombstone_response(e, uuid)
  85. else:
  86. out_headers.update(rsrc.head())
  87. if isinstance(rsrc, LdpRs) \
  88. or is_accept_hdr_rdf_parsable() \
  89. or force_rdf:
  90. return (rsrc.get(), out_headers)
  91. else:
  92. return send_file(rsrc.local_path, as_attachment=True,
  93. attachment_filename=rsrc.filename)
  94. @ldp.route('/<path:parent>', methods=['POST'])
  95. @ldp.route('/', defaults={'parent': None}, methods=['POST'],
  96. strict_slashes=False)
  97. def post_resource(parent):
  98. '''
  99. Add a new resource in a new URI.
  100. '''
  101. out_headers = std_headers
  102. try:
  103. slug = request.headers['Slug']
  104. except KeyError:
  105. slug = None
  106. handling, disposition = set_post_put_params()
  107. stream, mimetype = bitstream_from_req()
  108. try:
  109. uuid = uuid_for_post(parent, slug)
  110. rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
  111. stream=stream, mimetype=mimetype, handling=handling,
  112. disposition=disposition)
  113. except ResourceNotExistsError as e:
  114. return str(e), 404
  115. except InvalidResourceError as e:
  116. return str(e), 409
  117. except TombstoneError as e:
  118. return _tombstone_response(e, uuid)
  119. try:
  120. rsrc.post()
  121. except ServerManagedTermError as e:
  122. return str(e), 412
  123. out_headers.update({
  124. 'Location' : rsrc.uri,
  125. })
  126. return rsrc.uri, 201, out_headers
  127. @ldp.route('/<path:uuid>', methods=['PUT'])
  128. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  129. methods=['PUT'])
  130. def put_resource(uuid):
  131. '''
  132. Add a new resource at a specified URI.
  133. '''
  134. # Parse headers.
  135. logger.info('Request headers: {}'.format(request.headers))
  136. rsp_headers = std_headers
  137. handling, disposition = set_post_put_params()
  138. stream, mimetype = bitstream_from_req()
  139. try:
  140. rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
  141. stream=stream, mimetype=mimetype, handling=handling,
  142. disposition=disposition)
  143. except ServerManagedTermError as e:
  144. return str(e), 412
  145. except IncompatibleLdpTypeError as e:
  146. return str(e), 415
  147. try:
  148. ret = rsrc.put()
  149. except (InvalidResourceError, ResourceExistsError ) as e:
  150. return str(e), 409
  151. except TombstoneError as e:
  152. return _tombstone_response(e, uuid)
  153. res_code = 201 if ret == Ldpr.RES_CREATED else 204
  154. return '', res_code, rsp_headers
  155. @ldp.route('/<path:uuid>', methods=['PATCH'])
  156. def patch_resource(uuid):
  157. '''
  158. Update an existing resource with a SPARQL-UPDATE payload.
  159. '''
  160. headers = std_headers
  161. rsrc = LdpRs(uuid)
  162. if request.mimetype != 'application/sparql-update':
  163. return 'Provided content type is not a valid parsable format: {}'\
  164. .format(request.mimetype), 415
  165. try:
  166. rsrc.patch(request.get_data().decode('utf-8'))
  167. except ResourceNotExistsError as e:
  168. return str(e), 404
  169. except TombstoneError as e:
  170. return _tombstone_response(e, uuid)
  171. except ServerManagedTermError as e:
  172. return str(e), 412
  173. return '', 204, headers
@ldp.route('/<path:uuid>/fcr:metadata', methods=['PATCH'])
def patch_resource_metadata(uuid):
    '''
    Handle PATCH on the `fcr:metadata` endpoint.

    This simply delegates to `patch_resource` with the same UUID.

    @param uuid (string) UUID of the resource to update.
    '''
    return patch_resource(uuid)
  177. @ldp.route('/<path:uuid>', methods=['DELETE'])
  178. def delete_resource(uuid):
  179. '''
  180. Delete a resource.
  181. '''
  182. headers = std_headers
  183. # If referential integrity is enforced, grab all inbound relationships
  184. # to break them.
  185. repr_opts = {'incl_inbound' : True} \
  186. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  187. else {}
  188. if 'prefer' in request.headers:
  189. prefer = Toolbox().parse_rfc7240(request.headers['prefer'])
  190. leave_tstone = 'no-tombstone' not in prefer
  191. else:
  192. leave_tstone = True
  193. try:
  194. Ldpr.outbound_inst(uuid, repr_opts).delete(leave_tstone=leave_tstone)
  195. except ResourceNotExistsError as e:
  196. return str(e), 404
  197. except TombstoneError as e:
  198. return _tombstone_response(e, uuid)
  199. return '', 204, headers
  200. @ldp.route('/<path:uuid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  201. 'PATCH', 'DELETE'])
  202. def tombstone(uuid):
  203. '''
  204. Handle all tombstone operations.
  205. The only allowed method is DELETE; any other verb will return a 405.
  206. '''
  207. logger.debug('Deleting tombstone for {}.'.format(uuid))
  208. rsrc = Ldpr(uuid)
  209. try:
  210. imr = rsrc.imr
  211. except TombstoneError as e:
  212. if request.method == 'DELETE':
  213. if e.uuid == uuid:
  214. rsrc.delete_tombstone()
  215. return '', 204
  216. else:
  217. return _tombstone_response(e, uuid)
  218. else:
  219. return 'Method Not Allowed.', 405
  220. except ResourceNotExistsError as e:
  221. return str(e), 404
  222. else:
  223. return '', 404
  224. def uuid_for_post(parent_uuid=None, slug=None):
  225. '''
  226. Validate conditions to perform a POST and return an LDP resource
  227. UUID for using with the `post` method.
  228. This may raise an exception resulting in a 404 if the parent is not
  229. found or a 409 if the parent is not a valid container.
  230. '''
  231. # Shortcut!
  232. if not slug and not parent_uuid:
  233. return str(uuid4())
  234. parent = Ldpr.outbound_inst(parent_uuid, repr_opts={'incl_children' : False})
  235. # Set prefix.
  236. if parent_uuid:
  237. parent_types = { t.identifier for t in \
  238. parent.imr.objects(RDF.type) }
  239. logger.debug('Parent types: {}'.format(
  240. parent_types))
  241. if nsc['ldp'].Container not in parent_types:
  242. raise InvalidResourceError('Parent {} is not a container.'
  243. .format(parent_uuid))
  244. pfx = parent_uuid + '/'
  245. else:
  246. pfx = ''
  247. # Create candidate UUID and validate.
  248. if slug:
  249. cnd_uuid = pfx + slug
  250. if current_app.rdfly.ask_rsrc_exists(nsc['fcres'][cnd_uuid]):
  251. uuid = pfx + str(uuid4())
  252. else:
  253. uuid = cnd_uuid
  254. else:
  255. uuid = pfx + str(uuid4())
  256. return uuid
  257. def bitstream_from_req():
  258. '''
  259. Find how a binary file and its MIMEtype were uploaded in the request.
  260. '''
  261. logger.debug('Content type: {}'.format(request.mimetype))
  262. logger.debug('files: {}'.format(request.files))
  263. logger.debug('stream: {}'.format(request.stream))
  264. if request.mimetype == 'multipart/form-data':
  265. # This seems the "right" way to upload a binary file, with a
  266. # multipart/form-data MIME type and the file in the `file`
  267. # field. This however is not supported by FCREPO4.
  268. stream = request.files.get('file').stream
  269. mimetype = request.files.get('file').content_type
  270. # @TODO This will turn out useful to provide metadata
  271. # with the binary.
  272. #metadata = request.files.get('metadata').stream
  273. #provided_imr = [parse RDF here...]
  274. else:
  275. # This is a less clean way, with the file in the form body and
  276. # the request as application/x-www-form-urlencoded.
  277. # This is how FCREPO4 accepts binary uploads.
  278. stream = request.stream
  279. mimetype = request.mimetype
  280. return stream, mimetype
  281. def _get_bitstream(rsrc):
  282. out_headers = std_headers
  283. # @TODO This may change in favor of more low-level handling if the file
  284. # system is not local.
  285. return send_file(rsrc.local_path, as_attachment=True,
  286. attachment_filename=rsrc.filename)
  287. def _tombstone_response(e, uuid):
  288. headers = {
  289. 'Link' : '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  290. } if e.uuid == uuid else {}
  291. return str(e), 410, headers
  292. def set_post_put_params():
  293. '''
  294. Sets handling and content disposition for POST and PUT by parsing headers.
  295. '''
  296. handling = None
  297. if 'prefer' in request.headers:
  298. prefer = Toolbox().parse_rfc7240(request.headers['prefer'])
  299. logger.debug('Parsed Prefer header: {}'.format(prefer))
  300. if 'handling' in prefer:
  301. handling = prefer['handling']['value']
  302. try:
  303. disposition = Toolbox().parse_rfc7240(
  304. request.headers['content-disposition'])
  305. except KeyError:
  306. disposition = None
  307. return handling, disposition
  308. def is_accept_hdr_rdf_parsable():
  309. '''
  310. Check if any of the 'Accept' header values provided is a RDF parsable
  311. format.
  312. '''
  313. for mimetype in request.accept_mimetypes.values():
  314. if Ldpr.is_rdf_parsable(mimetype):
  315. return True
  316. return False
  317. def parse_repr_options(retr_opts):
  318. '''
  319. Set options to retrieve IMR.
  320. Ideally, IMR retrieval is done once per request, so all the options
  321. are set once in the `imr()` property.
  322. @param retr_opts (dict): Options parsed from `Prefer` header.
  323. '''
  324. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  325. imr_options = {}
  326. if retr_opts.setdefault('value') == 'minimal':
  327. imr_options = {
  328. 'embed_children' : False,
  329. 'incl_children' : False,
  330. 'incl_inbound' : False,
  331. 'incl_srv_mgd' : False,
  332. }
  333. else:
  334. # Default.
  335. imr_options = {
  336. 'embed_children' : False,
  337. 'incl_children' : True,
  338. 'incl_inbound' : False,
  339. 'incl_srv_mgd' : True,
  340. }
  341. # Override defaults.
  342. if 'parameters' in retr_opts:
  343. include = retr_opts['parameters']['include'].split(' ') \
  344. if 'include' in retr_opts['parameters'] else []
  345. omit = retr_opts['parameters']['omit'].split(' ') \
  346. if 'omit' in retr_opts['parameters'] else []
  347. logger.debug('Include: {}'.format(include))
  348. logger.debug('Omit: {}'.format(omit))
  349. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  350. imr_options['embed_children'] = True
  351. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  352. imr_options['incl_children'] = False
  353. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  354. imr_options['incl_inbound'] = True
  355. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  356. imr_options['incl_srv_mgd'] = False
  357. logger.debug('Retrieval options: {}'.format(imr_options))
  358. return imr_options