ldp.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. import logging
  2. from collections import defaultdict
  3. from pprint import pformat
  4. from uuid import uuid4
  5. import arrow
  6. from flask import Blueprint, current_app, g, request, send_file, url_for
  7. from rdflib import Graph
  8. from rdflib.namespace import RDF, XSD
  9. from rdflib.term import Literal
  10. from werkzeug.datastructures import FileStorage
  11. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  12. from lakesuperior.exceptions import *
  13. from lakesuperior.model.ldpr import Ldpr
  14. from lakesuperior.model.ldp_nr import LdpNr
  15. from lakesuperior.model.ldp_rs import Ldpc, LdpDc, LdpIc, LdpRs
  16. from lakesuperior.toolbox import Toolbox
  17. logger = logging.getLogger(__name__)
  18. # Blueprint for LDP REST API. This is what is usually found under `/rest/` in
  19. # standard fcrepo4. Here, it is under `/ldp` but initially `/rest` can be kept
  20. # for backward compatibility.
  21. ldp = Blueprint('ldp', __name__)
  22. accept_patch = (
  23. 'application/sparql-update',
  24. )
  25. accept_rdf = (
  26. 'application/ld+json',
  27. 'application/n-triples',
  28. 'application/rdf+xml',
  29. #'application/x-turtle',
  30. #'application/xhtml+xml',
  31. #'application/xml',
  32. #'text/html',
  33. 'text/n3',
  34. #'text/plain',
  35. 'text/rdf+n3',
  36. 'text/turtle',
  37. )
  38. #allow = (
  39. # 'COPY',
  40. # 'DELETE',
  41. # 'GET',
  42. # 'HEAD',
  43. # 'MOVE',
  44. # 'OPTIONS',
  45. # 'PATCH',
  46. # 'POST',
  47. # 'PUT',
  48. #)
  49. std_headers = {
  50. 'Accept-Patch' : ','.join(accept_patch),
  51. 'Accept-Post' : ','.join(accept_rdf),
  52. #'Allow' : ','.join(allow),
  53. }
  54. @ldp.url_defaults
  55. def bp_url_defaults(endpoint, values):
  56. url_prefix = getattr(g, 'url_prefix', None)
  57. if url_prefix is not None:
  58. values.setdefault('url_prefix', url_prefix)
  59. @ldp.url_value_preprocessor
  60. def bp_url_value_preprocessor(endpoint, values):
  61. g.url_prefix = values.pop('url_prefix')
  62. @ldp.before_request
  63. def request_timestamp():
  64. g.timestamp = arrow.utcnow()
  65. g.timestamp_term = Literal(g.timestamp, datatype=XSD.dateTime)
  66. ## REST SERVICES ##
  67. @ldp.route('/<path:uuid>', methods=['GET'], strict_slashes=False)
  68. @ldp.route('/', defaults={'uuid': None}, methods=['GET'], strict_slashes=False)
  69. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  70. methods=['GET'])
  71. def get_resource(uuid, force_rdf=False):
  72. '''
  73. Retrieve RDF or binary content.
  74. @param uuid (string) UUID of resource to retrieve.
  75. @param force_rdf (boolean) Whether to retrieve RDF even if the resource is
  76. a LDP-NR. This is not available in the API but is used e.g. by the
  77. `*/fcr:metadata` endpoint. The default is False.
  78. '''
  79. out_headers = std_headers
  80. repr_options = defaultdict(dict)
  81. if 'prefer' in request.headers:
  82. prefer = Toolbox().parse_rfc7240(request.headers['prefer'])
  83. logger.debug('Parsed Prefer header: {}'.format(pformat(prefer)))
  84. if 'return' in prefer:
  85. repr_options = parse_repr_options(prefer['return'])
  86. try:
  87. rsrc = Ldpr.outbound_inst(uuid, repr_options)
  88. except ResourceNotExistsError as e:
  89. return str(e), 404
  90. except TombstoneError as e:
  91. return _tombstone_response(e, uuid)
  92. else:
  93. out_headers.update(rsrc.head())
  94. if isinstance(rsrc, LdpRs) \
  95. or is_accept_hdr_rdf_parsable() \
  96. or force_rdf:
  97. return (rsrc.get(), out_headers)
  98. else:
  99. return send_file(rsrc.local_path, as_attachment=True,
  100. attachment_filename=rsrc.filename)
  101. @ldp.route('/<path:parent>', methods=['POST'], strict_slashes=False)
  102. @ldp.route('/', defaults={'parent': None}, methods=['POST'],
  103. strict_slashes=False)
  104. def post_resource(parent):
  105. '''
  106. Add a new resource in a new URI.
  107. '''
  108. out_headers = std_headers
  109. try:
  110. slug = request.headers['Slug']
  111. logger.info('Slug: {}'.format(slug))
  112. except KeyError:
  113. slug = None
  114. handling, disposition = set_post_put_params()
  115. stream, mimetype = bitstream_from_req()
  116. try:
  117. uuid = uuid_for_post(parent, slug)
  118. logger.debug('Generated UUID for POST: {}'.format(uuid))
  119. rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
  120. stream=stream, mimetype=mimetype, handling=handling,
  121. disposition=disposition)
  122. except ResourceNotExistsError as e:
  123. return str(e), 404
  124. except InvalidResourceError as e:
  125. return str(e), 409
  126. except TombstoneError as e:
  127. return _tombstone_response(e, uuid)
  128. try:
  129. rsrc.post()
  130. except ServerManagedTermError as e:
  131. return str(e), 412
  132. out_headers.update({
  133. 'Location' : rsrc.uri,
  134. })
  135. return rsrc.uri, 201, out_headers
  136. @ldp.route('/<path:uuid>', methods=['PUT'], strict_slashes=False)
  137. @ldp.route('/<path:uuid>/fcr:metadata', defaults={'force_rdf' : True},
  138. methods=['PUT'])
  139. def put_resource(uuid):
  140. '''
  141. Add a new resource at a specified URI.
  142. '''
  143. # Parse headers.
  144. logger.info('Request headers: {}'.format(request.headers))
  145. rsp_headers = std_headers
  146. handling, disposition = set_post_put_params()
  147. stream, mimetype = bitstream_from_req()
  148. try:
  149. rsrc = Ldpr.inbound_inst(uuid, content_length=request.content_length,
  150. stream=stream, mimetype=mimetype, handling=handling,
  151. disposition=disposition)
  152. except InvalidResourceError as e:
  153. return str(e), 409
  154. except ServerManagedTermError as e:
  155. return str(e), 412
  156. except IncompatibleLdpTypeError as e:
  157. return str(e), 415
  158. try:
  159. ret = rsrc.put()
  160. except (InvalidResourceError, ResourceExistsError ) as e:
  161. return str(e), 409
  162. except TombstoneError as e:
  163. return _tombstone_response(e, uuid)
  164. res_code = 201 if ret == Ldpr.RES_CREATED else 204
  165. return rsrc.uri, res_code, rsp_headers
  166. @ldp.route('/<path:uuid>', methods=['PATCH'], strict_slashes=False)
  167. def patch_resource(uuid):
  168. '''
  169. Update an existing resource with a SPARQL-UPDATE payload.
  170. '''
  171. headers = std_headers
  172. rsrc = LdpRs(uuid)
  173. if request.mimetype != 'application/sparql-update':
  174. return 'Provided content type is not a valid parsable format: {}'\
  175. .format(request.mimetype), 415
  176. try:
  177. rsrc.patch(request.get_data().decode('utf-8'))
  178. except ResourceNotExistsError as e:
  179. return str(e), 404
  180. except TombstoneError as e:
  181. return _tombstone_response(e, uuid)
  182. except ServerManagedTermError as e:
  183. return str(e), 412
  184. return '', 204, headers
  185. @ldp.route('/<path:uuid>/fcr:metadata', methods=['PATCH'])
  186. def patch_resource_metadata(uuid):
  187. return patch_resource(uuid)
  188. @ldp.route('/<path:uuid>', methods=['DELETE'])
  189. def delete_resource(uuid):
  190. '''
  191. Delete a resource.
  192. '''
  193. headers = std_headers
  194. # If referential integrity is enforced, grab all inbound relationships
  195. # to break them.
  196. repr_opts = {'incl_inbound' : True} \
  197. if current_app.config['store']['ldp_rs']['referential_integrity'] \
  198. else {}
  199. if 'prefer' in request.headers:
  200. prefer = Toolbox().parse_rfc7240(request.headers['prefer'])
  201. leave_tstone = 'no-tombstone' not in prefer
  202. else:
  203. leave_tstone = True
  204. try:
  205. Ldpr.outbound_inst(uuid, repr_opts).delete(leave_tstone=leave_tstone)
  206. except ResourceNotExistsError as e:
  207. return str(e), 404
  208. except TombstoneError as e:
  209. return _tombstone_response(e, uuid)
  210. return '', 204, headers
  211. @ldp.route('/<path:uuid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
  212. 'PATCH', 'DELETE'])
  213. def tombstone(uuid):
  214. '''
  215. Handle all tombstone operations.
  216. The only allowed method is DELETE; any other verb will return a 405.
  217. '''
  218. logger.debug('Deleting tombstone for {}.'.format(uuid))
  219. rsrc = Ldpr(uuid)
  220. try:
  221. imr = rsrc.imr
  222. except TombstoneError as e:
  223. if request.method == 'DELETE':
  224. if e.uuid == uuid:
  225. rsrc.delete_tombstone()
  226. return '', 204
  227. else:
  228. return _tombstone_response(e, uuid)
  229. else:
  230. return 'Method Not Allowed.', 405
  231. except ResourceNotExistsError as e:
  232. return str(e), 404
  233. else:
  234. return '', 404
  235. def uuid_for_post(parent_uuid=None, slug=None):
  236. '''
  237. Validate conditions to perform a POST and return an LDP resource
  238. UUID for using with the `post` method.
  239. This may raise an exception resulting in a 404 if the parent is not
  240. found or a 409 if the parent is not a valid container.
  241. '''
  242. def split_if_legacy(uuid):
  243. if current_app.config['store']['ldp_rs']['legacy_ptree_split']:
  244. uuid = Toolbox().split_uuid(uuid)
  245. return uuid
  246. # Shortcut!
  247. if not slug and not parent_uuid:
  248. uuid = split_if_legacy(str(uuid4()))
  249. return uuid
  250. parent = Ldpr.outbound_inst(parent_uuid, repr_opts={'incl_children' : False})
  251. if nsc['fcrepo'].Pairtree in parent.types:
  252. raise InvalidResourceError(parent.uuid,
  253. 'Resources cannot be created under a pairtree.')
  254. # Set prefix.
  255. if parent_uuid:
  256. parent_types = { t.identifier for t in \
  257. parent.imr.objects(RDF.type) }
  258. logger.debug('Parent types: {}'.format(pformat(parent_types)))
  259. if nsc['ldp'].Container not in parent_types:
  260. raise InvalidResourceError('Parent {} is not a container.'
  261. .format(parent_uuid))
  262. pfx = parent_uuid + '/'
  263. else:
  264. pfx = ''
  265. # Create candidate UUID and validate.
  266. if slug:
  267. cnd_uuid = pfx + slug
  268. if current_app.rdfly.ask_rsrc_exists(nsc['fcres'][cnd_uuid]):
  269. uuid = pfx + split_if_legacy(str(uuid4()))
  270. else:
  271. uuid = cnd_uuid
  272. else:
  273. uuid = pfx + split_if_legacy(str(uuid4()))
  274. return uuid
  275. def bitstream_from_req():
  276. '''
  277. Find how a binary file and its MIMEtype were uploaded in the request.
  278. '''
  279. logger.debug('Content type: {}'.format(request.mimetype))
  280. logger.debug('files: {}'.format(request.files))
  281. logger.debug('stream: {}'.format(request.stream))
  282. if request.mimetype == 'multipart/form-data':
  283. # This seems the "right" way to upload a binary file, with a
  284. # multipart/form-data MIME type and the file in the `file`
  285. # field. This however is not supported by FCREPO4.
  286. stream = request.files.get('file').stream
  287. mimetype = request.files.get('file').content_type
  288. # @TODO This will turn out useful to provide metadata
  289. # with the binary.
  290. #metadata = request.files.get('metadata').stream
  291. #provided_imr = [parse RDF here...]
  292. else:
  293. # This is a less clean way, with the file in the form body and
  294. # the request as application/x-www-form-urlencoded.
  295. # This is how FCREPO4 accepts binary uploads.
  296. stream = request.stream
  297. mimetype = request.mimetype
  298. return stream, mimetype
  299. def _get_bitstream(rsrc):
  300. out_headers = std_headers
  301. # @TODO This may change in favor of more low-level handling if the file
  302. # system is not local.
  303. return send_file(rsrc.local_path, as_attachment=True,
  304. attachment_filename=rsrc.filename)
  305. def _tombstone_response(e, uuid):
  306. headers = {
  307. 'Link' : '<{}/fcr:tombstone>; rel="hasTombstone"'.format(request.url),
  308. } if e.uuid == uuid else {}
  309. return str(e), 410, headers
  310. def set_post_put_params():
  311. '''
  312. Sets handling and content disposition for POST and PUT by parsing headers.
  313. '''
  314. handling = None
  315. if 'prefer' in request.headers:
  316. prefer = Toolbox().parse_rfc7240(request.headers['prefer'])
  317. logger.debug('Parsed Prefer header: {}'.format(prefer))
  318. if 'handling' in prefer:
  319. handling = prefer['handling']['value']
  320. try:
  321. disposition = Toolbox().parse_rfc7240(
  322. request.headers['content-disposition'])
  323. except KeyError:
  324. disposition = None
  325. return handling, disposition
  326. def is_accept_hdr_rdf_parsable():
  327. '''
  328. Check if any of the 'Accept' header values provided is a RDF parsable
  329. format.
  330. '''
  331. for mimetype in request.accept_mimetypes.values():
  332. if Ldpr.is_rdf_parsable(mimetype):
  333. return True
  334. return False
  335. def parse_repr_options(retr_opts):
  336. '''
  337. Set options to retrieve IMR.
  338. Ideally, IMR retrieval is done once per request, so all the options
  339. are set once in the `imr()` property.
  340. @param retr_opts (dict): Options parsed from `Prefer` header.
  341. '''
  342. logger.debug('Parsing retrieval options: {}'.format(retr_opts))
  343. imr_options = {}
  344. if retr_opts.setdefault('value') == 'minimal':
  345. imr_options = {
  346. 'embed_children' : False,
  347. 'incl_children' : False,
  348. 'incl_inbound' : False,
  349. 'incl_srv_mgd' : False,
  350. }
  351. else:
  352. # Default.
  353. imr_options = {
  354. 'embed_children' : False,
  355. 'incl_children' : True,
  356. 'incl_inbound' : False,
  357. 'incl_srv_mgd' : True,
  358. }
  359. # Override defaults.
  360. if 'parameters' in retr_opts:
  361. include = retr_opts['parameters']['include'].split(' ') \
  362. if 'include' in retr_opts['parameters'] else []
  363. omit = retr_opts['parameters']['omit'].split(' ') \
  364. if 'omit' in retr_opts['parameters'] else []
  365. logger.debug('Include: {}'.format(include))
  366. logger.debug('Omit: {}'.format(omit))
  367. if str(Ldpr.EMBED_CHILD_RES_URI) in include:
  368. imr_options['embed_children'] = True
  369. if str(Ldpr.RETURN_CHILD_RES_URI) in omit:
  370. imr_options['incl_children'] = False
  371. if str(Ldpr.RETURN_INBOUND_REF_URI) in include:
  372. imr_options['incl_inbound'] = True
  373. if str(Ldpr.RETURN_SRV_MGD_RES_URI) in omit:
  374. imr_options['incl_srv_mgd'] = False
  375. logger.debug('Retrieval options: {}'.format(pformat(imr_options)))
  376. return imr_options