ldp_rs.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. from copy import deepcopy
  2. import arrow
  3. from flask import current_app
  4. from rdflib import Graph
  5. from rdflib.resource import Resource
  6. from rdflib.namespace import RDF, XSD
  7. from rdflib.plugins.sparql.parser import parseUpdate
  8. from rdflib.term import URIRef, Literal, Variable
  9. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  10. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  11. srv_mgd_predicates, srv_mgd_types
  12. from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
  13. from lakesuperior.exceptions import ResourceNotExistsError, \
  14. ServerManagedTermError, SingleSubjectError
  15. from lakesuperior.toolbox import Toolbox
  16. class LdpRs(Ldpr):
  17. '''LDP-RS (LDP RDF source).
  18. Definition: https://www.w3.org/TR/ldp/#ldprs
  19. '''
  20. DEFAULT_USER = Literal('BypassAdmin')
  21. base_types = {
  22. nsc['fcrepo'].Resource,
  23. nsc['ldp'].Resource,
  24. nsc['ldp'].RDFSource,
  25. }
  26. ## LDP METHODS ##
  27. def get(self, repr_opts):
  28. '''
  29. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  30. '''
  31. return Toolbox().globalize_rsrc(self.imr)
  32. @transactional
  33. def post(self, data, format='text/turtle', handling=None):
  34. '''
  35. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  36. Perform a POST action after a valid resource URI has been found.
  37. '''
  38. return self._create_or_update_rsrc(data, format, handling,
  39. create_only=True)
  40. @transactional
  41. def put(self, data, format='text/turtle', handling=None):
  42. '''
  43. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  44. '''
  45. return self._create_or_update_rsrc(data, format, handling)
  46. @transactional
  47. @must_exist
  48. def patch(self, update_str):
  49. '''
  50. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  51. Update an existing resource by applying a SPARQL-UPDATE query.
  52. @param update_str (string) SPARQL-Update staements.
  53. '''
  54. delta = self._sparql_delta(update_str)
  55. return self.rdfly.modify_dataset(*delta)
  56. ## PROTECTED METHODS ##
  57. def _create_or_update_rsrc(self, data, format, handling,
  58. create_only=False):
  59. '''
  60. Create or update a resource. PUT and POST methods, which are almost
  61. identical, are wrappers for this method.
  62. @param data (string) RDF data to parse for insertion.
  63. @param format(string) MIME type of RDF data.
  64. @param handling (sting) One of `strict` or `lenient`. This determines
  65. how to handle provided server-managed triples. If `strict` is selected,
  66. any server-managed triple included in the input RDF will trigger an
  67. exception. If `lenient`, server-managed triples are ignored.
  68. @param create_only (boolean) Whether the operation is a create-only
  69. one (i.e. POST) or a create-or-update one (i.e. PUT).
  70. '''
  71. g = Graph()
  72. if data:
  73. g.parse(data=data, format=format, publicID=self.urn)
  74. self.provided_imr = Resource(self._check_mgd_terms(g, handling),
  75. self.urn)
  76. self._add_srv_mgd_triples(create=True)
  77. self._ensure_single_subject_rdf(self.provided_imr.graph)
  78. cnf = self.rdfly.conf['referential_integrity']
  79. if cnf != 'none':
  80. self._check_ref_int(cnf)
  81. if create_only:
  82. res = self.rdfly.create_rsrc(self.provided_imr)
  83. else:
  84. res = self.rdfly.create_or_replace_rsrc(self.provided_imr)
  85. self._set_containment_rel()
  86. return res
  87. def _check_mgd_terms(self, g, handling='strict'):
  88. '''
  89. Check whether server-managed terms are in a RDF payload.
  90. @param handling (string) One of `strict` (the default) or `lenient`.
  91. `strict` raises an error if a server-managed term is in the graph.
  92. `lenient` removes all sever-managed triples encountered.
  93. '''
  94. offending_subjects = set(g.subjects()) & srv_mgd_subjects
  95. if offending_subjects:
  96. if handling=='strict':
  97. raise ServerManagedTermError(offending_subjects, 's')
  98. else:
  99. for s in offending_subjects:
  100. self._logger.info('Removing offending subj: {}'.format(s))
  101. g.remove((s, None, None))
  102. offending_predicates = set(g.predicates()) & srv_mgd_predicates
  103. if offending_predicates:
  104. if handling=='strict':
  105. raise ServerManagedTermError(offending_predicates, 'p')
  106. else:
  107. for p in offending_predicates:
  108. self._logger.info('Removing offending pred: {}'.format(p))
  109. g.remove((None, p, None))
  110. offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
  111. if offending_types:
  112. if handling=='strict':
  113. raise ServerManagedTermError(offending_types, 't')
  114. else:
  115. for t in offending_types:
  116. self._logger.info('Removing offending type: {}'.format(t))
  117. g.remove((None, RDF.type, t))
  118. self._logger.debug('Sanitized graph: {}'.format(g.serialize(
  119. format='turtle').decode('utf-8')))
  120. return g
  121. def _add_srv_mgd_triples(self, create=False):
  122. '''
  123. Add server-managed triples to a resource.
  124. @param create (boolean) Whether the resource is being created.
  125. '''
  126. # Message digest.
  127. cksum = Toolbox().rdf_cksum(self.provided_imr.graph)
  128. self.provided_imr.set(nsc['premis'].hasMessageDigest,
  129. URIRef('urn:sha1:{}'.format(cksum)))
  130. # Create and modify timestamp.
  131. # @TODO Use gunicorn to get request timestamp.
  132. ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
  133. if create:
  134. self.provided_imr.set(nsc['fcrepo'].created, ts)
  135. self.provided_imr.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)
  136. self.provided_imr.set(nsc['fcrepo'].lastModified, ts)
  137. self.provided_imr.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)
  138. # Base LDP types.
  139. for t in self.base_types:
  140. self.provided_imr.add(RDF.type, t)
  141. def _sparql_delta(self, q, handling=None):
  142. '''
  143. Calculate the delta obtained by a SPARQL Update operation.
  144. This is a critical component of the SPARQL query prcess and does a
  145. couple of things:
  146. 1. It ensures that no resources outside of the subject of the request
  147. are modified (e.g. by variable subjects)
  148. 2. It verifies that none of the terms being modified is server managed.
  149. This method extracts an in-memory copy of the resource and performs the
  150. query on that once it has checked if any of the server managed terms is
  151. in the delta. If it is, it raises an exception.
  152. NOTE: This only checks if a server-managed term is effectively being
  153. modified. If a server-managed term is present in the query but does not
  154. cause any change in the updated resource, no error is raised.
  155. @return tuple Remove and add triples. These can be used with
  156. `BaseStoreLayout.update_resource` and/or recorded as separate events in
  157. a provenance tracking system.
  158. '''
  159. pre_g = self.imr.graph
  160. post_g = deepcopy(pre_g)
  161. post_g.update(q)
  162. remove = pre_g - post_g
  163. add = post_g - pre_g
  164. self._logger.info('Removing: {}'.format(
  165. remove.serialize(format='turtle').decode('utf8')))
  166. self._logger.info('Adding: {}'.format(
  167. add.serialize(format='turtle').decode('utf8')))
  168. remove = self._check_mgd_terms(remove, handling)
  169. add = self._check_mgd_terms(add, handling)
  170. return remove, add
  171. def _ensure_single_subject_rdf(self, g):
  172. '''
  173. Ensure that a RDF payload for a POST or PUT has a single resource.
  174. '''
  175. for s in set(g.subjects()):
  176. if not s == self.urn:
  177. raise SingleSubjectError(s, self.uuid)
  178. def _check_ref_int(self, config):
  179. g = self.provided_imr.graph
  180. for o in g.objects():
  181. if isinstance(o, URIRef) and str(o).startswith(Toolbox().base_url)\
  182. and not self.rdfly.ask_rsrc_exists(o):
  183. if config == 'strict':
  184. raise RefIntViolationError(o)
  185. else:
  186. self._logger.info(
  187. 'Removing link to non-existent repo resource: {}'
  188. .format(o))
  189. g.remove((None, None, o))
  190. class Ldpc(LdpRs):
  191. '''LDPC (LDP Container).'''
  192. def __init__(self, uuid, *args, **kwargs):
  193. super().__init__(uuid, *args, **kwargs)
  194. self.base_types.update({
  195. nsc['ldp'].Container,
  196. })
  197. class LdpBc(Ldpc):
  198. '''LDP-BC (LDP Basic Container).'''
  199. def __init__(self, uuid, *args, **kwargs):
  200. super().__init__(uuid, *args, **kwargs)
  201. self.base_types.update({
  202. nsc['ldp'].BasicContainer,
  203. })
  204. class LdpDc(Ldpc):
  205. '''LDP-DC (LDP Direct Container).'''
  206. def __init__(self, uuid, *args, **kwargs):
  207. super().__init__(uuid, *args, **kwargs)
  208. self.base_types.update({
  209. nsc['ldp'].DirectContainer,
  210. })
  211. class LdpIc(Ldpc):
  212. '''LDP-IC (LDP Indirect Container).'''
  213. def __init__(self, uuid, *args, **kwargs):
  214. super().__init__(uuid, *args, **kwargs)
  215. self.base_types.update({
  216. nsc['ldp'].IndirectContainer,
  217. })