ldp_rs.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. from copy import deepcopy
  2. import arrow
  3. from flask import current_app
  4. from rdflib import Graph
  5. from rdflib.resource import Resource
  6. from rdflib.namespace import RDF, XSD
  7. from rdflib.plugins.sparql.parser import parseUpdate
  8. from rdflib.term import URIRef, Literal, Variable
  9. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  10. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  11. srv_mgd_predicates, srv_mgd_types
  12. from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
  13. from lakesuperior.exceptions import ResourceNotExistsError, \
  14. ServerManagedTermError, SingleSubjectError
  15. from lakesuperior.toolbox import Toolbox
  16. class LdpRs(Ldpr):
  17. '''LDP-RS (LDP RDF source).
  18. Definition: https://www.w3.org/TR/ldp/#ldprs
  19. '''
  20. DEFAULT_USER = Literal('BypassAdmin')
  21. base_types = {
  22. nsc['fcrepo'].Resource,
  23. nsc['ldp'].Resource,
  24. nsc['ldp'].RDFSource,
  25. }
  26. ## LDP METHODS ##
  27. def get(self, repr_opts):
  28. '''
  29. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  30. '''
  31. return Toolbox().globalize_rsrc(self.imr)
  32. @transactional
  33. def post(self, data, format='text/turtle', handling=None):
  34. '''
  35. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  36. Perform a POST action after a valid resource URI has been found.
  37. '''
  38. return self._create_or_update_rsrc(data, format, handling,
  39. create_only=True)
  40. @transactional
  41. def put(self, data, format='text/turtle', handling=None):
  42. '''
  43. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  44. '''
  45. return self._create_or_update_rsrc(data, format, handling)
  46. @transactional
  47. @must_exist
  48. def patch(self, update_str):
  49. '''
  50. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  51. Update an existing resource by applying a SPARQL-UPDATE query.
  52. @param update_str (string) SPARQL-Update staements.
  53. '''
  54. delta = self._sparql_delta(update_str.replace('<>', self.urn.n3()))
  55. return self.rdfly.modify_dataset(*delta)
  56. ## PROTECTED METHODS ##
  57. def _create_or_update_rsrc(self, data, format, handling,
  58. create_only=False):
  59. '''
  60. Create or update a resource. PUT and POST methods, which are almost
  61. identical, are wrappers for this method.
  62. @param data (string) RDF data to parse for insertion.
  63. @param format(string) MIME type of RDF data.
  64. @param handling (sting) One of `strict` or `lenient`. This determines
  65. how to handle provided server-managed triples. If `strict` is selected,
  66. any server-managed triple included in the input RDF will trigger an
  67. exception. If `lenient`, server-managed triples are ignored.
  68. @param create_only (boolean) Whether the operation is a create-only
  69. one (i.e. POST) or a create-or-update one (i.e. PUT).
  70. '''
  71. g = Graph()
  72. if data:
  73. g.parse(data=data, format=format, publicID=self.urn)
  74. self.provided_imr = Resource(self._check_mgd_terms(g, handling),
  75. self.urn)
  76. create = create_only or not self.is_stored
  77. self._add_srv_mgd_triples(create)
  78. self._ensure_single_subject_rdf(self.provided_imr.graph)
  79. ref_int = self.rdfly.conf['referential_integrity']
  80. if ref_int:
  81. self._check_ref_int(ref_int)
  82. if create:
  83. ev_type = self._create_rsrc()
  84. else:
  85. ev_type = self._replace_rsrc()
  86. self._set_containment_rel()
  87. return ev_type
  88. ## PROTECTED METHODS ##
  89. def _check_mgd_terms(self, g, handling='strict'):
  90. '''
  91. Check whether server-managed terms are in a RDF payload.
  92. @param handling (string) One of `strict` (the default) or `lenient`.
  93. `strict` raises an error if a server-managed term is in the graph.
  94. `lenient` removes all sever-managed triples encountered.
  95. '''
  96. offending_subjects = set(g.subjects()) & srv_mgd_subjects
  97. if offending_subjects:
  98. if handling=='strict':
  99. raise ServerManagedTermError(offending_subjects, 's')
  100. else:
  101. for s in offending_subjects:
  102. self._logger.info('Removing offending subj: {}'.format(s))
  103. g.remove((s, None, None))
  104. offending_predicates = set(g.predicates()) & srv_mgd_predicates
  105. if offending_predicates:
  106. if handling=='strict':
  107. raise ServerManagedTermError(offending_predicates, 'p')
  108. else:
  109. for p in offending_predicates:
  110. self._logger.info('Removing offending pred: {}'.format(p))
  111. g.remove((None, p, None))
  112. offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
  113. if offending_types:
  114. if handling=='strict':
  115. raise ServerManagedTermError(offending_types, 't')
  116. else:
  117. for t in offending_types:
  118. self._logger.info('Removing offending type: {}'.format(t))
  119. g.remove((None, RDF.type, t))
  120. self._logger.debug('Sanitized graph: {}'.format(g.serialize(
  121. format='turtle').decode('utf-8')))
  122. return g
  123. def _add_srv_mgd_triples(self, create=False):
  124. '''
  125. Add server-managed triples to a provided IMR.
  126. @param create (boolean) Whether the resource is being created.
  127. '''
  128. # Message digest.
  129. cksum = Toolbox().rdf_cksum(self.provided_imr.graph)
  130. self.provided_imr.set(nsc['premis'].hasMessageDigest,
  131. URIRef('urn:sha1:{}'.format(cksum)))
  132. # Create and modify timestamp.
  133. ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
  134. if create:
  135. self.provided_imr.set(nsc['fcrepo'].created, ts)
  136. self.provided_imr.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)
  137. self.provided_imr.set(nsc['fcrepo'].lastModified, ts)
  138. self.provided_imr.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)
  139. # Base LDP types.
  140. for t in self.base_types:
  141. self.provided_imr.add(RDF.type, t)
  142. def _sparql_delta(self, q, handling=None):
  143. '''
  144. Calculate the delta obtained by a SPARQL Update operation.
  145. This is a critical component of the SPARQL query prcess and does a
  146. couple of things:
  147. 1. It ensures that no resources outside of the subject of the request
  148. are modified (e.g. by variable subjects)
  149. 2. It verifies that none of the terms being modified is server managed.
  150. This method extracts an in-memory copy of the resource and performs the
  151. query on that once it has checked if any of the server managed terms is
  152. in the delta. If it is, it raises an exception.
  153. NOTE: This only checks if a server-managed term is effectively being
  154. modified. If a server-managed term is present in the query but does not
  155. cause any change in the updated resource, no error is raised.
  156. @return tuple Remove and add triples. These can be used with
  157. `BaseStoreLayout.update_resource` and/or recorded as separate events in
  158. a provenance tracking system.
  159. '''
  160. pre_g = self.imr.graph
  161. post_g = deepcopy(pre_g)
  162. post_g.update(q)
  163. remove = pre_g - post_g
  164. add = post_g - pre_g
  165. self._logger.info('Removing: {}'.format(
  166. remove.serialize(format='turtle').decode('utf8')))
  167. self._logger.info('Adding: {}'.format(
  168. add.serialize(format='turtle').decode('utf8')))
  169. remove = self._check_mgd_terms(remove, handling)
  170. add = self._check_mgd_terms(add, handling)
  171. return remove, add
  172. def _ensure_single_subject_rdf(self, g):
  173. '''
  174. Ensure that a RDF payload for a POST or PUT has a single resource.
  175. '''
  176. for s in set(g.subjects()):
  177. if not s == self.urn:
  178. raise SingleSubjectError(s, self.uuid)
  179. def _check_ref_int(self, config):
  180. g = self.provided_imr.graph
  181. for o in g.objects():
  182. if isinstance(o, URIRef) and str(o).startswith(Toolbox().base_url)\
  183. and not self.rdfly.ask_rsrc_exists(o):
  184. if config == 'strict':
  185. raise RefIntViolationError(o)
  186. else:
  187. self._logger.info(
  188. 'Removing link to non-existent repo resource: {}'
  189. .format(o))
  190. g.remove((None, None, o))
  191. class Ldpc(LdpRs):
  192. '''LDPC (LDP Container).'''
  193. def __init__(self, uuid, *args, **kwargs):
  194. super().__init__(uuid, *args, **kwargs)
  195. self.base_types.update({
  196. nsc['ldp'].Container,
  197. })
  198. class LdpBc(Ldpc):
  199. '''LDP-BC (LDP Basic Container).'''
  200. def __init__(self, uuid, *args, **kwargs):
  201. super().__init__(uuid, *args, **kwargs)
  202. self.base_types.update({
  203. nsc['ldp'].BasicContainer,
  204. })
  205. class LdpDc(Ldpc):
  206. '''LDP-DC (LDP Direct Container).'''
  207. def __init__(self, uuid, *args, **kwargs):
  208. super().__init__(uuid, *args, **kwargs)
  209. self.base_types.update({
  210. nsc['ldp'].DirectContainer,
  211. })
  212. class LdpIc(Ldpc):
  213. '''LDP-IC (LDP Indirect Container).'''
  214. def __init__(self, uuid, *args, **kwargs):
  215. super().__init__(uuid, *args, **kwargs)
  216. self.base_types.update({
  217. nsc['ldp'].IndirectContainer,
  218. })