ldp_rs.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. from copy import deepcopy
  2. import arrow
  3. from flask import current_app
  4. from rdflib import Graph
  5. from rdflib.resource import Resource
  6. from rdflib.namespace import RDF, XSD
  7. from rdflib.plugins.sparql.parser import parseUpdate
  8. from rdflib.term import URIRef, Literal, Variable
  9. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  10. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  11. srv_mgd_predicates, srv_mgd_types
  12. from lakesuperior.model.ldpr import Ldpr, atomic
  13. from lakesuperior.exceptions import ResourceNotExistsError, \
  14. ServerManagedTermError, SingleSubjectError
  15. from lakesuperior.toolbox import Toolbox
  class LdpRs(Ldpr):
      '''LDP-RS (LDP RDF source).

      Definition: https://www.w3.org/TR/ldp/#ldprs
      '''

      # User recorded as creator and last modifier of a resource.
      DEFAULT_USER = Literal('BypassAdmin')

      # RDF types added to every provided IMR. This set lives on the class
      # and is therefore shared by all instances that do not override it.
      base_types = {
          nsc['fcrepo'].Resource,
          nsc['ldp'].Resource,
          nsc['ldp'].RDFSource,
      }

      def __init__(self, uuid, repr_opts=None, handling='strict', **kwargs):
          '''
          Extends Ldpr.__init__ by adding LDP-RS specific parameters.

          @param uuid Resource identifier, passed on to `Ldpr.__init__`.
          @param repr_opts (dict) Representation options, stored in
          `self._imr_options`. Defaults to a new empty dict.
          @param handling (string) One of `strict` (the default) or `lenient`.
          `strict` raises an error if a server-managed term is in the graph.
          `lenient` removes all server-managed triples encountered. Any value
          other than `strict` is treated as `lenient`.
          '''
          super().__init__(uuid, **kwargs)

          # provided_imr can be empty. If None, it is an outbound resource.
          if self.provided_imr is not None:
              self.workflow = self.WRKF_INBOUND
          else:
              self.workflow = self.WRKF_OUTBOUND

          # Build a fresh dict per instance: a `{}` default in the signature
          # would be a single object shared by all instances.
          self._imr_options = {} if repr_opts is None else repr_opts
          self.handling = handling


      ## LDP METHODS ##

      def get(self):
          '''
          https://www.w3.org/TR/ldp/#ldpr-HTTP_GET

          @return The `out_graph` of this resource serialized as Turtle.
          '''
          return self.out_graph.serialize(format='turtle')


      @atomic
      def post(self):
          '''
          https://www.w3.org/TR/ldp/#ldpr-HTTP_POST

          Perform a POST action after a valid resource URI has been found.

          @return The result of `_create_or_replace_rsrc` in create-only mode.
          '''
          return self._create_or_replace_rsrc(create_only=True)


      @atomic
      def put(self):
          '''
          https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT

          @return The result of `_create_or_replace_rsrc`.
          '''
          return self._create_or_replace_rsrc()


      @atomic
      def patch(self, update_str):
          '''
          https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH

          Update an existing resource by applying a SPARQL-UPDATE query.

          @param update_str (string) SPARQL-Update statements. Every `<>`
          occurrence is replaced with the N3 form of this resource's URN
          before the update is evaluated.

          @return The result of `_modify_rsrc`.
          '''
          delta = self._sparql_delta(update_str.replace('<>', self.urn.n3()))

          # `delta` is the (remove graph, add graph) pair.
          return self._modify_rsrc(self.RES_UPDATED, *delta)


      ## PROTECTED METHODS ##

      def _create_or_replace_rsrc(self, create_only=False):
          '''
          Create or update a resource. PUT and POST methods, which are almost
          identical, are wrappers for this method.

          The RDF payload is read from `self.provided_imr`.

          @param create_only (boolean) Whether this is a create-only operation.

          @return The value returned by `_create_rsrc` (when creating) or
          `_replace_rsrc` (when replacing).
          '''
          create = create_only or not self.is_stored

          self._add_srv_mgd_triples(create)
          self._ensure_single_subject_rdf(self.provided_imr.graph)

          # A falsy setting disables the referential integrity check.
          ref_int = self.rdfly.config['referential_integrity']
          if ref_int:
              self._check_ref_int(ref_int)

          if create:
              ev_type = self._create_rsrc()
          else:
              ev_type = self._replace_rsrc()

          self._set_containment_rel()

          return ev_type


      def _check_mgd_terms(self, g):
          '''
          Check whether server-managed terms are in a RDF payload.

          Subjects, predicates and `rdf:type` objects are checked in this
          order. With `strict` handling the first offending group raises an
          error; otherwise the offending triples are removed from `g` in place.

          @param g (rdflib.Graph) The graph to check. May be modified.

          @return rdflib.Graph The same graph object that was passed in.

          @raise ServerManagedTermError if handling is `strict` and a
          server-managed term is found.
          '''
          offending_subjects = set(g.subjects()) & srv_mgd_subjects
          if offending_subjects:
              if self.handling == 'strict':
                  raise ServerManagedTermError(offending_subjects, 's')
              else:
                  for s in offending_subjects:
                      self._logger.info('Removing offending subj: {}'.format(s))
                      g.remove((s, None, None))

          offending_predicates = set(g.predicates()) & srv_mgd_predicates
          if offending_predicates:
              if self.handling == 'strict':
                  raise ServerManagedTermError(offending_predicates, 'p')
              else:
                  for p in offending_predicates:
                      self._logger.info('Removing offending pred: {}'.format(p))
                      g.remove((None, p, None))

          offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
          if offending_types:
              if self.handling == 'strict':
                  raise ServerManagedTermError(offending_types, 't')
              else:
                  for t in offending_types:
                      self._logger.info('Removing offending type: {}'.format(t))
                      g.remove((None, RDF.type, t))

          self._logger.debug('Sanitized graph: {}'.format(g.serialize(
              format='turtle').decode('utf-8')))

          return g


      def _add_srv_mgd_triples(self, create=False):
          '''
          Add server-managed triples to a provided IMR.

          @param create (boolean) Whether the resource is being created. If
          true, creation timestamp and creator are set in addition to the
          modification ones.
          '''
          # Message digest. Computed before the other server-managed triples
          # are added below.
          cksum = Toolbox().rdf_cksum(self.provided_imr.graph)
          self.provided_imr.set(nsc['premis'].hasMessageDigest,
                  URIRef('urn:sha1:{}'.format(cksum)))

          # Create and modify timestamp.
          ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
          if create:
              self.provided_imr.set(nsc['fcrepo'].created, ts)
              self.provided_imr.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)

          self.provided_imr.set(nsc['fcrepo'].lastModified, ts)
          self.provided_imr.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)

          # Base LDP types.
          for t in self.base_types:
              self.provided_imr.add(RDF.type, t)


      def _sparql_delta(self, q):
          '''
          Calculate the delta obtained by a SPARQL Update operation.

          This is a critical component of the SPARQL query process and does a
          couple of things:

          1. It ensures that no resources outside of the subject of the request
          are modified (e.g. by variable subjects)
          2. It verifies that none of the terms being modified is server managed.

          This method extracts an in-memory copy of the resource and performs the
          query on that once it has checked if any of the server managed terms is
          in the delta. If it is, it raises an exception.

          NOTE: This only checks if a server-managed term is effectively being
          modified. If a server-managed term is present in the query but does not
          cause any change in the updated resource, no error is raised.

          @param q (string) SPARQL-Update statements.

          @return tuple(rdflib.Graph) Remove and add graphs. These can be used
          with `BaseStoreLayout.update_resource` and/or recorded as separate
          events in a provenance tracking system.
          '''
          pre_g = self.imr.graph

          # Apply the update to a copy so that the stored state stays
          # untouched and can be compared with the outcome.
          post_g = deepcopy(pre_g)
          post_g.update(q)

          remove_g, add_g = self._dedup_deltas(pre_g, post_g)

          remove_g = self._check_mgd_terms(remove_g)
          add_g = self._check_mgd_terms(add_g)

          return remove_g, add_g


      def _ensure_single_subject_rdf(self, g):
          '''
          Ensure that a RDF payload for a POST or PUT has a single resource.

          @param g (rdflib.Graph) The graph to check.

          @raise SingleSubjectError if any subject in the graph is not the URN
          of this resource.
          '''
          for s in set(g.subjects()):
              if s != self.urn:
                  raise SingleSubjectError(s, self.uuid)


      def _check_ref_int(self, config):
          '''
          Check referential integrity of the provided IMR.

          Looks for URI objects that start with the repository base URL and
          for which `self.rdfly.ask_rsrc_exists` is false.

          @param config (string) If `strict`, an error is raised for the first
          such object. Otherwise all triples pointing to it are removed from
          the provided graph.

          @raise RefIntViolationError in `strict` mode if a link to a
          non-existent repository resource is found.
          '''
          g = self.provided_imr.graph
          base_url = Toolbox().base_url

          # Iterate over a snapshot: triples may be removed from `g` inside
          # the loop, which must not happen while iterating the graph itself.
          for o in set(g.objects()):
              if (
                      isinstance(o, URIRef)
                      and str(o).startswith(base_url)
                      and not self.rdfly.ask_rsrc_exists(o)):
                  if config == 'strict':
                      # This name is not part of the module-level imports.
                      # NOTE(review): assumed to be defined in
                      # `lakesuperior.exceptions` like the other errors
                      # used in this module - confirm.
                      from lakesuperior.exceptions import RefIntViolationError
                      raise RefIntViolationError(o)
                  else:
                      self._logger.info(
                              'Removing link to non-existent repo resource: {}'
                              .format(o))
                      g.remove((None, None, o))
  class Ldpc(LdpRs):
      '''LDPC (LDP Container).'''

      def __init__(self, uuid, *args, **kwargs):
          '''
          Extends the parent initializer by adding the `ldp:Container` type
          to the base types of this instance.
          '''
          super().__init__(uuid, *args, **kwargs)

          # Rebind instead of updating in place: `base_types` is inherited as
          # a class-level set, and mutating it would add this type to every
          # other class and instance sharing that set.
          self.base_types = self.base_types | {
              nsc['ldp'].Container,
          }
  class LdpBc(Ldpc):
      '''LDP-BC (LDP Basic Container).'''

      def __init__(self, uuid, *args, **kwargs):
          '''
          Extends the parent initializer by adding the `ldp:BasicContainer`
          type to the base types of this instance.
          '''
          super().__init__(uuid, *args, **kwargs)

          # Rebind instead of updating in place: `base_types` may be a
          # class-level set, and mutating it would add this type to every
          # other class and instance sharing that set.
          self.base_types = self.base_types | {
              nsc['ldp'].BasicContainer,
          }
  class LdpDc(Ldpc):
      '''LDP-DC (LDP Direct Container).'''

      def __init__(self, uuid, *args, **kwargs):
          '''
          Extends the parent initializer by adding the `ldp:DirectContainer`
          type to the base types of this instance.
          '''
          super().__init__(uuid, *args, **kwargs)

          # Rebind instead of updating in place: `base_types` may be a
          # class-level set, and mutating it would add this type to every
          # other class and instance sharing that set.
          self.base_types = self.base_types | {
              nsc['ldp'].DirectContainer,
          }
  class LdpIc(Ldpc):
      '''LDP-IC (LDP Indirect Container).'''

      def __init__(self, uuid, *args, **kwargs):
          '''
          Extends the parent initializer by adding the `ldp:IndirectContainer`
          type to the base types of this instance.
          '''
          super().__init__(uuid, *args, **kwargs)

          # Rebind instead of updating in place: `base_types` may be a
          # class-level set, and mutating it would add this type to every
          # other class and instance sharing that set.
          self.base_types = self.base_types | {
              nsc['ldp'].IndirectContainer,
          }