ldp_rs.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. from copy import deepcopy
  2. import arrow
  3. from rdflib import Graph
  4. from rdflib.resource import Resource
  5. from rdflib.namespace import RDF, XSD
  6. from rdflib.plugins.sparql.parser import parseUpdate
  7. from rdflib.term import URIRef, Literal, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  10. srv_mgd_predicates, srv_mgd_types
  11. from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
  12. from lakesuperior.exceptions import ResourceNotExistsError, \
  13. ServerManagedTermError, SingleSubjectError
  14. from lakesuperior.util.translator import Translator
  15. class LdpRs(Ldpr):
  16. '''LDP-RS (LDP RDF source).
  17. Definition: https://www.w3.org/TR/ldp/#ldprs
  18. '''
  19. DEFAULT_USER = Literal('BypassAdmin')
  20. RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
  21. RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
  22. RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
  23. base_types = {
  24. nsc['fcrepo'].Resource,
  25. nsc['ldp'].Resource,
  26. nsc['ldp'].RDFSource,
  27. }
  28. ## LDP METHODS ##
  29. def get(self, pref_return):
  30. '''
  31. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  32. '''
  33. kwargs = {}
  34. minimal = embed_children = incl_inbound = False
  35. kwargs['incl_srv_mgd'] = True
  36. if 'value' in pref_return and pref_return['value'] == 'minimal':
  37. kwargs['minimal'] = True
  38. else:
  39. include = pref_return['parameters']['include'].split(' ') \
  40. if 'include' in pref_return['parameters'] else []
  41. omit = pref_return['parameters']['omit'].split(' ') \
  42. if 'omit' in pref_return['parameters'] else []
  43. self._logger.debug('Include: {}'.format(include))
  44. self._logger.debug('Omit: {}'.format(omit))
  45. if str(self.RETURN_INBOUND_REF_URI) in include:
  46. kwargs['incl_inbound'] = True
  47. if str(self.RETURN_CHILD_RES_URI) in omit:
  48. kwargs['embed_chldren'] = False
  49. if str(self.RETURN_SRV_MGD_RES_URI) in omit:
  50. kwargs['incl_srv_mgd'] = False
  51. return Translator.globalize_rsrc(self.imr)
  52. @transactional
  53. def post(self, data, format='text/turtle', handling=None):
  54. '''
  55. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  56. Perform a POST action after a valid resource URI has been found.
  57. '''
  58. return self._create_or_update_rsrc(data, format, handling,
  59. create_only=True)
  60. @transactional
  61. def put(self, data, format='text/turtle', handling=None):
  62. '''
  63. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  64. '''
  65. return self._create_or_update_rsrc(data, format, handling)
  66. @transactional
  67. @must_exist
  68. def patch(self, update_str):
  69. '''
  70. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  71. Update an existing resource by applying a SPARQL-UPDATE query.
  72. @param update_str (string) SPARQL-Update staements.
  73. '''
  74. delta = self._sparql_delta(update_str)
  75. return self.rdfly.modify_dataset(*delta)
  76. ## PROTECTED METHODS ##
  77. def _create_or_update_rsrc(self, data, format, handling,
  78. create_only=False):
  79. '''
  80. Create or update a resource. PUT and POST methods, which are almost
  81. identical, are wrappers for this method.
  82. @param data (string) RDF data to parse for insertion.
  83. @param format(string) MIME type of RDF data.
  84. @param handling (sting) One of `strict` or `lenient`. This determines
  85. how to handle provided server-managed triples. If `strict` is selected,
  86. any server-managed triple included in the input RDF will trigger an
  87. exception. If `lenient`, server-managed triples are ignored.
  88. @param create_only (boolean) Whether the operation is a create-only
  89. one (i.e. POST) or a create-or-update one (i.e. PUT).
  90. '''
  91. g = Graph().parse(data=data, format=format, publicID=self.urn)
  92. imr = Resource(self._check_mgd_terms(g, handling), self.urn)
  93. imr = self._add_srv_mgd_triples(imr, create=True)
  94. self._ensure_single_subject_rdf(imr.graph)
  95. if create_only:
  96. res = self.rdfly.create_rsrc(imr)
  97. else:
  98. res = self.rdfly.create_or_replace_rsrc(imr)
  99. self._set_containment_rel()
  100. return res
  101. def _check_mgd_terms(self, g, handling='strict'):
  102. '''
  103. Check whether server-managed terms are in a RDF payload.
  104. @param handling (string) One of `strict` (the default) or `lenient`.
  105. `strict` raises an error if a server-managed term is in the graph.
  106. `lenient` removes all sever-managed triples encountered.
  107. '''
  108. offending_subjects = set(g.subjects()) & srv_mgd_subjects
  109. if offending_subjects:
  110. if handling=='strict':
  111. raise ServerManagedTermError(offending_subjects, 's')
  112. else:
  113. for s in offending_subjects:
  114. g.remove((s, Variable('p'), Variable('o')))
  115. offending_predicates = set(g.predicates()) & srv_mgd_predicates
  116. if offending_predicates:
  117. if handling=='strict':
  118. raise ServerManagedTermError(offending_predicates, 'p')
  119. else:
  120. for p in offending_predicates:
  121. g.remove((Variable('s'), p, Variable('o')))
  122. offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
  123. if offending_types:
  124. if handling=='strict':
  125. raise ServerManagedTermError(offending_types, 't')
  126. else:
  127. for t in offending_types:
  128. g.remove((Variable('s'), RDF.type, t))
  129. return g
  130. def _add_srv_mgd_triples(self, rsrc, create=False):
  131. '''
  132. Add server-managed triples to a resource.
  133. @param create (boolean) Whether the resource is being created.
  134. '''
  135. # Message digest.
  136. cksum = Digest.rdf_cksum(rsrc.graph)
  137. rsrc.set(nsc['premis'].hasMessageDigest,
  138. URIRef('urn:sha1:{}'.format(cksum)))
  139. # Create and modify timestamp.
  140. # @TODO Use gunicorn to get request timestamp.
  141. ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
  142. if create:
  143. rsrc.set(nsc['fcrepo'].created, ts)
  144. rsrc.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)
  145. rsrc.set(nsc['fcrepo'].lastModified, ts)
  146. rsrc.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)
  147. # Base LDP types.
  148. for t in self.base_types:
  149. rsrc.add(RDF.type, t)
  150. return rsrc
  151. def _sparql_delta(self, q, handling=None):
  152. '''
  153. Calculate the delta obtained by a SPARQL Update operation.
  154. This is a critical component of the SPARQL query prcess and does a
  155. couple of things:
  156. 1. It ensures that no resources outside of the subject of the request
  157. are modified (e.g. by variable subjects)
  158. 2. It verifies that none of the terms being modified is server managed.
  159. This method extracts an in-memory copy of the resource and performs the
  160. query on that once it has checked if any of the server managed terms is
  161. in the delta. If it is, it raises an exception.
  162. NOTE: This only checks if a server-managed term is effectively being
  163. modified. If a server-managed term is present in the query but does not
  164. cause any change in the updated resource, no error is raised.
  165. @return tuple Remove and add triples. These can be used with
  166. `BaseStoreLayout.update_resource` and/or recorded as separate events in
  167. a provenance tracking system.
  168. '''
  169. pre_g = self.imr.graph
  170. post_g = deepcopy(pre_g)
  171. post_g.update(q)
  172. remove = pre_g - post_g
  173. add = post_g - pre_g
  174. self._logger.info('Removing: {}'.format(
  175. remove.serialize(format='turtle').decode('utf8')))
  176. self._logger.info('Adding: {}'.format(
  177. add.serialize(format='turtle').decode('utf8')))
  178. remove = self._check_mgd_terms(remove, handling)
  179. add = self._check_mgd_terms(add, handling)
  180. return remove, add
  181. def _ensure_single_subject_rdf(self, g):
  182. '''
  183. Ensure that a RDF payload for a POST or PUT has a single resource.
  184. '''
  185. for s in set(g.subjects()):
  186. if not s == self.uri:
  187. return SingleSubjectError(s, self.uri)
  188. class Ldpc(LdpRs):
  189. '''LDPC (LDP Container).'''
  190. def __init__(self, uuid):
  191. super().__init__(uuid)
  192. self.base_types.update({
  193. nsc['ldp'].Container,
  194. })
  195. class LdpBc(Ldpc):
  196. '''LDP-BC (LDP Basic Container).'''
  197. def __init__(self, uuid):
  198. super().__init__(uuid)
  199. self.base_types.update({
  200. nsc['ldp'].BasicContainer,
  201. })
  202. class LdpDc(Ldpc):
  203. '''LDP-DC (LDP Direct Container).'''
  204. def __init__(self, uuid):
  205. super().__init__(uuid)
  206. self.base_types.update({
  207. nsc['ldp'].DirectContainer,
  208. })
  209. class LdpIc(Ldpc):
  210. '''LDP-IC (LDP Indirect Container).'''
  211. def __init__(self, uuid):
  212. super().__init__(uuid)
  213. self.base_types.update({
  214. nsc['ldp'].IndirectContainer,
  215. })