ldp_rs.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. from copy import deepcopy
  2. import arrow
  3. from rdflib import Graph
  4. from rdflib.resource import Resource
  5. from rdflib.namespace import RDF, XSD
  6. from rdflib.plugins.sparql.parser import parseUpdate
  7. from rdflib.term import URIRef, Literal, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  10. srv_mgd_predicates, srv_mgd_types
  11. from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
  12. from lakesuperior.exceptions import ResourceNotExistsError, \
  13. ServerManagedTermError, SingleSubjectError
  14. from lakesuperior.util.digest import Digest
  15. from lakesuperior.util.translator import Translator
  16. class LdpRs(Ldpr):
  17. '''LDP-RS (LDP RDF source).
  18. Definition: https://www.w3.org/TR/ldp/#ldprs
  19. '''
  20. DEFAULT_USER = Literal('BypassAdmin')
  21. RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
  22. RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
  23. RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
  24. base_types = {
  25. nsc['ldp'].RDFSource
  26. }
  27. def get(self, pref_return):
  28. '''
  29. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  30. '''
  31. kwargs = {}
  32. minimal = embed_children = incl_inbound = False
  33. kwargs['incl_srv_mgd'] = True
  34. if 'value' in pref_return and pref_return['value'] == 'minimal':
  35. kwargs['minimal'] = True
  36. else:
  37. include = pref_return['parameters']['include'].split(' ') \
  38. if 'include' in pref_return['parameters'] else []
  39. omit = pref_return['parameters']['omit'].split(' ') \
  40. if 'omit' in pref_return['parameters'] else []
  41. self._logger.debug('Include: {}'.format(include))
  42. self._logger.debug('Omit: {}'.format(omit))
  43. if str(self.RETURN_INBOUND_REF_URI) in include:
  44. kwargs['incl_inbound'] = True
  45. if str(self.RETURN_CHILD_RES_URI) in omit:
  46. kwargs['embed_chldren'] = False
  47. if str(self.RETURN_SRV_MGD_RES_URI) in omit:
  48. kwargs['incl_srv_mgd'] = False
  49. imr = self.rdfly.out_rsrc
  50. if not imr or not len(imr.graph):
  51. raise ResourceNotExistsError(self.uri)
  52. return Translator.globalize_rsrc(imr)
  53. @transactional
  54. def post(self, data, format='text/turtle', handling=None):
  55. '''
  56. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  57. Perform a POST action after a valid resource URI has been found.
  58. '''
  59. return self._create_or_update_rsrc(data, format, handling,
  60. create_only=True)
  61. @transactional
  62. def put(self, data, format='text/turtle', handling=None):
  63. '''
  64. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  65. '''
  66. return self._create_or_update_rsrc(data, format, handling)
  67. @transactional
  68. @must_exist
  69. def patch(self, data):
  70. '''
  71. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  72. '''
  73. trp_remove, trp_add = self._sparql_delta(data)
  74. return self.rdfly.modify_rsrc(trp_remove, trp_add)
  75. ## PROTECTED METHODS ##
  76. def _create_or_update_rsrc(self, data, format, handling,
  77. create_only=False):
  78. '''
  79. Create or update a resource. PUT and POST methods, which are almost
  80. identical, are wrappers for this method.
  81. @param data (string) RDF data to parse for insertion.
  82. @param format(string) MIME type of RDF data.
  83. @param handling (sting) One of `strict` or `lenient`. This determines
  84. how to handle provided server-managed triples. If `strict` is selected,
  85. any server-managed triple included in the input RDF will trigger an
  86. exception. If `lenient`, server-managed triples are ignored.
  87. @param create_only (boolean) Whether the operation is a create-only one (i.e.
  88. POST) or a create-or-update one (i.e. PUT).
  89. '''
  90. g = Graph().parse(data=data, format=format, publicID=self.urn)
  91. imr = Resource(self._check_mgd_terms(g, handling), self.urn)
  92. imr = self._add_srv_mgd_triples(imr, create=True)
  93. self._ensure_single_subject_rdf(imr.graph)
  94. if create_only:
  95. res = self.rdfly.create_rsrc(imr)
  96. else:
  97. res = self.rdfly.create_or_replace_rsrc(imr)
  98. self._set_containment_rel()
  99. return res
  100. def _check_mgd_terms(self, g, handling='strict'):
  101. '''
  102. Check whether server-managed terms are in a RDF payload.
  103. '''
  104. offending_subjects = set(g.subjects()) & srv_mgd_subjects
  105. if offending_subjects:
  106. if handling=='strict':
  107. raise ServerManagedTermError(offending_subjects, 's')
  108. else:
  109. for s in offending_subjects:
  110. g.remove((s, Variable('p'), Variable('o')))
  111. offending_predicates = set(g.predicates()) & srv_mgd_predicates
  112. if offending_predicates:
  113. if handling=='strict':
  114. raise ServerManagedTermError(offending_predicates, 'p')
  115. else:
  116. for p in offending_predicates:
  117. g.remove((Variable('s'), p, Variable('o')))
  118. offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
  119. if offending_types:
  120. if handling=='strict':
  121. raise ServerManagedTermError(offending_types, 't')
  122. else:
  123. for t in offending_types:
  124. g.remove((Variable('s'), RDF.type, t))
  125. return g
  126. def _add_srv_mgd_triples(self, rsrc, create=False):
  127. '''
  128. Add server-managed triples to a resource.
  129. @param create (boolean) Whether the resource is being created.
  130. '''
  131. # Message digest.
  132. cksum = Digest.rdf_cksum(rsrc.graph)
  133. rsrc.set(nsc['premis'].hasMessageDigest,
  134. URIRef('urn:sha1:{}'.format(cksum)))
  135. # Create and modify timestamp.
  136. # @TODO Use gunicorn to get request timestamp.
  137. ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
  138. if create:
  139. rsrc.set(nsc['fcrepo'].created, ts)
  140. rsrc.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)
  141. rsrc.set(nsc['fcrepo'].lastModified, ts)
  142. rsrc.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)
  143. # Base LDP types.
  144. for t in self.base_types:
  145. rsrc.add(RDF.type, t)
  146. return rsrc
  147. def _sparql_delta(self, q, handling=None):
  148. '''
  149. Calculate the delta obtained by a SPARQL Update operation.
  150. This does a couple of extra things:
  151. 1. It ensures that no resources outside of the subject of the request
  152. are modified (e.g. by variable subjects)
  153. 2. It verifies that none of the terms being modified is server managed.
  154. This method extracts an in-memory copy of the resource and performs the
  155. query on that once it has checked if any of the server managed terms is
  156. in the delta. If it is, it raises an exception.
  157. NOTE: This only checks if a server-managed term is effectively being
  158. modified. If a server-managed term is present in the query but does not
  159. cause any change in the updated resource, no error is raised.
  160. @return tuple Remove and add triples. These can be used with
  161. `BaseStoreLayout.update_resource`.
  162. '''
  163. pre_g = self.rdfly.extract_imr().graph
  164. post_g = deepcopy(pre_g)
  165. post_g.update(q)
  166. remove = pre_g - post_g
  167. add = post_g - pre_g
  168. self._logger.info('Removing: {}'.format(
  169. remove.serialize(format='turtle').decode('utf8')))
  170. self._logger.info('Adding: {}'.format(
  171. add.serialize(format='turtle').decode('utf8')))
  172. remove = self._check_mgd_terms(remove, handling)
  173. add = self._check_mgd_terms(add, handling)
  174. return remove, add
  175. def _ensure_single_subject_rdf(self, g):
  176. '''
  177. Ensure that a RDF payload for a POST or PUT has a single resource.
  178. '''
  179. for s in set(g.subjects()):
  180. if not s == self.uri:
  181. return SingleSubjectError(s, self.uri)
  182. class Ldpc(LdpRs):
  183. '''LDPC (LDP Container).'''
  184. def __init__(self, uuid):
  185. super().__init__(uuid)
  186. self.base_types.update({
  187. nsc['ldp'].Container,
  188. })
  189. class LdpBc(Ldpc):
  190. '''LDP-BC (LDP Basic Container).'''
  191. def __init__(self, uuid):
  192. super().__init__(uuid)
  193. self.base_types.update({
  194. nsc['ldp'].BasicContainer,
  195. })
  196. class LdpDc(Ldpc):
  197. '''LDP-DC (LDP Direct Container).'''
  198. def __init__(self, uuid):
  199. super().__init__(uuid)
  200. self.base_types.update({
  201. nsc['ldp'].DirectContainer,
  202. })
  203. class LdpIc(Ldpc):
  204. '''LDP-IC (LDP Indirect Container).'''
  205. def __init__(self, uuid):
  206. super().__init__(uuid)
  207. self.base_types.update({
  208. nsc['ldp'].IndirectContainer,
  209. })