# ldp_rs.py
  1. from copy import deepcopy
  2. import arrow
  3. from rdflib import Graph
  4. from rdflib.resource import Resource
  5. from rdflib.namespace import RDF, XSD
  6. from rdflib.plugins.sparql.parser import parseUpdate
  7. from rdflib.term import URIRef, Literal, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  10. srv_mgd_predicates, srv_mgd_types
  11. from lakesuperior.model.ldpr import Ldpr, transactional, must_exist
  12. from lakesuperior.exceptions import ResourceNotExistsError, \
  13. ServerManagedTermError, SingleSubjectError
  14. from lakesuperior.util.digest import Digest
  15. from lakesuperior.util.translator import Translator
  16. class LdpRs(Ldpr):
  17. '''LDP-RS (LDP RDF source).
  18. Definition: https://www.w3.org/TR/ldp/#ldprs
  19. '''
  20. DEFAULT_USER = Literal('BypassAdmin')
  21. RETURN_CHILD_RES_URI = nsc['fcrepo'].EmbedResources
  22. RETURN_INBOUND_REF_URI = nsc['fcrepo'].InboundReferences
  23. RETURN_SRV_MGD_RES_URI = nsc['fcrepo'].ServerManaged
  24. base_types = {
  25. nsc['ldp'].RDFSource
  26. }
  27. std_headers = {
  28. 'Accept-Post' : {
  29. 'text/turtle',
  30. 'text/rdf+n3',
  31. 'text/n3',
  32. 'application/rdf+xml',
  33. 'application/n-triples',
  34. 'application/ld+json',
  35. 'multipart/form-data',
  36. 'application/sparql-update',
  37. },
  38. 'Accept-Patch' : {
  39. 'application/sparql-update',
  40. },
  41. }
  42. def get(self, pref_return):
  43. '''
  44. https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
  45. '''
  46. kwargs = {}
  47. minimal = embed_children = incl_inbound = False
  48. kwargs['incl_srv_mgd'] = True
  49. if 'value' in pref_return and pref_return['value'] == 'minimal':
  50. kwargs['minimal'] = True
  51. else:
  52. include = pref_return['parameters']['include'].split(' ') \
  53. if 'include' in pref_return['parameters'] else []
  54. omit = pref_return['parameters']['omit'].split(' ') \
  55. if 'omit' in pref_return['parameters'] else []
  56. self._logger.debug('Include: {}'.format(include))
  57. self._logger.debug('Omit: {}'.format(omit))
  58. if str(self.RETURN_INBOUND_REF_URI) in include:
  59. kwargs['incl_inbound'] = True
  60. if str(self.RETURN_CHILD_RES_URI) in omit:
  61. kwargs['embed_chldren'] = False
  62. if str(self.RETURN_SRV_MGD_RES_URI) in omit:
  63. kwargs['incl_srv_mgd'] = False
  64. imr = self.rdfly.out_rsrc
  65. if not imr or not len(imr.graph):
  66. raise ResourceNotExistsError(self.uri)
  67. return Translator.globalize_rsrc(imr)
  68. @transactional
  69. def post(self, data, format='text/turtle', handling=None):
  70. '''
  71. https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
  72. Perform a POST action after a valid resource URI has been found.
  73. '''
  74. g = Graph().parse(data=data, format=format, publicID=self.urn)
  75. imr = Resource(self._check_mgd_terms(g, handling), self.urn)
  76. imr = self._add_srv_mgd_triples(imr, create=True)
  77. self._ensure_single_subject_rdf(imr.graph)
  78. self.rdfly.create_rsrc(imr)
  79. self._set_containment_rel()
  80. @transactional
  81. def put(self, data, format='text/turtle', handling=None):
  82. '''
  83. https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
  84. '''
  85. g = Graph().parse(data=data, format=format, publicID=self.urn)
  86. imr = Resource(self._check_mgd_terms(g, handling), self.urn)
  87. imr = self._add_srv_mgd_triples(imr, create=True)
  88. self._ensure_single_subject_rdf(imr.graph)
  89. res = self.rdfly.create_or_replace_rsrc(imr)
  90. self._set_containment_rel()
  91. return res
  92. @transactional
  93. @must_exist
  94. def patch(self, data):
  95. '''
  96. https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
  97. '''
  98. trp_remove, trp_add = self._sparql_delta(data)
  99. return self.rdfly.modify_rsrc(trp_remove, trp_add)
  100. ## PROTECTED METHODS ##
  101. def _check_mgd_terms(self, g, handling='strict'):
  102. '''
  103. Check whether server-managed terms are in a RDF payload.
  104. '''
  105. offending_subjects = set(g.subjects()) & srv_mgd_subjects
  106. if offending_subjects:
  107. if handling=='strict':
  108. raise ServerManagedTermError(offending_subjects, 's')
  109. else:
  110. for s in offending_subjects:
  111. g.remove((s, Variable('p'), Variable('o')))
  112. offending_predicates = set(g.predicates()) & srv_mgd_predicates
  113. if offending_predicates:
  114. if handling=='strict':
  115. raise ServerManagedTermError(offending_predicates, 'p')
  116. else:
  117. for p in offending_predicates:
  118. g.remove((Variable('s'), p, Variable('o')))
  119. offending_types = set(g.objects(predicate=RDF.type)) & srv_mgd_types
  120. if offending_types:
  121. if handling=='strict':
  122. raise ServerManagedTermError(offending_types, 't')
  123. else:
  124. for t in offending_types:
  125. g.remove((Variable('s'), RDF.type, t))
  126. return g
  127. def _add_srv_mgd_triples(self, imr, create=False):
  128. '''
  129. Add server-managed triples to a graph.
  130. @param create (boolean) Whether the resource is being created.
  131. '''
  132. # Message digest.
  133. cksum = Digest.rdf_cksum(imr.graph)
  134. imr.set(nsc['premis'].hasMessageDigest,
  135. URIRef('urn:sha1:{}'.format(cksum)))
  136. # Create and modify timestamp.
  137. # @TODO Use gunicorn to get request timestamp.
  138. ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
  139. if create:
  140. imr.set(nsc['fcrepo'].created, ts)
  141. imr.set(nsc['fcrepo'].createdBy, self.DEFAULT_USER)
  142. imr.set(nsc['fcrepo'].lastModified, ts)
  143. imr.set(nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER)
  144. # Base LDP types.
  145. for t in self.base_types:
  146. imr.add(RDF.type, t)
  147. return imr
  148. def _sparql_delta(self, q, handling=None):
  149. '''
  150. Calculate the delta obtained by a SPARQL Update operation.
  151. This does a couple of extra things:
  152. 1. It ensures that no resources outside of the subject of the request
  153. are modified (e.g. by variable subjects)
  154. 2. It verifies that none of the terms being modified is server managed.
  155. This method extracts an in-memory copy of the resource and performs the
  156. query on that once it has checked if any of the server managed terms is
  157. in the delta. If it is, it raises an exception.
  158. NOTE: This only checks if a server-managed term is effectively being
  159. modified. If a server-managed term is present in the query but does not
  160. cause any change in the updated resource, no error is raised.
  161. @return tuple Remove and add triples. These can be used with
  162. `BaseStoreLayout.update_resource`.
  163. '''
  164. pre_g = self.rdfly.extract_imr().graph
  165. post_g = deepcopy(pre_g)
  166. post_g.update(q)
  167. remove = pre_g - post_g
  168. add = post_g - pre_g
  169. self._logger.info('Removing: {}'.format(
  170. remove.serialize(format='turtle').decode('utf8')))
  171. self._logger.info('Adding: {}'.format(
  172. add.serialize(format='turtle').decode('utf8')))
  173. remove = self._check_mgd_terms(remove, handling)
  174. add = self._check_mgd_terms(add, handling)
  175. return remove, add
  176. def _ensure_single_subject_rdf(self, g):
  177. '''
  178. Ensure that a RDF payload for a POST or PUT has a single resource.
  179. '''
  180. for s in set(g.subjects()):
  181. if not s == self.uri:
  182. return SingleSubjectError(s, self.uri)
  183. class Ldpc(LdpRs):
  184. '''LDPC (LDP Container).'''
  185. def __init__(self, uuid):
  186. super().__init__(uuid)
  187. self.base_types.update({
  188. nsc['ldp'].Container,
  189. })
  190. class LdpBc(Ldpc):
  191. '''LDP-BC (LDP Basic Container).'''
  192. def __init__(self, uuid):
  193. super().__init__(uuid)
  194. self.base_types.update({
  195. nsc['ldp'].BasicContainer,
  196. })
  197. class LdpDc(Ldpc):
  198. '''LDP-DC (LDP Direct Container).'''
  199. def __init__(self, uuid):
  200. super().__init__(uuid)
  201. self.base_types.update({
  202. nsc['ldp'].DirectContainer,
  203. })
  204. class LdpIc(Ldpc):
  205. '''LDP-IC (LDP Indirect Container).'''
  206. def __init__(self, uuid):
  207. super().__init__(uuid)
  208. self.base_types.update({
  209. nsc['ldp'].IndirectContainer,
  210. })