simple_layout.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. from copy import deepcopy
  2. import arrow
  3. from rdflib import Graph
  4. from rdflib.namespace import RDF, XSD
  5. from rdflib.query import ResultException
  6. from rdflib.resource import Resource
  7. from rdflib.term import Literal, URIRef, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  10. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  11. srv_mgd_predicates, srv_mgd_types
  12. from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
  13. needs_rsrc
  14. from lakesuperior.util.digest import Digest
  15. from lakesuperior.util.translator import Translator
  class SimpleLayout(BaseRdfLayout):
      '''
      This is the simplest layout.

      It uses a flat triple structure without named graphs aimed at performance.

      Changes are destructive.

      In theory it could be used on top of a triplestore instead of a quad-store
      for (possible) improved speed and reduced storage.
      '''

      @property
      def headers(self):
          '''
          See base_rdf_layout.headers.

          Build a dict of response headers from the current resource. `Link` is
          always present (empty list); `ETag` and `Last-Modified` are added only
          when the resource has a message digest and a last-modified term
          respectively.

          @return dict
          '''
          headers = {
              'Link' : [],
          }

          # @NOTE: Easy with these one-by-one picks. Each one of them is a call
          # to the triplestore.
          digest = self.rsrc.value(nsc['premis'].hasMessageDigest)
          if digest:
              # The checksum is the last colon-separated segment of the digest
              # identifier (see `create_rsrc`, which stores `urn:sha1:<cksum>`).
              etag = digest.identifier.split(':')[-1]
              # The value must be a plain string. A trailing comma here would
              # turn it into a 1-tuple.
              headers['ETag'] = 'W/"{}"'.format(etag)

          last_updated_term = self.rsrc.value(nsc['fcrepo'].lastModified)
          if last_updated_term:
              # NOTE(review): HTTP-dates (RFC 7231) use a zero-padded day and a
              # literal `GMT` suffix; confirm that this format is intentional.
              headers['Last-Modified'] = arrow.get(last_updated_term)\
                  .format('ddd, D MMM YYYY HH:mm:ss Z')

          return headers


      def extract_imr(self, uri=None, graph=None, minimal=False,
              incl_inbound=False, incl_children=True, incl_srv_mgd=True):
          '''
          See base_rdf_layout.extract_imr.

          Run a CONSTRUCT query for the triples of a resource and wrap the
          resulting graph in a `rdflib.resource.Resource`.

          @param rdflib.term.URIRef uri Subject to extract. Defaults to
          `self.base_urn`.
          @param graph Not used by this layout.
          @param boolean minimal Not used by this layout.
          @param boolean incl_inbound Whether to include triples that have the
          resource as their object.
          @param boolean incl_children Whether to embed the triples of resources
          that the subject `ldp:contains`.
          @param boolean incl_srv_mgd Whether to keep server-managed predicates
          and types in the result.

          @return rdflib.resource.Resource
          '''
          uri = uri or self.base_urn
          subject = uri.n3()

          construct_extra = ''
          where_extra = ''

          if incl_inbound:
              # Inbound links must point at the resource actually being
              # extracted. They are OPTIONAL so that a resource without any
              # inbound link still yields its own triples.
              construct_extra += '\n      ?s1 ?p1 {} .'.format(subject)
              where_extra += '\n      OPTIONAL {{ ?s1 ?p1 {} . }}'.format(
                      subject)

          if incl_children:
              construct_extra += '\n      ?c ?cp ?co .'
              where_extra += '''
        OPTIONAL {{
          {0} ldp:contains ?c .
          ?c ?cp ?co .
        }}'''.format(subject)

          q = '''
      CONSTRUCT {{
        {0} ?p ?o .{1}
      }} WHERE {{
        {0} ?p ?o .{2}
        FILTER (?p != premis:hasMessageDigest) .
      }}
      '''.format(subject, construct_extra, where_extra)

          try:
              qres = self.query(q)
          except ResultException:
              # RDFlib bug? https://github.com/RDFLib/rdflib/issues/775
              g = Graph()
          else:
              g = qres.graph

          rsrc = Resource(g, uri)

          if not incl_srv_mgd:
              self._logger.info('Removing server managed triples.')
              for p in srv_mgd_predicates:
                  self._logger.debug('Removing predicate: {}'.format(p))
                  rsrc.remove(p)
              for t in srv_mgd_types:
                  self._logger.debug('Removing type: {}'.format(t))
                  rsrc.remove(RDF.type, t)

          return rsrc


      @needs_rsrc
      def out_rsrc(self, **kwargs):
          '''
          See base_rdf_layout.out_rsrc.

          Extract the resource via `extract_imr` (all keyword arguments are
          passed through) and strip the message digest from it.

          @return rdflib.resource.Resource
          '''
          im_rsrc = self.extract_imr(**kwargs)
          im_rsrc.remove(nsc['premis'].hasMessageDigest)

          return im_rsrc


      def ask_rsrc_exists(self, uri=None):
          '''
          See base_rdf_layout.ask_rsrc_exists.

          @param rdflib.term.URIRef uri Subject to look for. Defaults to the
          identifier of `self.rsrc`.

          @return boolean False if no URI is given and no resource is set;
          otherwise whether the store has a triple with that subject.
          '''
          if not uri:
              if self.rsrc is None:
                  return False
              uri = self.rsrc.identifier

          self._logger.info('Searching for resource: {}'.format(uri))
          # NOTE(review): `Variable` terms are used as wildcards here, which
          # presumably relies on the underlying store - confirm if the store
          # back end changes.
          return (uri, Variable('p'), Variable('o')) in self.ds


      @needs_rsrc
      def create_or_replace_rsrc(self, g):
          '''
          See base_rdf_layout.create_or_replace_rsrc.

          If the resource already exists, its outbound triples are deleted but
          its creation date and creator are carried over.

          @param rdflib.Graph g Triples to add to the store.

          @return `self.RES_UPDATED` if the resource existed, `self.RES_CREATED`
          otherwise.
          '''
          # @TODO Use gunicorn to get request timestamp.
          ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)

          if self.ask_rsrc_exists():
              self._logger.info(
                      'Resource {} exists. Removing all outbound triples.'
                      .format(self.rsrc.identifier))

              # Delete all triples but keep creation date and creator.
              created = self.rsrc.value(nsc['fcrepo'].created)
              created_by = self.rsrc.value(nsc['fcrepo'].createdBy)

              self.delete_rsrc()
              res = self.RES_UPDATED
          else:
              created = ts
              created_by = Literal('BypassAdmin')
              res = self.RES_CREATED

          self._logger.info('Created timestamp: {}'.format(ts))

          self.rsrc.set(nsc['fcrepo'].created, created)
          self.rsrc.set(nsc['fcrepo'].createdBy, created_by)

          self.rsrc.set(nsc['fcrepo'].lastModified, ts)
          self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))

          for s, p, o in g:
              self.ds.add((s, p, o))

          return res


      @needs_rsrc
      def create_rsrc(self, g):
          '''
          See base_rdf_layout.create_rsrc.

          Stamp the resource with creation metadata and a SHA1 message digest,
          then add the given triples to the store.

          @param rdflib.Graph g Triples to add to the store.
          '''
          # @TODO Use gunicorn to get request timestamp.
          ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)

          self.rsrc.set(nsc['fcrepo'].created, ts)
          self.rsrc.set(nsc['fcrepo'].createdBy, Literal('BypassAdmin'))

          # The checksum is computed on the resource graph before the triples
          # of `g` are added.
          cksum = Digest.rdf_cksum(self.rsrc.graph)
          self.rsrc.set(nsc['premis'].hasMessageDigest,
                  URIRef('urn:sha1:{}'.format(cksum)))

          for s, p, o in g:
              self.ds.add((s, p, o))


      @needs_rsrc
      def patch_rsrc(self, data):
          '''
          Perform a SPARQL UPDATE on a resource.

          @TODO deprecate.

          @param string data SPARQL UPDATE string. Every `<>` in it is replaced
          with the identifier of the current resource.
          '''
          # @TODO Use gunicorn to get request timestamp.
          ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)

          q = Translator.localize_string(data).replace(
                  '<>', self.rsrc.identifier.n3())

          self.rsrc.set(nsc['fcrepo'].lastModified, ts)
          self.rsrc.set(nsc['fcrepo'].lastModifiedBy, Literal('BypassAdmin'))

          self.ds.update(q)


      @needs_rsrc
      def modify_rsrc(self, remove, add):
          '''
          See base_rdf_layout.update_rsrc.

          @param remove Object whose `predicate_objects()` pairs are removed
          from the current resource.
          @param add Object whose `predicate_objects()` pairs are added to the
          current resource.
          '''
          for p, o in remove.predicate_objects():
              self.rsrc.remove(p, o)

          for p, o in add.predicate_objects():
              self.rsrc.add(p, o)


      def delete_rsrc(self, inbound=False):
          '''
          Delete a resource. If `inbound` is specified, delete all inbound
          relationships as well.

          @param boolean inbound Whether to also remove the triples that have
          the resource as their object.
          '''
          # Log through the class logger like every other method does, rather
          # than writing to stdout.
          self._logger.info(
                  'Removing resource {}.'.format(self.rsrc.identifier))

          self.rsrc.remove(Variable('p'))
          if inbound:
              self.ds.remove(
                      (Variable('s'), Variable('p'), self.rsrc.identifier))


      ## PROTECTED METHODS ##

      def _unique_value(self, p):
          '''
          Use this to retrieve a single value knowing that there SHOULD be only
          one (e.g. `skos:prefLabel`). If more than one is found, raise an
          exception.

          @param rdflib.term.URIRef p The predicate to search for.

          @return The single value of `p` for the current resource.

          @throw ValueError if more than one value is found.
          @throw StopIteration if no value is found.
          '''
          values = self.rsrc[p]
          # An empty result lets StopIteration propagate, as it always did.
          first = next(values)

          # Anything left in the iterator means the predicate is multi-valued.
          extra = list(values)
          if extra:
              raise ValueError(
                      'Predicate {} should be single valued. Found: {}.'
                      .format(p, set([first] + extra)))

          return first