simple_layout.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. from copy import deepcopy
  2. import arrow
  3. from rdflib import Graph
  4. from rdflib.namespace import RDF, XSD
  5. from rdflib.query import ResultException
  6. from rdflib.resource import Resource
  7. from rdflib.term import Literal, URIRef, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  10. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  11. srv_mgd_predicates, srv_mgd_types
  12. from lakesuperior.exceptions import InvalidResourceError, \
  13. ResourceNotExistsError, TombstoneError
  14. from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout
  15. from lakesuperior.util.translator import Translator
  16. class SimpleLayout(BaseRdfLayout):
  17. '''
  18. This is the simplest layout.
  19. It uses a flat triple structure without named graphs aimed at performance.
  20. Changes are destructive.
  21. In theory it could be used on top of a triplestore instead of a quad-store
  22. for (possible) improved speed and reduced storage.
  23. '''
  24. def extract_imr(self, uri, strict=False, incl_inbound=False,
  25. incl_children=True, embed_children=False, incl_srv_mgd=True):
  26. '''
  27. See base_rdf_layout.extract_imr.
  28. '''
  29. inbound_construct = '\n?s1 ?p1 {} .'.format(uri.n3()) \
  30. if incl_inbound else ''
  31. inbound_qry = '\nOPTIONAL {{ ?s1 ?p1 {} . }} .'.format(uri.n3()) \
  32. if incl_inbound else ''
  33. embed_children_qry = '''
  34. OPTIONAL {{
  35. {0} ldp:contains ?c .
  36. ?c ?cp ?co .
  37. }}
  38. '''.format(uri.n3()) if incl_children and embed_children else ''
  39. incl_children_qry = '\nFILTER ( ?p != ldp:contains )' \
  40. if not incl_children else ''
  41. srv_mgd_qry = ''
  42. if not incl_srv_mgd:
  43. for p in srv_mgd_predicates:
  44. self._logger.debug('Removing predicate: {}'.format(p))
  45. srv_mgd_qry += '\nFILTER ( ?p != {} ) .'.format(p.n3())
  46. for t in srv_mgd_types:
  47. self._logger.debug('Removing type: {}'.format(t))
  48. srv_mgd_qry += '\nMINUS {{ ?s a {} .}} .'.format(t.n3())
  49. q = '''
  50. CONSTRUCT {{
  51. {uri} ?p ?o .{inb_cnst}
  52. ?c ?cp ?co .
  53. }} WHERE {{
  54. {uri} ?p ?o .{inb_qry}{incl_chld}{embed_chld}{omit_srv_mgd}
  55. #FILTER (?p != premis:hasMessageDigest) .
  56. }}
  57. '''.format(uri=uri.n3(), inb_cnst=inbound_construct,
  58. inb_qry=inbound_qry, incl_chld=incl_children_qry,
  59. embed_chld=embed_children_qry, omit_srv_mgd=srv_mgd_qry)
  60. try:
  61. qres = self.query(q)
  62. except ResultException:
  63. # RDFlib bug: https://github.com/RDFLib/rdflib/issues/775
  64. g = Graph()
  65. else:
  66. g = qres.graph
  67. #self._logger.debug('Found resource: {}'.format(
  68. # g.serialize(format='turtle').decode('utf-8')))
  69. if strict and not len(g):
  70. raise ResourceNotExistsError(uri)
  71. rsrc = Resource(g, uri)
  72. # Check if resource is a tombstone.
  73. if rsrc[RDF.type : nsc['fcsystem'].Tombstone]:
  74. raise TombstoneError(
  75. Translator.uri_to_uuid(rsrc.identifier),
  76. rsrc.value(nsc['fcrepo'].created))
  77. elif rsrc.value(nsc['fcsystem'].tombstone):
  78. tombstone_rsrc = rsrc.value(nsc['fcsystem'].tombstone)
  79. raise TombstoneError(
  80. Translator.uri_to_uuid(rsrc.identifier),
  81. tombstone_rsrc.value(nsc['fcrepo'].created))
  82. return rsrc
  83. def ask_rsrc_exists(self, urn):
  84. '''
  85. See base_rdf_layout.ask_rsrc_exists.
  86. '''
  87. self._logger.info('Checking if resource exists: {}'.format(urn))
  88. imr = self.extract_imr(urn, incl_children=False)
  89. return len(imr.graph) > 0
  90. def create_rsrc(self, imr):
  91. '''
  92. See base_rdf_layout.create_rsrc.
  93. '''
  94. self._logger.debug('Creating resource:\n{}'.format(
  95. imr.graph.serialize(format='turtle').decode('utf8')))
  96. #self.ds |= imr.graph # This does not seem to work with datasets.
  97. for t in imr.graph:
  98. self.ds.add(t)
  99. return self.RES_CREATED
  100. def replace_rsrc(self, imr):
  101. '''
  102. See base_rdf_layout.replace_rsrc.
  103. '''
  104. rsrc = self.rsrc(imr.identifier)
  105. # Delete the stored triples but spare the protected predicates.
  106. del_trp_qry = []
  107. for p in rsrc.predicates():
  108. if p.identifier not in self.protected_pred:
  109. self._logger.debug('Removing {}'.format(p.identifier))
  110. rsrc.remove(p.identifier)
  111. else:
  112. self._logger.debug('NOT Removing {}'.format(p))
  113. imr.remove(p.identifier)
  114. #self.ds |= imr.graph # This does not seem to work with datasets.
  115. for t in imr.graph:
  116. self.ds.add(t)
  117. return self.RES_UPDATED
  118. def modify_dataset(self, remove_trp, add_trp):
  119. '''
  120. See base_rdf_layout.update_rsrc.
  121. '''
  122. #self._logger.debug('Remove triples: {}'.format(
  123. # remove_trp.serialize(format='turtle').decode('utf-8')))
  124. #self._logger.debug('Add triples: {}'.format(
  125. # add_trp.serialize(format='turtle').decode('utf-8')))
  126. for t in remove_trp:
  127. self.ds.remove(t)
  128. for t in add_trp:
  129. self.ds.add(t)
  130. ## PROTECTED METHODS ##
  131. def _do_delete_rsrc(self, rsrc, inbound):
  132. '''
  133. See BaseRdfLayout._do_delete_rsrc
  134. '''
  135. urn = rsrc.identifier
  136. print('Removing resource {}.'.format(urn))
  137. rsrc.remove(Variable('p'))
  138. if inbound:
  139. self.ds.remove((Variable('s'), Variable('p'), rsrc.identifier))
  140. return urn
  141. def leave_tombstone(self, urn, parent_urn=None):
  142. '''
  143. See BaseRdfLayout.leave_tombstone
  144. '''
  145. if parent_urn:
  146. self.ds.add((urn, nsc['fcsystem'].tombstone, parent_urn))
  147. else:
  148. # @TODO Use gunicorn to get request timestamp.
  149. ts = Literal(arrow.utcnow(), datatype=XSD.dateTime)
  150. self.ds.add((urn, RDF.type, nsc['fcsystem'].Tombstone))
  151. self.ds.add((urn, nsc['fcrepo'].created, ts))
  152. def delete_tombstone(self, urn):
  153. '''
  154. See BaseRdfLayout.leave_tombstone
  155. '''
  156. self.ds.remove((urn, RDF.type, nsc['fcsystem'].Tombstone))
  157. self.ds.remove((urn, nsc['fcrepo'].created, None))
  158. self.ds.remove((None, nsc['fcsystem'].tombstone, urn))