simple_layout.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. from copy import deepcopy
  2. import arrow
  3. from rdflib import Graph
  4. from rdflib.namespace import RDF, XSD
  5. from rdflib.query import ResultException
  6. from rdflib.resource import Resource
  7. from rdflib.term import Literal, URIRef, Variable
  8. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  9. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  10. from lakesuperior.dictionaries.srv_mgd_terms import srv_mgd_subjects, \
  11. srv_mgd_predicates, srv_mgd_types
  12. from lakesuperior.store_layouts.rdf.base_rdf_layout import BaseRdfLayout, \
  13. needs_rsrc
  14. from lakesuperior.util.translator import Translator
  15. class SimpleLayout(BaseRdfLayout):
  16. '''
  17. This is the simplest layout.
  18. It uses a flat triple structure without named graphs aimed at performance.
  19. Changes are destructive.
  20. In theory it could be used on top of a triplestore instead of a quad-store
  21. for (possible) improved speed and reduced storage.
  22. '''
  23. def extract_imr(self, uri=None, graph=None, minimal=False,
  24. incl_inbound=False, embed_children=False, incl_srv_mgd=True):
  25. '''
  26. See base_rdf_layout.extract_imr.
  27. '''
  28. uri = uri or self.base_urn
  29. inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
  30. if incl_inbound else ''
  31. embed_children_qry = '''
  32. OPTIONAL {{
  33. {0} ldp:contains ?c .
  34. ?c ?cp ?co .
  35. }}
  36. '''.format(uri.n3()) if embed_children else ''
  37. q = '''
  38. CONSTRUCT {{
  39. {0} ?p ?o .{1}
  40. ?c ?cp ?co .
  41. }} WHERE {{
  42. {0} ?p ?o .{1}{2}
  43. #FILTER (?p != premis:hasMessageDigest) .
  44. }}
  45. '''.format(uri.n3(), inbound_qry, embed_children_qry)
  46. try:
  47. qres = self.query(q)
  48. except ResultException:
  49. # RDFlib bug? https://github.com/RDFLib/rdflib/issues/775
  50. g = Graph()
  51. else:
  52. g = qres.graph
  53. rsrc = Resource(g, uri)
  54. if not incl_srv_mgd:
  55. self._logger.info('Removing server managed triples.')
  56. for p in srv_mgd_predicates:
  57. self._logger.debug('Removing predicate: {}'.format(p))
  58. rsrc.remove(p)
  59. for t in srv_mgd_types:
  60. self._logger.debug('Removing type: {}'.format(t))
  61. rsrc.remove(RDF.type, t)
  62. return rsrc
  63. def ask_rsrc_exists(self, uri=None):
  64. '''
  65. See base_rdf_layout.ask_rsrc_exists.
  66. '''
  67. if not uri:
  68. if self.rsrc is not None:
  69. uri = self.rsrc.identifier
  70. else:
  71. return False
  72. self._logger.info('Searching for resource: {}'.format(uri))
  73. return (uri, Variable('p'), Variable('o')) in self.ds
  74. @needs_rsrc
  75. def create_rsrc(self, imr):
  76. '''
  77. See base_rdf_layout.create_rsrc.
  78. '''
  79. for s, p, o in imr.graph:
  80. self.ds.add((s, p, o))
  81. return self.RES_CREATED
  82. @needs_rsrc
  83. def replace_rsrc(self, imr):
  84. '''
  85. See base_rdf_layout.replace_rsrc.
  86. '''
  87. # Delete all triples but keep creation date and creator.
  88. created = self.rsrc.value(nsc['fcrepo'].created)
  89. created_by = self.rsrc.value(nsc['fcrepo'].createdBy)
  90. imr.set(nsc['fcrepo'].created, created)
  91. imr.set(nsc['fcrepo'].createdBy, created_by)
  92. # Delete the stored triples.
  93. self.delete_rsrc()
  94. for s, p, o in imr.graph:
  95. self.ds.add((s, p, o))
  96. return self.RES_UPDATED
  97. @needs_rsrc
  98. def modify_rsrc(self, remove, add):
  99. '''
  100. See base_rdf_layout.update_rsrc.
  101. '''
  102. for t in remove.predicate_objects():
  103. self.rsrc.remove(t[0], t[1])
  104. for t in add.predicate_objects():
  105. self.rsrc.add(t[0], t[1])
  106. def delete_rsrc(self, inbound=True):
  107. '''
  108. Delete a resource. If `inbound` is specified, delete all inbound
  109. relationships as well.
  110. '''
  111. print('Removing resource {}.'.format(self.rsrc.identifier))
  112. self.rsrc.remove(Variable('p'))
  113. if inbound:
  114. self.ds.remove(
  115. (Variable('s'), Variable('p'), self.rsrc.identifier))
  116. ## PROTECTED METHODS ##
  117. def _unique_value(self, p):
  118. '''
  119. Use this to retrieve a single value knowing that there SHOULD be only
  120. one (e.g. `skos:prefLabel`), If more than one is found, raise an
  121. exception.
  122. @param rdflib.Resource rsrc The resource to extract value from.
  123. @param rdflib.term.URIRef p The predicate to serach for.
  124. @throw ValueError if more than one value is found.
  125. '''
  126. values = self.rsrc[p]
  127. value = next(values)
  128. try:
  129. next(values)
  130. except StopIteration:
  131. return value
  132. # If the second next() did not raise a StopIteration, something is
  133. # wrong.
  134. raise ValueError('Predicate {} should be single valued. Found: {}.'\
  135. .format(set(values)))