base_rdf_layout.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. import logging
  2. from abc import ABCMeta, abstractmethod
  3. from rdflib import Dataset, Graph
  4. from rdflib.query import ResultException
  5. from rdflib.resource import Resource
  6. from rdflib.term import URIRef
  7. from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
  8. from lakesuperior.config_parser import config
  9. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  10. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  11. from lakesuperior.exceptions import ResourceNotExistsError
  12. #def needs_rsrc(fn):
  13. # '''
  14. # Decorator for methods that cannot be called without `self.rsrc` set.
  15. # '''
  16. # def wrapper(self, *args, **kwargs):
  17. # if not hasattr(self, 'rsrc') or self.rsrc is None:
  18. # raise TypeError(
  19. # 'This method must be called by an instance with `rsrc` set.')
  20. #
  21. # return fn(self, *args, **kwargs)
  22. #
  23. # return wrapper
  24. class BaseRdfLayout(metaclass=ABCMeta):
  25. '''
  26. This class exposes an interface to build graph store layouts. It also
  27. provides the baics of the triplestore connection.
  28. Some store layouts are provided. New ones aimed at specific uses
  29. and optimizations of the repository may be developed by extending this
  30. class and implementing all its abstract methods.
  31. A layout is implemented via application configuration. However, once
  32. contents are ingested in a repository, changing a layout will most likely
  33. require a migration.
  34. The custom layout must be in the lakesuperior.store_layouts.rdf
  35. package and the class implementing the layout must be called
  36. `StoreLayout`. The module name is the one defined in the app
  37. configuration.
  38. E.g. if the configuration indicates `simple_layout` the application will
  39. look for
  40. `lakesuperior.store_layouts.rdf.simple_layout.SimpleLayout`.
  41. Some method naming conventions:
  42. - Methods starting with `get_` return a resource.
  43. - Methods starting with `list_` return an iterable or generator of URIs.
  44. - Methods starting with `select_` return an iterable or generator with
  45. table-like data such as from a SELECT statement.
  46. - Methods starting with `ask_` return a boolean value.
  47. '''
  48. ROOT_NODE_URN = nsc['fcsystem'].root
  49. # N.B. This is Fuseki-specific.
  50. UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
  51. RES_CREATED = '_created_'
  52. RES_UPDATED = '_updated_'
  53. _conf = config['application']['store']['ldp_rs']
  54. _logger = logging.getLogger(__name__)
  55. query_ep = _conf['webroot'] + _conf['query_ep']
  56. update_ep = _conf['webroot'] + _conf['update_ep']
  57. ## MAGIC METHODS ##
  58. def __init__(self):
  59. '''Initialize the graph store and a layout.
  60. NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
  61. for Graph Store HTTP protocol
  62. (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
  63. this only in the (currently) unreleased 2.2 branch. It works with Jena,
  64. but other considerations would have to be made (e.g. Jena has no REST
  65. API for handling transactions).
  66. '''
  67. self.ds = Dataset(self.store, default_union=True)
  68. self.ds.namespace_manager = nsm
  69. @property
  70. def store(self):
  71. if not hasattr(self, '_store') or not self._store:
  72. self._store = SPARQLUpdateStore(
  73. queryEndpoint=self.query_ep,
  74. update_endpoint=self.update_ep,
  75. autocommit=False,
  76. dirty_reads=True)
  77. return self._store
  78. def rsrc(self, urn):
  79. '''
  80. Reference to a live data set that can be updated. This exposes the
  81. whole underlying triplestore structure and is used to update a
  82. resource.
  83. '''
  84. return self.ds.resource(urn)
  85. def out_rsrc(self, urn):
  86. '''
  87. Graph obtained by querying the triplestore and adding any abstraction
  88. and filtering to make up a graph that can be used for read-only,
  89. API-facing results. Different layouts can implement this in very
  90. different ways, so it is an abstract method.
  91. @return rdflib.resource.Resource
  92. '''
  93. imr = self.extract_imr(urn)
  94. if not len(imr.graph):
  95. raise ResourceNotExistsError
  96. ## PUBLIC METHODS ##
  97. def query(self, q, initBindings=None, nsc=nsc):
  98. '''
  99. Perform a SPARQL query on the triplestore.
  100. This should provide non-abstract access, independent from the layout,
  101. therefore it should not be overridden by individual layouts.
  102. @param q (string) SPARQL query.
  103. @return rdflib.query.Result
  104. '''
  105. self._logger.debug('Sending SPARQL query: {}'.format(q))
  106. return self.ds.query(q, initBindings=initBindings, initNs=nsc)
  107. def update(self, q, initBindings=None, nsc=nsc):
  108. '''
  109. Perform a SPARQL update on the triplestore.
  110. This should provide low-level access, independent from the layout,
  111. therefore it should not be overridden by individual layouts.
  112. @param q (string) SPARQL-UPDATE query.
  113. @return None
  114. '''
  115. self._logger.debug('Sending SPARQL update: {}'.format(q))
  116. return self.ds.query(q, initBindings=initBindings, initNs=nsc)
  117. def create_or_replace_rsrc(self, imr):
  118. '''Create a resource graph in the main graph if it does not exist.
  119. If it exists, replace the existing one retaining the creation date.
  120. '''
  121. if self.ask_rsrc_exists(imr.identifier):
  122. self._logger.info(
  123. 'Resource {} exists. Removing all outbound triples.'
  124. .format(imr.identifier))
  125. return self.replace_rsrc(imr)
  126. else:
  127. return self.create_rsrc(imr)
  128. ## INTERFACE METHODS ##
  129. # Implementers of custom layouts should look into these methods to
  130. # implement.
  131. @abstractmethod
  132. def extract_imr(self, uri, graph=None, minimal=False,
  133. incl_inbound=False, embed_children=False, incl_srv_mgd=True):
  134. '''
  135. Extract an in-memory resource based on the copy of a graph on a subject.
  136. @param uri (URIRef) Resource URI.
  137. @param graph (rdflib.term.URIRef | set(rdflib.graphURIRef)) The graph
  138. to extract from. This can be an URI for a single graph, or a list of
  139. graph URIs in which case an aggregate graph will be used.
  140. @param inbound (boolean) Whether to pull triples that have the resource
  141. URI as their object.
  142. '''
  143. pass
  144. @abstractmethod
  145. def ask_rsrc_exists(self, urn):
  146. '''
  147. Ask if a resource is stored in the graph store.
  148. @param uri (rdflib.term.URIRef) The internal URN of the resource to be
  149. queried.
  150. @return boolean
  151. '''
  152. pass
  153. @abstractmethod
  154. def create_rsrc(self, imr):
  155. '''Create a resource graph in the main graph.
  156. If the resource exists, raise an exception.
  157. '''
  158. pass
  159. @abstractmethod
  160. def replace_rsrc(self, imr):
  161. '''Replace a resource, i.e. delete all the triples and re-add the
  162. ones provided.
  163. @param g (rdflib.Graph) Graph to load. It must not contain
  164. `fcrepo:created` and `fcrepo:createdBy`.
  165. '''
  166. pass
  167. @abstractmethod
  168. def modify_dataset(self, remove_trp, add_trp):
  169. '''
  170. Adds and/or removes triples from the graph.
  171. NOTE: This is not specific to a resource. The LDP layer is responsible
  172. for checking that all the +/- triples are referring to the intended
  173. subject(s).
  174. @param remove (rdflib.Graph) Triples to be removed.
  175. @param add (rdflib.Graph) Triples to be added.
  176. '''
  177. pass
  178. @abstractmethod
  179. def delete_rsrc(self, urn, inbound=True):
  180. pass
  181. ## PROTECTED METHODS ##
  182. def _set_msg_digest(self):
  183. '''
  184. Add a message digest to the current resource.
  185. '''
  186. cksum = Digest.rdf_cksum(self.rsrc.graph)
  187. self.rsrc.set(nsc['premis'].hasMessageDigest,
  188. URIRef('urn:sha1:{}'.format(cksum)))