base_rdf_layout.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. import logging
  2. from abc import ABCMeta, abstractmethod
  3. from rdflib import Dataset, Graph
  4. from rdflib.query import ResultException
  5. from rdflib.resource import Resource
  6. from rdflib.term import URIRef
  7. from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
  8. from lakesuperior.config_parser import config
  9. from lakesuperior.dictionaries.namespaces import ns_collection as nsc
  10. from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
  11. def needs_rsrc(fn):
  12. '''
  13. Decorator for methods that cannot be called without `self.rsrc` set.
  14. '''
  15. def wrapper(self, *args, **kwargs):
  16. if not isset(self, '_rsrc') or self._rsrc is None:
  17. raise TypeError(
  18. 'This method must be called by an instance with `rsrc` set.')
  19. return fn(self, *args, **kwargs)
  20. return wrapper
  21. class BaseRdfLayout(metaclass=ABCMeta):
  22. '''
  23. This class exposes an interface to build graph store layouts. It also
  24. provides the baics of the triplestore connection.
  25. Some store layouts are provided. New ones aimed at specific uses
  26. and optimizations of the repository may be developed by extending this
  27. class and implementing all its abstract methods.
  28. A layout is implemented via application configuration. However, once
  29. contents are ingested in a repository, changing a layout will most likely
  30. require a migration.
  31. The custom layout must be in the lakesuperior.store_layouts.rdf
  32. package and the class implementing the layout must be called
  33. `StoreLayout`. The module name is the one defined in the app
  34. configuration.
  35. E.g. if the configuration indicates `simple_layout` the application will
  36. look for
  37. `lakesuperior.store_layouts.rdf.simple_layout.SimpleLayout`.
  38. Some method naming conventions:
  39. - Methods starting with `get_` return a resource.
  40. - Methods starting with `list_` return an iterable or generator of URIs.
  41. - Methods starting with `select_` return an iterable or generator with
  42. table-like data such as from a SELECT statement.
  43. - Methods starting with `ask_` return a boolean value.
  44. '''
  45. ROOT_NODE_URN = nsc['fcsystem'].root
  46. # N.B. This is Fuseki-specific.
  47. UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
  48. _conf = config['application']['store']['ldp_rs']
  49. _logger = logging.getLogger(__name__)
  50. query_ep = _conf['webroot'] + _conf['query_ep']
  51. update_ep = _conf['webroot'] + _conf['update_ep']
  52. ## MAGIC METHODS ##
  53. def __init__(self, urn=None):
  54. '''Initialize the graph store and a layout.
  55. NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
  56. for Graph Store HTTP protocol
  57. (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
  58. this only in the (currently) unreleased 2.2 branch. It works with Jena,
  59. but other considerations would have to be made (e.g. Jena has no REST
  60. API for handling transactions).
  61. In a more advanced development phase it could be possible to extend the
  62. SPARQLUpdateStore class to add non-standard interaction with specific
  63. SPARQL implementations in order to support ACID features provided
  64. by them; e.g. Blazegraph's RESTful transaction handling methods.
  65. The layout can be initialized with a URN to make resource-centric
  66. operations simpler. However, for generic queries, urn can be None and
  67. no `self.rsrc` is assigned. In this case, some methods (the ones
  68. decorated by `@needs_rsrc`) will not be available.
  69. '''
  70. self.ds = Dataset(self.store, default_union=True)
  71. self.ds.namespace_manager = nsm
  72. self._base_urn = urn
  73. @property
  74. def store(self):
  75. if not hasattr(self, '_store') or not self._store:
  76. self._store = SPARQLUpdateStore(
  77. queryEndpoint=self.query_ep,
  78. update_endpoint=self.update_ep,
  79. autocommit=False,
  80. dirty_reads=True)
  81. return self._store
  82. @property
  83. def base_urn(self):
  84. '''
  85. The base URN for the current resource being handled.
  86. This value is only here for convenience. It does not preclude one from
  87. using an instance of this class with more than one subject.
  88. '''
  89. return self._base_urn
  90. @property
  91. def rsrc(self):
  92. '''
  93. Reference to a live data set that can be updated. This exposes the
  94. whole underlying triplestore structure and is used to update a
  95. resource.
  96. '''
  97. if self.base_urn is None:
  98. return None
  99. return self.ds.resource(self.base_urn)
  100. @property
  101. @abstractmethod
  102. @needs_rsrc
  103. def headers(self):
  104. '''
  105. Return a dict with information for generating HTTP headers.
  106. @retun dict
  107. '''
  108. pass
  109. ## PUBLIC METHODS ##
  110. def query(self, q, initBindings=None, nsc=nsc):
  111. '''
  112. Perform a SPARQL query on the triplestore.
  113. This should provide non-abstract access, independent from the layout,
  114. therefore it should not be overridden by individual layouts.
  115. @param q (string) SPARQL query.
  116. @return rdflib.query.Result
  117. '''
  118. self._logger.debug('Sending SPARQL query: {}'.format(q))
  119. return self.ds.query(q, initBindings=initBindings, initNs=nsc)
  120. def update(self, q, initBindings=None, nsc=nsc):
  121. '''
  122. Perform a SPARQL update on the triplestore.
  123. This should provide non-abstract access, independent from the layout,
  124. therefore it should not be overridden by individual layouts.
  125. @param q (string) SPARQL-UPDATE query.
  126. @return None
  127. '''
  128. self._logger.debug('Sending SPARQL update: {}'.format(q))
  129. return self.ds.query(q, initBindings=initBindings, initNs=nsc)
  130. ## INTERFACE METHODS ##
  131. # Implementers of custom layouts should look into these methods to
  132. # implement.
  133. @abstractmethod
  134. def extract_imr(self, uri=None, graph=None, inbound=False):
  135. '''
  136. Extract an in-memory resource based on the copy of a graph on a subject.
  137. @param uri (URIRef) Resource URI.
  138. @param graph (rdflib.term.URIRef | set(rdflib.graphURIRef)) The graph
  139. to extract from. This can be an URI for a single graph, or a list of
  140. graph URIs in which case an aggregate graph will be used.
  141. @param inbound (boolean) Whether to pull triples that have the resource
  142. URI as their object.
  143. '''
  144. pass
  145. @abstractmethod
  146. @needs_rsrc
  147. def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
  148. '''
  149. Graph obtained by querying the triplestore and adding any abstraction
  150. and filtering to make up a graph that can be used for read-only,
  151. API-facing results. Different layouts can implement this in very
  152. different ways, so it is an abstract method.
  153. @return rdflib.resource.Resource
  154. '''
  155. pass
  156. @abstractmethod
  157. def ask_rsrc_exists(self, uri=None):
  158. '''
  159. Ask if a resource exists (is stored) in the graph store.
  160. @param uri (rdflib.term.URIRef) If this is provided, this method
  161. will look for the specified resource. Otherwise, it will look for the
  162. default resource. If this latter is not specified, the result is False.
  163. @return boolean
  164. '''
  165. pass
  166. @abstractmethod
  167. @needs_rsrc
  168. def create_or_replace_rsrc(self, urn, data, commit=True):
  169. '''Create a resource graph in the main graph if it does not exist.
  170. If it exists, replace the existing one retaining the creation date.
  171. '''
  172. pass
  173. @abstractmethod
  174. @needs_rsrc
  175. def create_rsrc(self, urn, data, commit=True):
  176. '''Create a resource graph in the main graph.
  177. If the resource exists, raise an exception.
  178. '''
  179. pass
  180. @abstractmethod
  181. @needs_rsrc
  182. def patch_rsrc(self, urn, data, commit=False):
  183. '''
  184. Perform a SPARQL UPDATE on a resource.
  185. '''
  186. pass
  187. @abstractmethod
  188. @needs_rsrc
  189. def delete_rsrc(self, urn, commit=True):
  190. pass