123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- import logging
- from abc import ABCMeta, abstractmethod
- from rdflib import Dataset, Graph
- from rdflib.query import ResultException
- from rdflib.resource import Resource
- from rdflib.term import URIRef
- from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
- from lakesuperior.config_parser import config
- from lakesuperior.dictionaries.namespaces import ns_collection as nsc
- from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
- from lakesuperior.exceptions import ResourceNotExistsError
- #def needs_rsrc(fn):
- # '''
- # Decorator for methods that cannot be called without `self.rsrc` set.
- # '''
- # def wrapper(self, *args, **kwargs):
- # if not hasattr(self, 'rsrc') or self.rsrc is None:
- # raise TypeError(
- # 'This method must be called by an instance with `rsrc` set.')
- #
- # return fn(self, *args, **kwargs)
- #
- # return wrapper
class BaseRdfLayout(metaclass=ABCMeta):
    '''
    This class exposes an interface to build graph store layouts. It also
    provides the basics of the triplestore connection.

    Some store layouts are provided. New ones aimed at specific uses
    and optimizations of the repository may be developed by extending this
    class and implementing all its abstract methods.

    A layout is implemented via application configuration. However, once
    contents are ingested in a repository, changing a layout will most likely
    require a migration.

    The custom layout must be in the lakesuperior.store_layouts.rdf
    package and the class implementing the layout must be called
    `StoreLayout`. The module name is the one defined in the app
    configuration.

    E.g. if the configuration indicates `simple_layout` the application will
    look for
    `lakesuperior.store_layouts.rdf.simple_layout.SimpleLayout`.

    Some method naming conventions:

    - Methods starting with `get_` return a resource.
    - Methods starting with `list_` return an iterable or generator of URIs.
    - Methods starting with `select_` return an iterable or generator with
      table-like data such as from a SELECT statement.
    - Methods starting with `ask_` return a boolean value.
    '''

    ROOT_NODE_URN = nsc['fcsystem'].root
    # N.B. This is Fuseki-specific.
    UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')

    RES_CREATED = '_created_'
    RES_UPDATED = '_updated_'

    # Store settings are read once, when the class body is evaluated.
    _conf = config['application']['store']['ldp_rs']
    _logger = logging.getLogger(__name__)

    query_ep = _conf['webroot'] + _conf['query_ep']
    update_ep = _conf['webroot'] + _conf['update_ep']


    ## MAGIC METHODS ##

    def __init__(self):
        '''Initialize the graph store and a layout.

        NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
        for Graph Store HTTP protocol
        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
        this only in the (currently) unreleased 2.2 branch. It works with Jena,
        but other considerations would have to be made (e.g. Jena has no REST
        API for handling transactions).
        '''
        self.ds = Dataset(self.store, default_union=True)
        self.ds.namespace_manager = nsm


    @property
    def store(self):
        '''
        SPARQL store connected to the configured query and update endpoints.

        The store is created on first access and cached on the instance.

        @return rdflib.plugins.stores.sparqlstore.SPARQLUpdateStore
        '''
        # Compare against None instead of testing truthiness: rdflib SPARQL
        # stores implement `__len__` as a remote count, so `not self._store`
        # would hit the triplestore on every access and would re-create the
        # store whenever the triplestore happens to be empty.
        if getattr(self, '_store', None) is None:
            self._store = SPARQLUpdateStore(
                    queryEndpoint=self.query_ep,
                    update_endpoint=self.update_ep,
                    autocommit=False,
                    dirty_reads=True)

        return self._store


    def rsrc(self, urn):
        '''
        Reference to a live data set that can be updated. This exposes the
        whole underlying triplestore structure and is used to update a
        resource.

        @param urn (rdflib.term.URIRef) Identifier of the resource.

        @return rdflib.resource.Resource
        '''
        return self.ds.resource(urn)


    def out_rsrc(self, urn):
        '''
        Graph obtained by querying the triplestore and adding any abstraction
        and filtering to make up a graph that can be used for read-only,
        API-facing results. Different layouts can implement this in very
        different ways: the actual extraction is delegated to the abstract
        `extract_imr` method.

        @param urn (rdflib.term.URIRef) Identifier of the resource.

        @return rdflib.resource.Resource

        @raise ResourceNotExistsError if the extracted graph is empty.
        '''
        imr = self.extract_imr(urn)
        if not len(imr.graph):
            raise ResourceNotExistsError

        return imr


    ## PUBLIC METHODS ##

    def query(self, q, initBindings=None, nsc=nsc):
        '''
        Perform a SPARQL query on the triplestore.

        This should provide non-abstract access, independent from the layout,
        therefore it should not be overridden by individual layouts.

        @param q (string) SPARQL query.
        @param initBindings Initial variable bindings, passed through to
        rdflib.
        @param nsc Namespace prefixes made available to the query. Defaults
        to the application namespace collection.

        @return rdflib.query.Result
        '''
        self._logger.debug('Sending SPARQL query: %s', q)

        return self.ds.query(q, initBindings=initBindings, initNs=nsc)


    def update(self, q, initBindings=None, nsc=nsc):
        '''
        Perform a SPARQL update on the triplestore.

        This should provide low-level access, independent from the layout,
        therefore it should not be overridden by individual layouts.

        @param q (string) SPARQL-UPDATE query.
        @param initBindings Initial variable bindings, passed through to
        rdflib.
        @param nsc Namespace prefixes made available to the update. Defaults
        to the application namespace collection.

        @return None
        '''
        self._logger.debug('Sending SPARQL update: %s', q)

        # Updates must go through `update()`; `query()` targets the query
        # endpoint and parses the string as a SPARQL query.
        self.ds.update(q, initBindings=initBindings, initNs=nsc)


    def create_or_replace_rsrc(self, imr):
        '''Create a resource graph in the main graph if it does not exist.

        If it exists, replace the existing one retaining the creation date.

        @param imr In-memory resource to store. Its `identifier` is used to
        look up an existing resource.

        @return Whatever `replace_rsrc` or `create_rsrc` returns.
        '''
        if self.ask_rsrc_exists(imr.identifier):
            self._logger.info(
                    'Resource {} exists. Removing all outbound triples.'
                    .format(imr.identifier))
            return self.replace_rsrc(imr)

        return self.create_rsrc(imr)


    ## INTERFACE METHODS ##

    # Implementers of custom layouts should look into these methods to
    # implement.

    @abstractmethod
    def extract_imr(self, uri, graph=None, minimal=False,
            incl_inbound=False, embed_children=False, incl_srv_mgd=True):
        '''
        Extract an in-memory resource based on the copy of a graph on a subject.

        @param uri (URIRef) Resource URI.
        @param graph (rdflib.term.URIRef | set(rdflib.term.URIRef)) The graph
        to extract from. This can be an URI for a single graph, or a list of
        graph URIs in which case an aggregate graph will be used.
        @param incl_inbound (boolean) Whether to pull triples that have the
        resource URI as their object.

        NOTE(review): `minimal`, `embed_children` and `incl_srv_mgd` are not
        described here; their meaning is defined by the implementing layouts.
        '''
        pass


    @abstractmethod
    def ask_rsrc_exists(self, urn):
        '''
        Ask if a resource is stored in the graph store.

        @param urn (rdflib.term.URIRef) The internal URN of the resource to be
        queried.

        @return boolean
        '''
        pass


    @abstractmethod
    def create_rsrc(self, imr):
        '''Create a resource graph in the main graph.

        If the resource exists, raise an exception.

        @param imr In-memory resource to create.
        '''
        pass


    @abstractmethod
    def replace_rsrc(self, imr):
        '''Replace a resource, i.e. delete all the triples and re-add the
        ones provided.

        @param imr In-memory resource to load. It must not contain
        `fcrepo:created` and `fcrepo:createdBy`.
        '''
        pass


    @abstractmethod
    def modify_dataset(self, remove_trp, add_trp):
        '''
        Adds and/or removes triples from the graph.

        NOTE: This is not specific to a resource. The LDP layer is responsible
        for checking that all the +/- triples are referring to the intended
        subject(s).

        @param remove_trp (rdflib.Graph) Triples to be removed.
        @param add_trp (rdflib.Graph) Triples to be added.
        '''
        pass


    @abstractmethod
    def delete_rsrc(self, urn, inbound=True):
        '''
        Delete a resource.

        @param urn Identifier of the resource to delete.
        @param inbound (boolean) Presumably whether triples that point to the
        resource are removed as well; confirm against the implementing
        layouts.
        '''
        pass


    ## PROTECTED METHODS ##

    def _set_msg_digest(self):
        '''
        Add a message digest to the current resource.
        '''
        # NOTE(review): this method looks stale. `Digest` is not imported in
        # the visible part of this module and `self.rsrc` is a method taking
        # a URN, not an attribute, so this raises when called. It is left
        # untouched because a fix requires an interface decision.
        cksum = Digest.rdf_cksum(self.rsrc.graph)
        self.rsrc.set(nsc['premis'].hasMessageDigest,
                URIRef('urn:sha1:{}'.format(cksum)))
|