Преглед на файлове

Merge RDF store connector into base layout; change extract_graph to extract_rsrc.

Stefano Cossu преди 7 години
родител
ревизия
a71371cadb

+ 1 - 1
etc.skeleton/application.yml

@@ -23,7 +23,7 @@ store:
     # MUST support SPARQL 1.1 query and update.
     ldp_rs:
         # Store layout. This corresponds to a sub-class of the
-        # `lakesuperior.connectors.graph_store_connector.BaseGraphStoreConnector`.
+        # `lakesuperior.store_layouts.rdf.base_rdf_layout.BaseRdfLayout`.
         layout: simple_layout
         webroot: http://localhost:9999/namespace/fcrepo/
         query_ep: sparql

+ 0 - 77
lakesuperior/connectors/graph_store_connector.py

@@ -1,77 +0,0 @@
-import logging
-import uuid
-
-from flask import request
-from rdflib import Dataset
-from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
-from rdflib.term import URIRef
-
-from lakesuperior.config_parser import config
-from lakesuperior.core.namespaces import ns_collection as nsc
-from lakesuperior.core.namespaces import ns_mgr as nsm
-
-
-class GraphStoreConnector:
-    '''Connector for LDP-RS (RDF Source) resources. Connects to a
-    triplestore.
-    '''
-
-    _conf = config['application']['store']['ldp_rs']
-    _logger = logging.getLogger(__module__)
-
-    query_ep = _conf['webroot'] + _conf['query_ep']
-    update_ep = _conf['webroot'] + _conf['update_ep']
-
-
-    ## MAGIC METHODS ##
-
-    @property
-    def store(self):
-        if not hasattr(self, '_store') or not self._store:
-            self._store = SPARQLUpdateStore(
-                    queryEndpoint=self.query_ep,
-                    update_endpoint=self.update_ep,
-                    autocommit=False,
-                    dirty_reads=True)
-
-        return self._store
-
-
-    def __init__(self):
-        '''Initialize the graph store.
-
-        NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
-        for Graph Store HTTP protocol
-        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
-        this only in the (currently) unreleased 2.2 branch. It works with Jena,
-        but other considerations would have to be made (e.g. Jena has no REST
-        API for handling transactions).
-
-        In a more advanced development phase it could be possible to extend the
-        SPARQLUpdateStore class to add non-standard interaction with specific
-        SPARQL implementations in order to support ACID features provided
-        by them; e.g. Blazegraph's RESTful transaction handling methods.
-        '''
-        self.ds = Dataset(self.store, default_union=True)
-        self.ds.namespace_manager = nsm
-
-
-    #def __del__(self):
-    #    '''Commit pending transactions and close connection.'''
-    #    self.store.close(True)
-
-
-    ## PUBLIC METHODS ##
-
-    def query(self, q, initBindings=None, nsc=nsc):
-        '''
-        Perform a custom query on the triplestore.
-
-        @param q (string) SPARQL query.
-
-        @return rdflib.query.Result
-        '''
-        self._logger.debug('Querying SPARQL endpoint: {}'.format(q))
-        return self.ds.query(q, initBindings=initBindings, initNs=nsc)
-
-

+ 5 - 8
lakesuperior/model/ldpr.py

@@ -10,7 +10,6 @@ import arrow
 from rdflib import Graph
 from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
-from rdflib.query import ResultException
 from rdflib.term import Literal
 
 from lakesuperior.config_parser import config
@@ -60,11 +59,11 @@ def transactional(fn):
         try:
             ret = fn(self, *args, **kwargs)
             print('Committing transaction.')
-            self.rdfly.conn.store.commit()
+            self.rdfly.store.commit()
             return ret
         except:
             print('Rolling back transaction.')
-            self.rdfly.conn.store.rollback()
+            self.rdfly.store.rollback()
             raise
 
     return wrapper
@@ -374,13 +373,11 @@ class Ldpr(metaclass=ABCMeta):
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
         '''
-        try:
-            g = self.rdfly.out_graph(inbound)
-        except ResultException:
-            # RDFlib bug? https://github.com/RDFLib/rdflib/issues/775
+        im_rsrc = self.rdfly.out_rsrc(inbound)
+        if not len(im_rsrc.graph):
             raise ResourceNotExistsError()
 
-        return Translator.globalize_rsrc(g)
+        return Translator.globalize_rsrc(im_rsrc)
 
 
     @transactional

+ 113 - 10
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -2,12 +2,13 @@ import logging
 
 from abc import ABCMeta, abstractmethod
 
-from flask import request
-from rdflib import Graph
+from rdflib import Dataset, Graph
+from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.term import URIRef
+from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
 
-from lakesuperior.connectors.graph_store_connector import GraphStoreConnector
+from lakesuperior.config_parser import config
 from lakesuperior.core.namespaces import ns_collection as nsc
 from lakesuperior.core.namespaces import ns_mgr as nsm
 
@@ -29,7 +30,8 @@ def needs_rsrc(fn):
 
 class BaseRdfLayout(metaclass=ABCMeta):
     '''
-    This class exposes an interface to build graph store layouts.
+    This class exposes an interface to build graph store layouts. It also
+    provides the basics of the triplestore connection.
 
     Some store layouts are provided. New ones aimed at specific uses
     and optimizations of the repository may be developed by extending this
@@ -60,23 +62,52 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # N.B. This is Fuseki-specific.
     UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
 
+    _conf = config['application']['store']['ldp_rs']
     _logger = logging.getLogger(__module__)
 
+    query_ep = _conf['webroot'] + _conf['query_ep']
+    update_ep = _conf['webroot'] + _conf['update_ep']
+
 
     ## MAGIC METHODS ##
 
     def __init__(self, urn=None):
-        '''
+        '''Initialize the graph store and a layout.
+
+        NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
+        for Graph Store HTTP protocol
+        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
+        this only in the (currently) unreleased 2.2 branch. It works with Jena,
+        but other considerations would have to be made (e.g. Jena has no REST
+        API for handling transactions).
+
+        In a more advanced development phase it could be possible to extend the
+        SPARQLUpdateStore class to add non-standard interaction with specific
+        SPARQL implementations in order to support ACID features provided
+        by them; e.g. Blazegraph's RESTful transaction handling methods.
+
         The layout can be initialized with a URN to make resource-centric
         operations simpler. However, for generic queries, urn can be None and
-        no `self.rsrc` is assigned. In this case, some methods will not be
-        available.
+        no `self.rsrc` is assigned. In this case, some methods (the ones
+        decorated by `@needs_rsrc`) will not be available.
         '''
-        self.conn = GraphStoreConnector()
-        self.ds = self.conn.ds
+        self.ds = Dataset(self.store, default_union=True)
+        self.ds.namespace_manager = nsm
         self._base_urn = urn
 
 
+    @property
+    def store(self):
+        '''SPARQL update store handle, created on first access and reused.'''
+        if not getattr(self, '_store', None):
+            self._store = SPARQLUpdateStore(
+                    queryEndpoint=self.query_ep,
+                    update_endpoint=self.update_ep,
+                    autocommit=False,
+                    dirty_reads=True)
+        return self._store
+
+
     @property
     def base_urn(self):
         '''
@@ -114,15 +145,87 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
     ## PUBLIC METHODS ##
 
+    def query(self, q, initBindings=None, nsc=nsc):
+        '''
+        Run a SPARQL query against the triplestore.
+
+        Layout-independent entry point for read queries; individual layouts
+        are not meant to override it.
+
+        @param q (string) SPARQL query.
+        @param initBindings Forwarded unchanged to the dataset query call.
+        @param nsc Forwarded to the dataset query call as `initNs`.
+
+        @return rdflib.query.Result
+        '''
+        log_msg = 'Sending SPARQL query: {}'.format(q)
+        self._logger.debug(log_msg)
+        result = self.ds.query(q, initBindings=initBindings, initNs=nsc)
+
+        return result
+
+
+    def update(self, q, initBindings=None, nsc=nsc):
+        '''
+        Perform a SPARQL update on the triplestore.
+
+        This should provide non-abstract access, independent from the layout,
+        therefore it should not be overridden by individual layouts.
+
+        @param q (string) SPARQL-UPDATE query.
+        @param initBindings Forwarded unchanged to the dataset update call.
+        @param nsc Forwarded to the dataset update call as `initNs`.
+
+        @return None
+        '''
+        self._logger.debug('Sending SPARQL update: {}'.format(q))
+        # Updates must go through `update()`, not `query()`: the latter
+        # sends the text to the read-only query path.
+        return self.ds.update(q, initBindings=initBindings, initNs=nsc)
+
+
+    def extract_rsrc(self, uri=None, graph=None, inbound=False):
+        '''
+        Extract an in-memory resource from the triples describing a subject.
+
+        @param uri (URIRef) Resource URI. Defaults to `self.base_urn`.
+        @param graph (rdflib.term.URIRef | set(rdflib.term.URIRef)) The graph
+        to extract from: a single graph URI, or a set of graph URIs to be
+        aggregated. NOTE: currently unused; the query is not scoped by it.
+        @param inbound (boolean) Whether to pull triples that have the resource
+        URI as their object.
+
+        @return rdflib.resource.Resource Resource for `uri` over the extracted
+        graph; the graph is empty if the query raises `ResultException`.
+        '''
+        uri = uri or self.base_urn
+        subj = uri.n3()
+
+        # The inbound pattern must point at the same URI as the outbound one,
+        # otherwise an explicit `uri` would mix two different subjects.
+        # NOTE(review): both patterns sit in the same WHERE group, so with
+        # inbound=True a resource without inbound links presumably yields
+        # no triples at all -- confirm this is intended.
+        inbound_qry = '\n?s1 ?p1 {}'.format(subj) if inbound else ''
+
+        q = '''
+        CONSTRUCT {{
+            {0} ?p ?o .{1}
+        }} WHERE {{
+            {0} ?p ?o .{1}
+            FILTER (?p != premis:hasMessageDigest) .
+        }}
+        '''.format(subj, inbound_qry)
+
+        try:
+            qres = self.query(q)
+        except ResultException:
+            # RDFlib bug? https://github.com/RDFLib/rdflib/issues/775
+            g = Graph()
+        else:
+            g = qres.graph
+
+        return Resource(g, uri)
+
+
+    ## INTERFACE METHODS ##
+
+    # Implementers of custom layouts should look into these methods to
+    # implement.
 
     @abstractmethod
     @needs_rsrc
-    def out_graph(self, srv_mgd=True, inbound=False, embed_children=False):
+    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
         '''
         Graph obtained by querying the triplestore and adding any abstraction
         and filtering to make up a graph that can be used for read-only,
         API-facing results. Different layouts can implement this in very
         different ways, so it is an abstract method.
+
+        @return rdflib.resource.Resource
         '''
         pass
 

+ 5 - 14
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -49,24 +49,15 @@ class SimpleLayout(BaseRdfLayout):
         return headers
 
 
-    def out_graph(self, srv_mgd=True, inbound=False, embed_children=False):
+    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
         '''
-        See base_rdf_layout.out_graph.
+        See base_rdf_layout.out_rsrc.
         '''
-        inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
-                if inbound else ''
-        q = '''
-        CONSTRUCT {{
-            {0} ?p ?o .{1}
-        }} WHERE {{
-            {0} ?p ?o .{1}
-            FILTER (?p != premis:hasMessageDigest) .
-        }}
-        '''.format(self.base_urn.n3(), inbound_qry)
+        im_rsrc = self.extract_rsrc(inbound=inbound)
 
-        qres = self.rsrc.graph.query(q)
+        im_rsrc.remove(nsc['premis'].hasMessageDigest)
 
-        return Resource(qres.graph, self.base_urn)
+        return im_rsrc
 
 
     def ask_rsrc_exists(self, rsrc=None):