浏览代码

Merge RDF store connector into base layout; change extract_graph to extract_rsrc.

Stefano Cossu 7 年之前
父节点
当前提交
a71371cadb

+ 1 - 1
etc.skeleton/application.yml

@@ -23,7 +23,7 @@ store:
     # MUST support SPARQL 1.1 query and update.
     # MUST support SPARQL 1.1 query and update.
     ldp_rs:
     ldp_rs:
         # Store layout. This corresponds to a sub-class of the
         # Store layout. This corresponds to a sub-class of the
-        # `lakesuperior.connectors.graph_store_connector.BaseGraphStoreConnector`.
+        # `lakesuperior.store_layouts.rdf.base_rdf_layout.BaseRdfLayout`.
         layout: simple_layout
         layout: simple_layout
         webroot: http://localhost:9999/namespace/fcrepo/
         webroot: http://localhost:9999/namespace/fcrepo/
         query_ep: sparql
         query_ep: sparql

+ 0 - 77
lakesuperior/connectors/graph_store_connector.py

@@ -1,77 +0,0 @@
-import logging
-import uuid
-
-from flask import request
-from rdflib import Dataset
-from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
-from rdflib.term import URIRef
-
-from lakesuperior.config_parser import config
-from lakesuperior.core.namespaces import ns_collection as nsc
-from lakesuperior.core.namespaces import ns_mgr as nsm
-
-
-class GraphStoreConnector:
-    '''Connector for LDP-RS (RDF Source) resources. Connects to a
-    triplestore.
-    '''
-
-    _conf = config['application']['store']['ldp_rs']
-    _logger = logging.getLogger(__module__)
-
-    query_ep = _conf['webroot'] + _conf['query_ep']
-    update_ep = _conf['webroot'] + _conf['update_ep']
-
-
-    ## MAGIC METHODS ##
-
-    @property
-    def store(self):
-        if not hasattr(self, '_store') or not self._store:
-            self._store = SPARQLUpdateStore(
-                    queryEndpoint=self.query_ep,
-                    update_endpoint=self.update_ep,
-                    autocommit=False,
-                    dirty_reads=True)
-
-        return self._store
-
-
-    def __init__(self):
-        '''Initialize the graph store.
-
-        NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
-        for Graph Store HTTP protocol
-        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
-        this only in the (currently) unreleased 2.2 branch. It works with Jena,
-        but other considerations would have to be made (e.g. Jena has no REST
-        API for handling transactions).
-
-        In a more advanced development phase it could be possible to extend the
-        SPARQLUpdateStore class to add non-standard interaction with specific
-        SPARQL implementations in order to support ACID features provided
-        by them; e.g. Blazegraph's RESTful transaction handling methods.
-        '''
-        self.ds = Dataset(self.store, default_union=True)
-        self.ds.namespace_manager = nsm
-
-
-    #def __del__(self):
-    #    '''Commit pending transactions and close connection.'''
-    #    self.store.close(True)
-
-
-    ## PUBLIC METHODS ##
-
-    def query(self, q, initBindings=None, nsc=nsc):
-        '''
-        Perform a custom query on the triplestore.
-
-        @param q (string) SPARQL query.
-
-        @return rdflib.query.Result
-        '''
-        self._logger.debug('Querying SPARQL endpoint: {}'.format(q))
-        return self.ds.query(q, initBindings=initBindings, initNs=nsc)
-
-

+ 5 - 8
lakesuperior/model/ldpr.py

@@ -10,7 +10,6 @@ import arrow
 from rdflib import Graph
 from rdflib import Graph
 from rdflib.resource import Resource
 from rdflib.resource import Resource
 from rdflib.namespace import RDF, XSD
 from rdflib.namespace import RDF, XSD
-from rdflib.query import ResultException
 from rdflib.term import Literal
 from rdflib.term import Literal
 
 
 from lakesuperior.config_parser import config
 from lakesuperior.config_parser import config
@@ -60,11 +59,11 @@ def transactional(fn):
         try:
         try:
             ret = fn(self, *args, **kwargs)
             ret = fn(self, *args, **kwargs)
             print('Committing transaction.')
             print('Committing transaction.')
-            self.rdfly.conn.store.commit()
+            self.rdfly.store.commit()
             return ret
             return ret
         except:
         except:
             print('Rolling back transaction.')
             print('Rolling back transaction.')
-            self.rdfly.conn.store.rollback()
+            self.rdfly.store.rollback()
             raise
             raise
 
 
     return wrapper
     return wrapper
@@ -374,13 +373,11 @@ class Ldpr(metaclass=ABCMeta):
         '''
         '''
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
         https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
         '''
         '''
-        try:
-            g = self.rdfly.out_graph(inbound)
-        except ResultException:
-            # RDFlib bug? https://github.com/RDFLib/rdflib/issues/775
+        im_rsrc = self.rdfly.out_rsrc(inbound)
+        if not len(im_rsrc.graph):
             raise ResourceNotExistsError()
             raise ResourceNotExistsError()
 
 
-        return Translator.globalize_rsrc(g)
+        return Translator.globalize_rsrc(im_rsrc)
 
 
 
 
     @transactional
     @transactional

+ 113 - 10
lakesuperior/store_layouts/rdf/base_rdf_layout.py

@@ -2,12 +2,13 @@ import logging
 
 
 from abc import ABCMeta, abstractmethod
 from abc import ABCMeta, abstractmethod
 
 
-from flask import request
-from rdflib import Graph
+from rdflib import Dataset, Graph
+from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.resource import Resource
 from rdflib.term import URIRef
 from rdflib.term import URIRef
+from rdflib.plugins.stores.sparqlstore import SPARQLUpdateStore
 
 
-from lakesuperior.connectors.graph_store_connector import GraphStoreConnector
+from lakesuperior.config_parser import config
 from lakesuperior.core.namespaces import ns_collection as nsc
 from lakesuperior.core.namespaces import ns_collection as nsc
 from lakesuperior.core.namespaces import ns_mgr as nsm
 from lakesuperior.core.namespaces import ns_mgr as nsm
 
 
@@ -29,7 +30,8 @@ def needs_rsrc(fn):
 
 
 class BaseRdfLayout(metaclass=ABCMeta):
 class BaseRdfLayout(metaclass=ABCMeta):
     '''
     '''
-    This class exposes an interface to build graph store layouts.
+    This class exposes an interface to build graph store layouts. It also
+    provides the basics of the triplestore connection.
 
 
     Some store layouts are provided. New ones aimed at specific uses
     Some store layouts are provided. New ones aimed at specific uses
     and optimizations of the repository may be developed by extending this
     and optimizations of the repository may be developed by extending this
@@ -60,23 +62,52 @@ class BaseRdfLayout(metaclass=ABCMeta):
     # N.B. This is Fuseki-specific.
     # N.B. This is Fuseki-specific.
     UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
     UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
 
 
+    _conf = config['application']['store']['ldp_rs']
     _logger = logging.getLogger(__module__)
     _logger = logging.getLogger(__module__)
 
 
+    query_ep = _conf['webroot'] + _conf['query_ep']
+    update_ep = _conf['webroot'] + _conf['update_ep']
+
 
 
     ## MAGIC METHODS ##
     ## MAGIC METHODS ##
 
 
     def __init__(self, urn=None):
     def __init__(self, urn=None):
-        '''
+        '''Initialize the graph store and a layout.
+
+        NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
+        for Graph Store HTTP protocol
+        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
+        this only in the (currently) unreleased 2.2 branch. It works with Jena,
+        but other considerations would have to be made (e.g. Jena has no REST
+        API for handling transactions).
+
+        In a more advanced development phase it could be possible to extend the
+        SPARQLUpdateStore class to add non-standard interaction with specific
+        SPARQL implementations in order to support ACID features provided
+        by them; e.g. Blazegraph's RESTful transaction handling methods.
+
         The layout can be initialized with a URN to make resource-centric
         The layout can be initialized with a URN to make resource-centric
         operations simpler. However, for generic queries, urn can be None and
         operations simpler. However, for generic queries, urn can be None and
-        no `self.rsrc` is assigned. In this case, some methods will not be
-        available.
+        no `self.rsrc` is assigned. In this case, some methods (the ones
+        decorated by `@needs_rsrc`) will not be available.
         '''
         '''
-        self.conn = GraphStoreConnector()
-        self.ds = self.conn.ds
+        self.ds = Dataset(self.store, default_union=True)
+        self.ds.namespace_manager = nsm
         self._base_urn = urn
         self._base_urn = urn
 
 
 
 
+    @property
+    def store(self):
+        if not hasattr(self, '_store') or not self._store:
+            self._store = SPARQLUpdateStore(
+                    queryEndpoint=self.query_ep,
+                    update_endpoint=self.update_ep,
+                    autocommit=False,
+                    dirty_reads=True)
+
+        return self._store
+
+
     @property
     @property
     def base_urn(self):
     def base_urn(self):
         '''
         '''
@@ -114,15 +145,87 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
 
     ## PUBLIC METHODS ##
     ## PUBLIC METHODS ##
 
 
+    def query(self, q, initBindings=None, nsc=nsc):
+        '''
+        Perform a SPARQL query on the triplestore.
+
+        This should provide non-abstract access, independent from the layout,
+        therefore it should not be overridden by individual layouts.
+
+        @param q (string) SPARQL query.
+
+        @return rdflib.query.Result
+        '''
+        self._logger.debug('Sending SPARQL query: {}'.format(q))
+        return self.ds.query(q, initBindings=initBindings, initNs=nsc)
+
+
+    def update(self, q, initBindings=None, nsc=nsc):
+        '''
+        Perform a SPARQL update on the triplestore.
+
+        This should provide non-abstract access, independent from the layout,
+        therefore it should not be overridden by individual layouts.
+
+        @param q (string) SPARQL-UPDATE query.
+
+        @return None
+        '''
+        self._logger.debug('Sending SPARQL update: {}'.format(q))
+        return self.ds.query(q, initBindings=initBindings, initNs=nsc)
+
+
+    def extract_rsrc(self, uri=None, graph=None, inbound=False):
+        '''
+        Extract an in-memory resource based on the copy of a graph on a subject.
+
+        @param uri (URIRef) Resource URI.
+        @param graph (rdflib.term.URIRef | set(rdflib.term.URIRef)) The graph
+        to extract from. This can be a URI for a single graph, or a set of
+        graph URIs in which case an aggregate graph will be used.
+        @param inbound (boolean) Whether to pull triples that have the resource
+        URI as their object.
+        '''
+        uri = uri or self.base_urn
+
+        inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
+                if inbound else ''
+
+        q = '''
+        CONSTRUCT {{
+            {0} ?p ?o .{1}
+        }} WHERE {{
+            {0} ?p ?o .{1}
+            FILTER (?p != premis:hasMessageDigest) .
+        }}
+        '''.format(uri.n3(), inbound_qry)
+
+        try:
+            qres = self.query(q)
+        except ResultException:
+            # RDFlib bug? https://github.com/RDFLib/rdflib/issues/775
+            g = Graph()
+        else:
+            g = qres.graph
+
+        return Resource(g, uri)
+
+
+    ## INTERFACE METHODS ##
+
+    # Implementers of custom layouts should look into these methods to
+    # implement.
 
 
     @abstractmethod
     @abstractmethod
     @needs_rsrc
     @needs_rsrc
-    def out_graph(self, srv_mgd=True, inbound=False, embed_children=False):
+    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
         '''
         '''
         Graph obtained by querying the triplestore and adding any abstraction
         Graph obtained by querying the triplestore and adding any abstraction
         and filtering to make up a graph that can be used for read-only,
         and filtering to make up a graph that can be used for read-only,
         API-facing results. Different layouts can implement this in very
         API-facing results. Different layouts can implement this in very
         different ways, so it is an abstract method.
         different ways, so it is an abstract method.
+
+        @return rdflib.resource.Resource
         '''
         '''
         pass
         pass
 
 

+ 5 - 14
lakesuperior/store_layouts/rdf/simple_layout.py

@@ -49,24 +49,15 @@ class SimpleLayout(BaseRdfLayout):
         return headers
         return headers
 
 
 
 
-    def out_graph(self, srv_mgd=True, inbound=False, embed_children=False):
+    def out_rsrc(self, srv_mgd=True, inbound=False, embed_children=False):
         '''
         '''
-        See base_rdf_layout.out_graph.
+        See base_rdf_layout.out_rsrc.
         '''
         '''
-        inbound_qry = '\n?s1 ?p1 {}'.format(self.base_urn.n3()) \
-                if inbound else ''
-        q = '''
-        CONSTRUCT {{
-            {0} ?p ?o .{1}
-        }} WHERE {{
-            {0} ?p ?o .{1}
-            FILTER (?p != premis:hasMessageDigest) .
-        }}
-        '''.format(self.base_urn.n3(), inbound_qry)
+        im_rsrc = self.extract_rsrc(inbound=inbound)
 
 
-        qres = self.rsrc.graph.query(q)
+        im_rsrc.remove(nsc['premis'].hasMessageDigest)
 
 
-        return Resource(qres.graph, self.base_urn)
+        return im_rsrc
 
 
 
 
     def ask_rsrc_exists(self, rsrc=None):
     def ask_rsrc_exists(self, rsrc=None):