
Connector and layout overhaul: configurable back ends (SPARQL + BerkeleyDB)

Stefano Cossu · 6 years ago
commit 544dc691e9

+ 2 - 0
conftest.py

@@ -35,6 +35,8 @@ def db(app):
     for g in db.ds.graphs():
         db.ds.remove_graph(g)
 
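+    # The connector may not autocommit (e.g. SparqlConnector defaults to
+    # autocommit=False), so the wipe must be committed explicitly.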
+    db.ds.store.commit()
+
 
 @pytest.fixture
 def rnd_img():

+ 24 - 17
etc.skeleton/application.yml

@@ -22,30 +22,37 @@ store:
     # The semantic store used for persisting LDP-RS (RDF Source) resources.
     # MUST support SPARQL 1.1 query and update.
     ldp_rs:
-        # Store layout. This corresponds to a sub-class of the
-        # `lakesuperior.store_layouts.rdf.base_rdf_layout/BaseRdfLayout`.
+        # Connector class. This corresponds to a sub-class of the
+        # `lakesuperior.store_layouts.ldp_rs.base_connector.BaseConnector`.
+        connector:
+            module: sparql_connector
+            options:
+                # `location` must be consistent for all connector types. Other
+                # options may vary.
+                location: http://localhost:3030/fcrepo/
+                query_ep: query
+                update_ep: update
+                # Optional
+                #username: <set me>
+                #password: <set me>
+                #ssl_verify: false
+
+        # Store layout. This corresponds to a sub-class of the
+        # `lakesuperior.store_layouts.ldp_rs.base_rdf_layout.BaseRdfLayout`.
         layout: default_layout
         # Whether to check if the object of a client-provided triple is the URI
         # of a repository-managed resource and verify if that exists.
         # If set to false, properties are allowed to point to resources in the
         # repository that do not exist. Also, if a resource is deleted, inbound
         # relationships may not be cleaned up.
-        # This can be one of `False` (boolean), `lenient` or `strict`. `False`
+        # This can be one of `false` (boolean), `lenient` or `strict`. `false`
         # does not check for referential integrity. `lenient` quietly drops a
         # user-provided triple if its object violates referential integrity.
         # `strict` raises an exception.
         referential_integrity: lenient
-        webroot: http://localhost:3030/fcrepo
-        query_ep: query
-        update_ep: update
-        # Optional
-        #username: <set me>
-        #password: <set me>
-        #ssl_verify: false
-
         # This mimics Fedora4 behavior which segments an identifier on POST.
         # Hyrax will break if this is off.
-        legacy_ptree_split: False
+        legacy_ptree_split: false
 
     # The path used to persist LDP-NR (bitstreams).
     # This is for now a POSIX filesystem. Other solutions such as HDFS may be
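
A connector block for the new BerkeleyDB back end is not shown in the skeleton;
a sketch of what it could look like, given `BdbConnector._init_connection`
below (the path is an arbitrary example):

    store:
        ldp_rs:
            connector:
                module: bdb_connector
                options:
                    # Filesystem path of the embedded DB environment.
                    location: /var/lib/lsup/bdb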

+ 9 - 7
etc.skeleton/test.yml

@@ -4,13 +4,15 @@
 
 store:
     ldp_rs:
-        webroot: http://localhost:9999/namespace/fcrepo_test/
-        query_ep: sparql
-        update_ep: sparql
-        # Optional
-        #username: <set me>
-        #password: <set me>
-        #ssl_verify: false
+        connector:
+            options:
+                location: http://localhost:3030/fcrepo-test/
+                query_ep: query
+                update_ep: update
+                # Optional
+                #username: <set me>
+                #password: <set me>
+                #ssl_verify: false
     ldp_nr:
         path: /tmp/fcrepo_test/ldpnr_store
 

+ 24 - 11
lakesuperior/app.py

@@ -44,17 +44,30 @@ def create_app(app_conf, logging_conf):
     })
     app.register_blueprint(query, url_prefix='/query')
 
-    # Initialize RDF and file store.
-    def load_layout(type):
-        layout_cls = app_conf['store'][type]['layout']
-        store_mod = import_module('lakesuperior.store_layouts.{0}.{1}'.format(
-                type, layout_cls))
-        layout_cls = getattr(store_mod, camelcase(layout_cls))
-
-        return layout_cls(app_conf['store'][type])
-
-    app.rdfly = load_layout('ldp_rs')
-    app.nonrdfly = load_layout('ldp_nr')
+    # Initialize RDF store connector.
+    conn_mod_name = app_conf['store']['ldp_rs']['connector']['module']
+    conn_mod = import_module('lakesuperior.store_layouts.ldp_rs.{}'.format(
+            conn_mod_name))
+    conn_cls = getattr(conn_mod, camelcase(conn_mod_name))
+    rdf_store_conn = conn_cls(
+            **app_conf['store']['ldp_rs']['connector']['options'])
+    logger.info('RDF store connector: {}'.format(conn_mod_name))
+
+    # Initialize RDF layout.
+    rdfly_mod_name = app_conf['store']['ldp_rs']['layout']
+    rdfly_mod = import_module('lakesuperior.store_layouts.ldp_rs.{}'.format(
+            rdfly_mod_name))
+    rdfly_cls = getattr(rdfly_mod, camelcase(rdfly_mod_name))
+    app.rdfly = rdfly_cls(rdf_store_conn, app_conf['store']['ldp_rs'])
+    logger.info('RDF layout: {}'.format(rdfly_mod_name))
+
+    # Initialize file layout.
+    nonrdfly_mod_name = app_conf['store']['ldp_nr']['layout']
+    nonrdfly_mod = import_module('lakesuperior.store_layouts.ldp_nr.{}'.format(
+            nonrdfly_mod_name))
+    nonrdfly_cls = getattr(nonrdfly_mod, camelcase(nonrdfly_mod_name))
+    app.nonrdfly = nonrdfly_cls(app_conf['store']['ldp_nr'])
+    logger.info('Non-RDF layout: {}'.format(nonrdfly_mod_name))
 
     # Set up messaging.
     app.messenger = Messenger(app_conf['messaging'])
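
The `camelcase` helper is what ties a config `module` value to its class name;
a minimal sketch of the mapping it is assumed to perform:

    def camelcase(word):
        # 'sparql_connector' -> 'SparqlConnector', 'bdb_connector' -> 'BdbConnector'
        return ''.join(part.title() for part in word.split('_'))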

+ 2 - 2
lakesuperior/config_parser.py

@@ -40,8 +40,8 @@ Please review your configuration before starting.
 config['test'] = hiyapyco.load(CONFIG_DIR + '/application.yml',
         CONFIG_DIR + '/test.yml', method=hiyapyco.METHOD_MERGE)
 
-if config['application']['store']['ldp_rs']['webroot'] == \
-        config['test']['store']['ldp_rs']['webroot']:
+if config['application']['store']['ldp_rs']['connector']['options']['location'] \
+        == config['test']['store']['ldp_rs']['connector']['options']['location']:
             raise RuntimeError(error_msg.format('RDF'))
             sys.exit()
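
This guard works because `hiyapyco.METHOD_MERGE` deep-merges mappings:
`test.yml` only overrides the keys it declares, and the rest resolve against
the skeleton defaults. An illustration (file paths abbreviated):

    import hiyapyco

    merged = hiyapyco.load('application.yml', 'test.yml',
            method=hiyapyco.METHOD_MERGE)
    conn = merged['store']['ldp_rs']['connector']
    print(conn['module'])               # sparql_connector (inherited)
    print(conn['options']['location'])  # http://localhost:3030/fcrepo-test/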
 

+ 7 - 7
lakesuperior/model/ldp_factory.py

@@ -37,13 +37,13 @@ class LdpFactory:
 
         @param uuid UUID of the instance.
         '''
-        __class__._logger.info('Retrieving stored resource: {}'.format(uuid))
+        #__class__._logger.info('Retrieving stored resource: {}'.format(uuid))
         imr_urn = nsc['fcres'][uuid] if uuid else (
                 model.ldpr.Ldpr.ROOT_NODE_URN)
 
         imr = current_app.rdfly.extract_imr(imr_urn, **repr_opts)
-        __class__._logger.debug('Extracted graph: {}'.format(
-                pformat(set(imr.graph))))
+        #__class__._logger.debug('Extracted graph: {}'.format(
+        #        pformat(set(imr.graph))))
         rdf_types = set(imr.graph.objects(imr.identifier, RDF.type))
 
         if __class__.LDP_NR_TYPE in rdf_types:
@@ -88,11 +88,11 @@ class LdpFactory:
             input_rdf = stream.read()
             provided_gr = Graph().parse(data=input_rdf,
                     format=mimetype, publicID=urn)
-            logger.debug('Provided graph: {}'.format(
-                    pformat(set(provided_gr))))
+            #logger.debug('Provided graph: {}'.format(
+            #        pformat(set(provided_gr))))
             local_gr = g.tbox.localize_graph(provided_gr)
-            logger.debug('Parsed local graph: {}'.format(
-                    pformat(set(local_gr))))
+            #logger.debug('Parsed local graph: {}'.format(
+            #        pformat(set(local_gr))))
             provided_imr = Resource(local_gr, urn)
 
             # Determine whether it is a basic, direct or indirect container.

+ 1 - 1
lakesuperior/model/ldp_rs.py

@@ -74,7 +74,7 @@ class LdpRs(Ldpr):
         with `BaseStoreLayout.update_resource` and/or recorded as separate
         events in a provenance tracking system.
         '''
-        self._logger.debug('Provided SPARQL query: {}'.format(q))
+        #self._logger.debug('Provided SPARQL query: {}'.format(q))
         pre_gr = self.imr.graph
 
         post_gr = deepcopy(pre_gr)

+ 8 - 8
lakesuperior/model/ldpr.py

@@ -41,7 +41,7 @@ def atomic(fn):
             self._logger.info('Committing transaction.')
             self.rdfly.store.commit()
             for ev in request.changelog:
-                self._logger.info('Message: {}'.format(pformat(ev)))
+                #self._logger.info('Message: {}'.format(pformat(ev)))
                 self._send_event_msg(*ev)
             return ret
 
@@ -172,7 +172,7 @@ class Ldpr(metaclass=ABCMeta):
         '''
         if not hasattr(self, '_imr'):
             if hasattr(self, '_imr_options'):
-                self._logger.debug('IMR options: {}'.format(self._imr_options))
+                #self._logger.debug('IMR options: {}'.format(self._imr_options))
                 imr_options = self._imr_options
             else:
                 imr_options = {}
@@ -194,7 +194,7 @@ class Ldpr(metaclass=ABCMeta):
         '''
         if not hasattr(self, '_imr'):
             if hasattr(self, '_imr_options'):
-                self._logger.debug('IMR options: {}'.format(self._imr_options))
+                #self._logger.debug('IMR options: {}'.format(self._imr_options))
                 imr_options = self._imr_options
             else:
                 imr_options = {}
@@ -801,8 +801,8 @@ class Ldpr(metaclass=ABCMeta):
                     self._logger.info('Removing offending type: {}'.format(t))
                     gr.remove((None, RDF.type, t))
 
-        self._logger.debug('Sanitized graph: {}'.format(gr.serialize(
-            format='turtle').decode('utf-8')))
+        #self._logger.debug('Sanitized graph: {}'.format(gr.serialize(
+        #    format='turtle').decode('utf-8')))
         return gr
 
 
@@ -946,7 +946,7 @@ class Ldpr(metaclass=ABCMeta):
         add_gr = Graph()
 
         self._logger.info('Checking direct or indirect containment.')
-        self._logger.debug('Parent predicates: {}'.format(cont_p))
+        #self._logger.debug('Parent predicates: {}'.format(cont_p))
 
         if self.MBR_RSRC_URI in cont_p and self.MBR_REL_URI in cont_p:
             s = g.tbox.localize_term(
@@ -971,8 +971,8 @@ class Ldpr(metaclass=ABCMeta):
 
         if len(add_gr):
             add_gr = self._check_mgd_terms(add_gr)
-            self._logger.debug('Adding DC/IC triples: {}'.format(
-                add_gr.serialize(format='turtle').decode('utf-8')))
+            #self._logger.debug('Adding DC/IC triples: {}'.format(
+            #    add_gr.serialize(format='turtle').decode('utf-8')))
             self._modify_rsrc(self.RES_UPDATED, add_trp=add_gr)
 
 
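For context, the `atomic` decorator patched above keeps the store transaction
and the event messages in sync; a reconstructed sketch (the rollback branch is
assumed, since this hunk only shows the success path):

    from flask import request

    def atomic(fn):
        def wrapper(self, *args, **kwargs):
            try:
                ret = fn(self, *args, **kwargs)
            except Exception:
                self.rdfly.store.rollback()
                raise
            else:
                self.rdfly.store.commit()
                for ev in request.changelog:
                    self._send_event_msg(*ev)
                return ret
        return wrapper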

+ 14 - 20
lakesuperior/store_layouts/ldp_rs/graph_store_connector.py → lakesuperior/store_layouts/ldp_rs/base_connector.py

@@ -1,14 +1,13 @@
 import logging
 
-from rdflib import Dataset
+from abc import ABCMeta, abstractmethod
+
 from rdflib.term import URIRef
-from rdflib.plugins.stores.sparqlstore import SPARQLStore, SPARQLUpdateStore
-from SPARQLWrapper.Wrapper import POST
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 
 
-class GraphStoreConnector:
+class BaseConnector(metaclass=ABCMeta):
     '''
     Handles the connection and dataset information.
 
@@ -16,30 +15,25 @@ class GraphStoreConnector:
     be passed any configuration options.
     '''
 
-    # N.B. This is Fuseki-specific.
-    UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
+    UNION_GRAPH_URI = URIRef('urn:x-rdflib:default')
 
     _logger = logging.getLogger(__name__)
 
-    def __init__(self, query_ep, update_ep=None, autocommit=False):
+    def __init__(self, *args, **kwargs):
         '''
-        Initialize the connection to the SPARQL endpoint.
-
-        If `update_ep` is not specified, the store is initialized as read-only.
+        Initialize the connection to the back end store.
+
+        Positional and keyword arguments are passed through to the concrete
+        connector's `_init_connection`.
         '''
-        if update_ep:
-            self.store = SPARQLUpdateStore(
-                    queryEndpoint=query_ep,
-                    update_endpoint=update_ep,
-                    autocommit=autocommit,
-                    dirty_reads=not autocommit)
-
-            self.readonly = False
-        else:
-            self.store = SPARQLStore(query_ep, default_query_method=POST)
-            self.readonly = True
-
-        self.ds = Dataset(self.store, default_union=True)
+        self._init_connection(*args, **kwargs)
+
+
+    @abstractmethod
+    def _init_connection(self, *args, **kwargs):
+        '''
+        Interface method. Connection steps go here.
+        '''
+        pass
 
 
     def query(self, q, initBindings=None, nsc=nsc):
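
With the ABC in place, a back end only needs to implement `_init_connection`
and set `store` and `ds`. A hypothetical in-memory connector (not part of this
commit) illustrates the contract:

    from rdflib import Dataset

    from lakesuperior.store_layouts.ldp_rs.base_connector import BaseConnector

    class MemConnector(BaseConnector):
        def _init_connection(self):
            # rdflib's default in-memory store; default_union mirrors the
            # other connectors so union-graph queries behave the same.
            self.ds = Dataset(default_union=True)
            self.store = self.ds.store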

+ 5 - 7
lakesuperior/store_layouts/ldp_rs/base_rdf_layout.py

@@ -2,6 +2,7 @@ import logging
 
 from abc import ABCMeta, abstractmethod
 
+from flask import current_app
 from rdflib import Graph
 from rdflib.namespace import RDF
 from rdflib.query import ResultException
@@ -11,8 +12,8 @@ from rdflib.term import URIRef
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.exceptions import ResourceNotExistsError
-from lakesuperior.store_layouts.ldp_rs.graph_store_connector import \
-        GraphStoreConnector
 from lakesuperior.toolbox import Toolbox
 
 
@@ -53,7 +54,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
 
     ## MAGIC METHODS ##
 
-    def __init__(self, config):
+    def __init__(self, conn, config):
         '''Initialize the graph store and a layout.
 
         NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
@@ -63,10 +64,7 @@ class BaseRdfLayout(metaclass=ABCMeta):
         which is currently the reference implementation.
         '''
         self.config = config
-        self._conn = GraphStoreConnector(
-                query_ep=config['webroot'] + config['query_ep'],
-                update_ep=config['webroot'] + config['update_ep'])
-
+        self._conn = conn
         self.store = self._conn.store
 
         self.UNION_GRAPH_URI = self._conn.UNION_GRAPH_URI

+ 36 - 0
lakesuperior/store_layouts/ldp_rs/bdb_connector.py

@@ -0,0 +1,36 @@
+import logging
+
+from rdflib import Dataset, plugin
+from rdflib.store import Store
+from rdflib.term import URIRef
+
+from lakesuperior.store_layouts.ldp_rs.base_connector import BaseConnector
+
+
+class BdbConnector(BaseConnector):
+    '''
+    Handles the connection to an embedded BerkeleyDB (Sleepycat) store.
+
+    This is independent from the application context (production/test) and can
+    be passed any configuration options.
+    '''
+
+    _logger = logging.getLogger(__name__)
+
+    def _init_connection(self, location):
+        '''
+        Initialize the connection to the Sleepycat (BerkeleyDB) store.
+
+        @param location Filesystem path of the DB environment; it is created
+        if it does not exist. The option is named `location` for consistency
+        across connector types.
+        '''
+        self.store = plugin.get('Sleepycat', Store)(
+                identifier=URIRef('urn:fcsystem:lsup'))
+        self.store.open(location, create=True)
+        self.ds = Dataset(self.store, default_union=True)
+
+
+    def __del__(self):
+        # `store` may be unset if `_init_connection` failed halfway.
+        if hasattr(self, 'store'):
+            self.store.close()
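
Usage sketch (the path is illustrative; rdflib's Sleepycat plugin requires the
`bsddb3` package):

    from lakesuperior.store_layouts.ldp_rs.bdb_connector import BdbConnector

    conn = BdbConnector(location='/tmp/lsup_bdb')
    print(list(conn.ds.graphs()))  # enumerate context graphs
    conn.store.commit()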

+ 4 - 4
lakesuperior/store_layouts/ldp_rs/default_layout.py

@@ -185,10 +185,10 @@ class DefaultLayout(BaseRdfLayout):
         '''
         See base_rdf_layout.update_rsrc.
         '''
-        self._logger.debug('Remove triples: {}'.format(pformat(
-                set(remove_trp))))
-        self._logger.debug('Add triples: {}'.format(pformat(
-                set(add_trp))))
+        #self._logger.debug('Remove triples: {}'.format(pformat(
+        #        set(remove_trp))))
+        #self._logger.debug('Add triples: {}'.format(pformat(
+        #        set(add_trp))))
 
         if not types:
             # @FIXME This is terrible, but I can't get Fuseki to update the

+ 47 - 0
lakesuperior/store_layouts/ldp_rs/sparql_connector.py

@@ -0,0 +1,47 @@
+import logging
+
+from rdflib import Dataset
+from rdflib.term import URIRef
+from rdflib.plugins.stores.sparqlstore import SPARQLStore, SPARQLUpdateStore
+from SPARQLWrapper.Wrapper import POST
+
+from lakesuperior.store_layouts.ldp_rs.base_connector import BaseConnector
+
+
+class SparqlConnector(BaseConnector):
+    '''
+    Handles the connection to a SPARQL 1.1 endpoint over HTTP.
+
+    This is independent from the application context (production/test) and can
+    be passed any configuration options.
+    '''
+
+    # N.B. This is Fuseki-specific.
+    UNION_GRAPH_URI = URIRef('urn:x-arq:UnionGraph')
+
+    _logger = logging.getLogger(__name__)
+
+    def _init_connection(self, location, query_ep, update_ep=None,
+            autocommit=False):
+        '''
+        Initialize the connection to the SPARQL endpoint.
+
+        If `update_ep` is not specified, the store is initialized as read-only.
+        '''
+        if update_ep:
+            self.store = SPARQLUpdateStore(
+                    queryEndpoint=location + query_ep,
+                    update_endpoint=location + update_ep,
+                    autocommit=autocommit,
+                    dirty_reads=not autocommit)
+
+            self.readonly = False
+        else:
+            self.store = SPARQLStore(
+                    location + query_ep, default_query_method=POST)
+            self.readonly = True
+
+        self.ds = Dataset(self.store, default_union=True)
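
Usage sketch against a local Fuseki instance (URL and endpoint names match the
skeleton config; the `fcrepo` dataset must already exist on the server):

    from lakesuperior.store_layouts.ldp_rs.sparql_connector import SparqlConnector

    conn = SparqlConnector(
            location='http://localhost:3030/fcrepo/',
            query_ep='query', update_ep='update')
    res = conn.query('SELECT (COUNT(*) AS ?ct) WHERE { ?s ?p ?o }')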

+ 11 - 15
util/bootstrap.py

@@ -7,8 +7,8 @@ sys.path.append('.')
 
 from lakesuperior.app import create_app
 from lakesuperior.config_parser import config
-from lakesuperior.store_layouts.ldp_rs.graph_store_connector import \
-        GraphStoreConnector
 from lakesuperior.model.ldpr import Ldpr
 
 __doc__ = '''
@@ -24,23 +24,19 @@ def bootstrap_db(app):
     '''
     Initialize RDF store.
     '''
-    dbconf = app.config['store']['ldp_rs']
-    print('Resetting RDF store to base data set: {}'.format(dbconf['webroot']))
-    db = GraphStoreConnector(
-            query_ep=dbconf['webroot'] + dbconf['query_ep'],
-            update_ep=dbconf['webroot'] + dbconf['update_ep'],
-            autocommit=True)
-
-    print('Cleaning up graph store: {}'.format(dbconf['webroot']))
-    for g in db.ds.graphs():
-        db.ds.remove_graph(g)
+    print('Cleaning up graph store: {}'.format(
+            app.config['store']['ldp_rs']['connector']['options']['location']))
+    for g in app.rdfly.ds.graphs():
+        app.rdfly.ds.remove_graph(g)
 
     # @TODO Make configurable.
     print('Populating graph store with base dataset.')
-    db.ds.default_context.parse(source='data/bootstrap/default_layout.nq',
-            format='nquads')
+    app.rdfly.ds.default_context.parse(
+            source='data/bootstrap/default_layout.nq', format='nquads')
+
+    app.rdfly.ds.store.commit()
 
-    return db
+    return app.rdfly
 
 
 def bootstrap_binary_store(app):
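
For reference, both helpers are meant to be driven with a fully wired app
instance; a sketch (the `logging` config key is an assumption based on
`create_app`'s signature):

    app = create_app(config['application'], config['logging'])
    bootstrap_db(app)
    bootstrap_binary_store(app)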