
Merge branch 'migration' into development

Stefano Cossu 6 years ago
commit e660da5229

+ 0 - 2
conftest.py

@@ -1,5 +1,3 @@
-import sys
-
 import pytest
 
 from os import path

+ 1 - 1
docs/apidoc/lakesuperior.rst

@@ -34,7 +34,7 @@ lakesuperior\.config\_parser module
 lakesuperior\.env module
 ------------------------
 
-.. automodule:: lakesuperior.env
+.. automodule:: lakesuperior
     :members:
     :undoc-members:
     :show-inheritance:

+ 3 - 2
docs/usage.rst

@@ -123,11 +123,12 @@ Before using the API, either do::
 
 Or, to specify an alternative configuration::
 
+    >>> from lakesuperior import env
     >>> from lakesuperior.config_parser import parse_config
     >>> from lakesuperior.globals import AppGlobals
-    >>> env.config = parse_config('/my/custom/config_dir')
+    >>> config = parse_config('/my/custom/config_dir')
     Reading configuration at /my/custom/config_dir
-    >>> env.app_globals = AppGlobals(env.config)
+    >>> env.app_globals = AppGlobals(config)
 
 Create and replace resources
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
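
This commit folds the former ``lakesuperior.env`` module into the package
root: the names ``env``, ``thread_env`` and ``basedir`` are now imported
directly from ``lakesuperior``. The new package ``__init__`` is not shown in
this diff; a minimal sketch of what it plausibly provides, inferred from the
imports in the hunks below::

    import threading
    from os import path

    # Package root; used e.g. by the migrator to locate etc.defaults.
    basedir = path.dirname(path.realpath(__file__))

    class Env:
        pass

    # Shared, process-wide environment (e.g. app_globals, wsgi_options).
    env = Env()
    # Thread-local bucket for per-request state such as timestamps.
    thread_env = threading.local()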

+ 1 - 1
lakesuperior/api/admin.py

@@ -1,7 +1,7 @@
 import logging
 
+from lakesuperior import env
 from lakesuperior.config_parser import parse_config
-from lakesuperior.env import env
 from lakesuperior.globals import AppGlobals
 from lakesuperior.migrator import Migrator
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout

+ 1 - 1
lakesuperior/api/query.py

@@ -2,9 +2,9 @@ import logging
 
 from io import BytesIO
 
+from lakesuperior import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.env import env
 from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore, TxnManager
 
 

+ 6 - 5
lakesuperior/api/resource.py

@@ -13,7 +13,7 @@ from rdflib.namespace import XSD
 from lakesuperior.config_parser import config
 from lakesuperior.exceptions import (
         InvalidResourceError, ResourceNotExistsError, TombstoneError)
-from lakesuperior.env import env
+from lakesuperior import env, thread_env
 from lakesuperior.globals import RES_DELETED, RES_UPDATED
 from lakesuperior.model.ldp_factory import LDP_NR_TYPE, LdpFactory
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -72,15 +72,16 @@ def transaction(write=False):
         def _wrapper(*args, **kwargs):
             # Mark transaction begin timestamp. This is used for create and
             # update timestamps on resources.
-            env.timestamp = arrow.utcnow()
-            env.timestamp_term = Literal(env.timestamp, datatype=XSD.dateTime)
+            thread_env.timestamp = arrow.utcnow()
+            thread_env.timestamp_term = Literal(
+                    thread_env.timestamp, datatype=XSD.dateTime)
             with TxnManager(env.app_globals.rdf_store, write=write) as txn:
                 ret = fn(*args, **kwargs)
             if len(env.app_globals.changelog):
                 job = Thread(target=_process_queue)
                 job.start()
-            delattr(env, 'timestamp')
-            delattr(env, 'timestamp_term')
+            delattr(thread_env, 'timestamp')
+            delattr(thread_env, 'timestamp_term')
             return ret
         return _wrapper
     return _transaction_deco
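
Moving the timestamps from ``env`` to ``thread_env`` addresses the race
condition flagged in the removed ``lakesuperior/env.py``: attributes on a
``threading.local`` are visible only to the thread that set them, so
concurrent transactions can no longer clobber each other's timestamps. A
minimal, self-contained sketch (``arrow`` is assumed available, as in the
module above)::

    import threading

    import arrow

    thread_env = threading.local()

    def worker(n):
        # Each thread sees only its own 'timestamp' attribute; setting it
        # here does not affect the value seen by any other thread.
        thread_env.timestamp = arrow.utcnow()
        print(n, thread_env.timestamp)

    threads = [threading.Thread(target=worker, args=(n,)) for n in range(3)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()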

+ 1 - 1
lakesuperior/endpoints/query.py

@@ -3,7 +3,7 @@ import logging
 from flask import Blueprint, current_app, request, render_template, send_file
 from rdflib.plugin import PluginException
 
-from lakesuperior.env import env
+from lakesuperior import env
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.api import query as query_api
 

+ 0 - 28
lakesuperior/env.py

@@ -1,28 +0,0 @@
-import threading
-
-'''
-Global bucket for switching configuration. Different environments
-(e.g. webapp, test suite) put the appropriate value in it.
-The most important values to be stored are app_conf (either from
-lakesuperior.config_parser.config or lakesuperior.config_parser.test_config)
-and app_globals (obtained by an instance of lakesuperior.globals.AppGlobals).
-
-e.g.:
-
->>> from lakesuperior.config_parser import config
->>> from lakesuperior.globals import AppGlobals
->>> from lakesuperior.env import env
->>> env.config = config
->>> env.app_globals = AppGlobals(config)
-
-This is automated in non-test environments by importing
-`lakesuperior.env_setup`.
-'''
-class Env:
-    pass
-
-# NOTE: this can lead to race conditions in multi-thread operations competing
-# to set a timestamp.
-#env = Env()
-# NOTE: This should be thread-safe but is experimental.
-env = threading.local()

+ 2 - 3
lakesuperior/env_setup.py

@@ -1,16 +1,15 @@
+from lakesuperior import env
 from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
 
 __doc__="""
 Default configuration.
 
 Import this module to initialize the configuration for a production setup::
 
-    >>>from lakesuperior import env_setup
+    >>> from lakesuperior import env_setup
 
 Will load the default configuration.
 """
 
-env.config = config
 env.app_globals = AppGlobals(config)

+ 22 - 8
lakesuperior/globals.py

@@ -27,7 +27,7 @@ class AppGlobals:
 
     The variables are set on initialization by passing a configuration dict.
     Usually this is done when starting an application. The instance with the
-    loaded variables is then assigned to the :data:`lakesuperior.env.env`
+    loaded variables is then assigned to the :data:`lakesuperior.env`
     global variable.
 
     You can either load the default configuration::
@@ -36,20 +36,19 @@ class AppGlobals:
 
     Or set up an environment with a custom configuration::
 
-        >>>from lakesuperior.env import env
-        >>>from lakesuperior.app_globals import AppGlobals
-        >>>my_config = {'name': 'value', '...': '...'}
-        >>>env.config = my_config
-        >>>env.app_globals = AppGlobals(my_config)
+        >>> from lakesuperior import env
+        >>> from lakesuperior.globals import AppGlobals
+        >>> my_config = {'name': 'value', '...': '...'}
+        >>> env.app_globals = AppGlobals(my_config)
 
     """
-    def __init__(self, conf):
+    def __init__(self, config):
         """
         Generate global variables from configuration.
         """
         from lakesuperior.messaging.messenger import Messenger
 
-        app_conf = conf['application']
+        app_conf = config['application']
 
         # Initialize RDF layout.
         rdfly_mod_name = app_conf['store']['ldp_rs']['layout']
@@ -69,11 +68,26 @@ class AppGlobals:
         self._messenger  = Messenger(app_conf['messaging'])
 
         # Exposed globals.
+        self._config = config
         self._rdfly = rdfly_cls(app_conf['store']['ldp_rs'])
         self._nonrdfly = nonrdfly_cls(app_conf['store']['ldp_nr'])
         self._changelog = deque()
 
 
+    @property
+    def config(self):
+        """
+        Global configuration.
+
+        This is a collection of all configuration options **except** for the
+        WSGI configuration which is initialized at a different time and is
+        stored under :data:`lakesuperior.env.wsgi_options`.
+
+        *TODO:* Update the class reference once the interface is separated
+        from the implementation.
+        """
+        return self._config
+
     @property
     def rdfly(self):
         """

+ 21 - 2
lakesuperior/lsup_admin.py

@@ -5,14 +5,30 @@ import logging
 import os
 import sys
 
+from lakesuperior import env
 from lakesuperior.api import admin as admin_api
 from lakesuperior.config_parser import config
-from lakesuperior.env import env
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
+__doc__="""
+Utility to perform core maintenance tasks from the command line.
+
+The command-line tool is self-documented. Type::
+
+    lsup-admin --help
+
+for a list of tools and options.
+"""
+
 logger = logging.getLogger(__name__)
 click_log.basic_config(logger)
 
+#report = logging.getLogger('report')
+#report_formatter = logging.Formatter('"%(asctime)s",%(message)s')
+#report_fpath = '{}/lsup-report-{}'.format(
+#        env.config['application']['data_dir'],
+#        arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S'))
+#report_handler = logging.FileHandler(report_fpath)
 
 @click.group()
 def admin():
@@ -88,8 +104,11 @@ def check_fixity(uid):
     '--config-folder', '-c', default=None, help='Alternative configuration '
     'folder to look up. If not set, the location set in the environment or '
     'the default configuration is used.')
+@click.option(
+    '--output', '-o', default=None, help='Output file. If not specified, a '
+    'timestamp-named file will be generated automatically.')
 @click.command()
-def check_refint(config_folder=None):
+def check_refint(config_folder=None, output=None):
     """
     Check referential integrity.
 

+ 51 - 33
lakesuperior/migrator.py

@@ -10,8 +10,9 @@ import yaml
 
 from rdflib import Graph, URIRef
 
+from lakesuperior import env, basedir
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
-from lakesuperior.env import env
+from lakesuperior.exceptions import InvalidResourceError
 from lakesuperior.globals import AppGlobals, ROOT_UID
 from lakesuperior.config_parser import parse_config
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -28,8 +29,7 @@ class StoreWrapper(ContextDecorator):
         self.store = store
 
     def __enter__(self):
-        self.store.open(
-                env.config['application']['store']['ldp_rs'])
+        self.store.open(env.app_globals.rdfly.config)
 
     def __exit__(self, *exc):
         self.store.close()
@@ -69,8 +69,8 @@ class Migrator:
 
 
     def __init__(
-            self, src, dest, zero_binaries=False, compact_uris=False,
-            skip_errors=False):
+            self, src, dest, clear=False, zero_binaries=False,
+            compact_uris=False, skip_errors=False):
         """
         Set up base paths and clean up existing directories.
 
@@ -82,8 +82,10 @@ class Migrator:
             it must be a writable directory. It will be deleted and recreated.
             If it does not exist, it will be created along with its parents if
             missing.
-        :param str binary_handling: One of ``include``, ``truncate`` or
-            ``split``.
+        :param bool clear: Whether to clear any pre-existing data at the
+            locations indicated.
+        :param bool zero_binaries: Whether to create zero-byte binary files
+            rather than copy the sources.
         :param bool compact_uris: NOT IMPLEMENTED. Whether the process should
             attempt to compact URIs generated with broken up path segments. If
             the UID matches a pattern such as ``/12/34/56/123456...`` it is
@@ -95,33 +97,36 @@ class Migrator:
         """
         # Set up repo folder structure and copy default configuration to
         # destination file.
-        cur_dir = path.dirname(path.dirname(path.abspath(__file__)))
         self.dbpath = '{}/data/ldprs_store'.format(dest)
         self.fpath = '{}/data/ldpnr_store'.format(dest)
         self.config_dir = '{}/etc'.format(dest)
 
-        shutil.rmtree(dest, ignore_errors=True)
-        shutil.copytree(
-                '{}/etc.defaults'.format(cur_dir), self.config_dir)
+        if clear:
+            shutil.rmtree(dest, ignore_errors=True)
+        if not path.isdir(self.config_dir):
+            shutil.copytree(
+                '{}/etc.defaults'.format(basedir), self.config_dir)
 
         # Modify and overwrite destination configuration.
         orig_config = parse_config(self.config_dir)
         orig_config['application']['store']['ldp_rs']['location'] = self.dbpath
         orig_config['application']['store']['ldp_nr']['path'] = self.fpath
 
-        with open('{}/application.yml'.format(self.config_dir), 'w') \
-                as config_file:
-            config_file.write(yaml.dump(orig_config['application']))
+        if clear:
+            with open('{}/application.yml'.format(self.config_dir), 'w') \
+                    as config_file:
+                config_file.write(yaml.dump(orig_config['application']))
 
         env.app_globals = AppGlobals(parse_config(self.config_dir))
 
         self.rdfly = env.app_globals.rdfly
         self.nonrdfly = env.app_globals.nonrdfly
 
-        with TxnManager(env.app_globals.rdf_store, write=True) as txn:
-            self.rdfly.bootstrap()
-            self.rdfly.store.close()
-        env.app_globals.nonrdfly.bootstrap()
+        if clear:
+            with TxnManager(env.app_globals.rdf_store, write=True) as txn:
+                self.rdfly.bootstrap()
+                self.rdfly.store.close()
+            env.app_globals.nonrdfly.bootstrap()
 
         self.src = src.rstrip('/')
         self.zero_binaries = zero_binaries
@@ -154,7 +159,7 @@ class Migrator:
                             'Starting point {} does not begin with a slash.'
                             .format(start))
 
-                    if start != ROOT_UID:
+                    if not rsrc_api.exists(start):
                         # Create the full hierarchy with link to the parents.
                         rsrc_api.create_or_replace(start)
                     # Then populate the new resource and crawl for more
@@ -164,8 +169,11 @@ class Migrator:
                 with open(list_file, 'r') as fp:
                     for uri in fp:
                         uid = uri.strip().replace(self.src, '')
-                        if uid != ROOT_UID:
-                            rsrc_api.create_or_replace(uid)
+                        if not rsrc_api.exists(uid):
+                            try:
+                                rsrc_api.create_or_replace(uid)
+                            except InvalidResourceError:
+                                pass
                         self._crawl(uid)
         logger.info('Dumped {} resources.'.format(self._ct))
 
@@ -188,12 +196,17 @@ class Migrator:
         # Internal URI of destination.
         iuri = ibase + uid
 
-        rsp = requests.head(uri)
-        if not self.skip_errors:
-            rsp.raise_for_status()
-        elif rsp.status_code > 399:
-            print('Error retrieving resource {} headers: {} {}'.format(
-                uri, rsp.status_code, rsp.text))
+        try:
+            rsp = requests.head(uri)
+        except requests.exceptions.RequestException:
+            logger.warning('Error retrieving resource {}'.format(uri))
+            return
+        # Note: a Response is falsy for status >= 400, so guarding this
+        # block with `if rsp:` would skip error handling when needed most.
+        if not self.skip_errors:
+            rsp.raise_for_status()
+        elif rsp.status_code > 399:
+            print('Error retrieving resource {} headers: {} {}'.format(
+                uri, rsp.status_code, rsp.text))
 
         # Determine LDP type.
         ldp_type = 'ldp_nr'
@@ -217,12 +230,17 @@ class Migrator:
         # links.
         get_uri = (
                 uri if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri))
-        get_rsp = requests.get(get_uri)
-        if not self.skip_errors:
-            get_rsp.raise_for_status()
-        elif get_rsp.status_code > 399:
-            print('Error retrieving resource {} body: {} {}'.format(
-                uri, get_rsp.status_code, get_rsp.text))
+        try:
+            get_rsp = requests.get(get_uri)
+        except requests.exceptions.RequestException:
+            logger.warning('Error retrieving resource {}'.format(get_uri))
+            return
+        if not self.skip_errors:
+            get_rsp.raise_for_status()
+        elif get_rsp.status_code > 399:
+            print('Error retrieving resource {} body: {} {}'.format(
+                uri, get_rsp.status_code, get_rsp.text))
 
         data = get_rsp.content.replace(
                 self.src.encode('utf-8'), ibase.encode('utf-8'))
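
Taken together, the new ``clear`` flag makes ``Migrator`` usable both for
fresh migrations and for resuming into an existing destination. A
hypothetical invocation, assuming ``migrate()`` accepts the start points
handled in the hunks above (all paths and URLs are illustrative)::

    from lakesuperior.migrator import Migrator

    migrator = Migrator(
        src='http://localhost:8080/rest',  # hypothetical source repository
        dest='/data/new_repo',             # hypothetical destination path
        clear=True,          # wipe the destination and re-bootstrap stores
        zero_binaries=True,  # write zero-byte stand-ins instead of copying
        skip_errors=True,    # report retrieval errors instead of raising
    )
    migrator.migrate(['/'])  # crawl from the repository root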

+ 1 - 1
lakesuperior/model/ldp_factory.py

@@ -7,11 +7,11 @@ from rdflib import Graph, parser, plugin, serializer
 from rdflib.resource import Resource
 from rdflib.namespace import RDF
 
+from lakesuperior import env
 from lakesuperior.model.ldpr import Ldpr
 from lakesuperior.model.ldp_nr import LdpNr
 from lakesuperior.model.ldp_rs import LdpRs, Ldpc, LdpDc, LdpIc
 from lakesuperior.config_parser import config
-from lakesuperior.env import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import (
         IncompatibleLdpTypeError, InvalidResourceError, ResourceExistsError,

+ 1 - 1
lakesuperior/model/ldp_nr.py

@@ -6,7 +6,7 @@ from rdflib.namespace import RDF, XSD
 from rdflib.resource import Resource
 from rdflib.term import URIRef, Literal, Variable
 
-from lakesuperior.env import env
+from lakesuperior import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.model.ldpr import Ldpr
 from lakesuperior.model.ldp_rs import LdpRs

+ 1 - 1
lakesuperior/model/ldp_rs.py

@@ -2,7 +2,7 @@ import logging
 
 from rdflib import Graph
 
-from lakesuperior.env import env
+from lakesuperior import env
 from lakesuperior.globals import RES_UPDATED
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.model.ldpr import Ldpr

+ 16 - 9
lakesuperior/model/ldpr.py

@@ -10,7 +10,7 @@ import arrow
 from rdflib import Graph, URIRef, Literal
 from rdflib.namespace import RDF
 
-from lakesuperior.env import env
+from lakesuperior import env, thread_env
 from lakesuperior.globals import (
     RES_CREATED, RES_DELETED, RES_UPDATED, ROOT_UID)
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
@@ -411,7 +411,7 @@ class Ldpr(metaclass=ABCMeta):
         else:
             add_trp = {
                 (self.uri, RDF.type, nsc['fcsystem'].Tombstone),
-                (self.uri, nsc['fcrepo'].created, env.timestamp_term),
+                (self.uri, nsc['fcrepo'].created, thread_env.timestamp_term),
             }
 
         self.modify(RES_DELETED, remove_trp, add_trp)
@@ -432,7 +432,7 @@ class Ldpr(metaclass=ABCMeta):
         Remove all traces of a resource and versions.
         """
         logger.info('Purging resource {}'.format(self.uid))
-        refint = env.config['store']['ldp_rs']['referential_integrity']
+        refint = rdfly.config['referential_integrity']
         inbound = True if refint else inbound
         rdfly.forget_rsrc(self.uid, inbound)
 
@@ -692,7 +692,7 @@ class Ldpr(metaclass=ABCMeta):
 
         if (
                 ev_type is not None and
-                env.config['application'].get('messaging')):
+                env.app_globals.config['application'].get('messaging')):
             logger.debug('Enqueuing message for {}'.format(self.uid))
             self._enqueue_msg(ev_type, remove_trp, add_trp)
 
@@ -720,7 +720,7 @@ class Ldpr(metaclass=ABCMeta):
 
         env.app_globals.changelog.append((set(remove_trp), set(add_trp), {
             'ev_type': ev_type,
-            'timestamp': env.timestamp.format(),
+            'timestamp': thread_env.timestamp.format(),
             'rsrc_type': rsrc_type,
             'actor': actor,
         }))
@@ -769,7 +769,7 @@ class Ldpr(metaclass=ABCMeta):
         # Create and modify timestamp.
         if create:
             self.provided_imr.set((
-                self.uri, nsc['fcrepo'].created, env.timestamp_term))
+                self.uri, nsc['fcrepo'].created, thread_env.timestamp_term))
             self.provided_imr.set((
                 self.uri, nsc['fcrepo'].createdBy, self.DEFAULT_USER))
         else:
@@ -781,12 +781,12 @@ class Ldpr(metaclass=ABCMeta):
                     self.uri, nsc['fcrepo'].createdBy)))
 
         self.provided_imr.set((
-            self.uri, nsc['fcrepo'].lastModified, env.timestamp_term))
+            self.uri, nsc['fcrepo'].lastModified, thread_env.timestamp_term))
         self.provided_imr.set((
             self.uri, nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER))
 
 
-    def _containment_rel(self, create):
+    def _containment_rel(self, create, ignore_type=True):
         """Find the closest parent in the path indicated by the uid and
         establish a containment triple.
 
@@ -805,6 +805,11 @@ class Ldpr(metaclass=ABCMeta):
 
         :param bool create: Whether the resource is being created. If false,
         the parent container is not updated.
+        "param bool ignore_type: If False (the default), an exception is raised
+        if trying to create a resource under a non-container. This can be
+        overridden in special cases (e.g. when migrating a repository in which
+        a LDP-NR has "children" under ``fcr:versions``) by setting this to
+        True.
         """
         from lakesuperior.model.ldp_factory import LdpFactory
 
@@ -814,7 +819,9 @@ class Ldpr(metaclass=ABCMeta):
             cnd_parent_uid = '/' + '/'.join(path_components[:-1])
             if rdfly.ask_rsrc_exists(cnd_parent_uid):
                 parent_rsrc = LdpFactory.from_stored(cnd_parent_uid)
-                if nsc['ldp'].Container not in parent_rsrc.types:
+                if (
+                        not ignore_type
+                        and nsc['ldp'].Container not in parent_rsrc.types):
                     raise InvalidResourceError(
                         cnd_parent_uid, 'Parent {} is not a container.')
 

+ 1 - 1
lakesuperior/profiler.py

@@ -5,9 +5,9 @@ from werkzeug.contrib.profiler import ProfilerMiddleware
 # Environment must be set before importing the app factory function.
 import lakesuperior.env_setup
 
+from lakesuperior import env
 from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
 
 options = {
     'restrictions': [30],

+ 5 - 6
lakesuperior/server.py

@@ -4,21 +4,20 @@ from logging.config import dictConfig
 # Environment must be set before importing the app factory function.
 import lakesuperior.env_setup
 
+from lakesuperior import env
 from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
 
 from lakesuperior.app import create_app
 
-dictConfig(env.config['logging'])
+dictConfig(env.app_globals.config['logging'])
 logger = logging.getLogger(__name__)
 
 logger.info('Graph store location: {}'.format(
-    env.config['application']['store']['ldp_rs']['location']))
-logger.info('Binary store location: {}'.format(
-    env.config['application']['store']['ldp_nr']['path']))
+    env.app_globals.rdfly.config['location']))
+logger.info('Binary store location: {}'.format(env.app_globals.nonrdfly.root))
 
-fcrepo = create_app(env.config['application'])
+fcrepo = create_app(env.app_globals.config['application'])
 
 if __name__ == "__main__":
     fcrepo.run(host='0.0.0.0')

+ 1 - 1
lakesuperior/store/ldp_rs/lmdb_store.py

@@ -14,7 +14,7 @@ from rdflib import Graph, Namespace, URIRef, Variable
 from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
 from rdflib.store import Store, VALID_STORE, NO_STORE
 
-from lakesuperior.env import env
+from lakesuperior import env
 
 logger = logging.getLogger(__name__)
 

+ 0 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -21,7 +21,6 @@ from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
         srv_mgd_predicates, srv_mgd_types
 from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError, PathSegmentError)
-from lakesuperior.env import env
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 

+ 0 - 1
lakesuperior/wsgi.py

@@ -7,7 +7,6 @@ import gunicorn.app.base
 
 from lakesuperior import env, env_setup
 from lakesuperior.config_parser import default_config_dir
-from lakesuperior.env import env
 
 
 config_file = path.join(default_config_dir, 'gunicorn.yml')