Browse Source

Configuration overhaul.

* Remove fully custom locations for resource data paths
* More sane defaults
* Allow relative paths for some directives
* Consistently set default folder within `lakesuperior/data`
* Remove additional test configuration
Stefano Cossu 6 years ago
parent
commit
7bd26e6f6c

+ 1 - 2
.gitignore

@@ -104,5 +104,4 @@ venv.bak/
 .mypy_cache/
 
 # Default LAKEsuperior data directories
-data/ldpnr_store
-data/ldprs_store
+/data

+ 1 - 0
MANIFEST.in

@@ -1,4 +1,5 @@
 include README.rst
 include LICENSE
+graft lakesuperior/data/bootstrap
 graft lakesuperior/endpoints/templates
 graft lakesuperior/etc.defaults

+ 15 - 8
conftest.py

@@ -2,24 +2,31 @@ import sys
 
 import pytest
 
-sys.path.append('.')
-from lakesuperior.config_parser import test_config
-from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
+from shutil import rmtree
+from tempfile import gettempdir
 
-env.config = test_config
-env.app_globals = AppGlobals(test_config)
+from lakesuperior import env_setup, env
 from lakesuperior.app import create_app
 from lakesuperior.util.generators import random_image
 
-env.config = test_config
-
 @pytest.fixture(scope='module')
 def app():
+    # Override data directory locations.
+    data_dir = path.join(gettempdir(), 'lsup_test', 'data')
+    env.config['application']['data_dir'] = data_dir
+    env.config['application']['store']['ldp_nr']['location'] = path.join(
+            data_dir, 'ldpnr_store')
+    env.config['application']['store']['ldp_rs']['location'] = path.join(
+            data_dir, 'ldprs_store')
     app = create_app(env.config['application'])
 
     yield app
 
+    # TODO improve this by using tempfile.TemporaryDirectory as a context
+    # manager.
+    print('Removing fixture data directory.')
+    rmtree(data_dir)
+
 
 @pytest.fixture(scope='module')
 def db(app):

+ 0 - 0
data/log/.keep


+ 0 - 0
data/run/.keep


+ 14 - 6
docs/setup.rst

@@ -79,9 +79,14 @@ Configuration
 
 The app should run for testing and evaluation purposes without any
 further configuration. All the application data are stored by default in
-the ``data`` directory.
+the ``data`` directory of the Python package.
 
-To change the default configuration you should:
+This setup is not recommended for anything more than a quick look at the
+application. If more complex interaction is needed, or upgrades to the package
+are foreseen, it is strongly advised to set up proper locations for
+configuration and data.
+
+To change the default configuration you need to:
 
 #. Copy the ``etc.defaults`` folder to a separate location
 #. Set the configuration folder location in the environment:
@@ -94,10 +99,13 @@ To change the default configuration you should:
 
 The configuration options are documented in the files.
 
-**Note:** ``test.yml`` must specify a different location for the graph
-and for the binary stores than the default one, otherwise running a test
-suite will destroy your main data store. The application will issue an
-error message and refuse to start if these locations overlap.
+One thing worth noting is that some locations can be specified as relative
+paths. These paths will be relative to the ``data_dir`` location specified in
+the ``application.yml`` file.
+
+If ``data_dir`` is empty, as it is in the default configuration, it defaults
+to the ``data`` directory inside the Python package. This is the option that
+one may want to change before anything else.
 
 Production deployment
 ---------------------

+ 1 - 1
docs/usage.rst

@@ -125,7 +125,7 @@ Or, to specify an alternative configuration::
 
     >>> from lakesuperior.config_parser import parse_config
     >>> from lakesuperior.globals import AppGlobals
-    >>> env.config, test_config = parse_config('/my/custom/config_dir')
+    >>> env.config = parse_config('/my/custom/config_dir')
     Reading configuration at /my/custom/config_dir
     >>> env.app_globals = AppGlobals(env.config)
 

+ 49 - 0
lakesuperior/__init__.py

@@ -0,0 +1,49 @@
+import threading
+
+from os import path
+
+basedir = path.dirname(path.realpath(__file__))
+"""
+Base directory for the module.
+
+This can be used by modules looking for configuration and data files to be
+referenced or copied with a known path relative to the package root.
+
+:rtype: str
+"""
+
+class Env:
+    pass
+
+env = Env()
+"""
+A pox on "globals are evil".
+
+All-purpose bucket for storing global variables. Different environments
+(e.g. webapp, test suite) put the appropriate value in it.
+The most important values to be stored are config (either
+lakesuperior.config_parser.config or the output of parse_config for a custom
+directory) and app_globals (an instance of lakesuperior.globals.AppGlobals).
+
+e.g.::
+
+    >>> from lakesuperior.config_parser import config
+    >>> from lakesuperior.globals import AppGlobals
+    >>> from lakesuperior import env
+    >>> env.app_globals = AppGlobals(config)
+
+This is automated in non-test environments by importing
+`lakesuperior.env_setup`.
+
+:rtype: Object
+"""
+
+thread_env = threading.local()
+"""
+Thread-local environment.
+
+This is used to store thread-specific variables such as start/end request
+timestamps.
+
+:rtype: threading.local
+"""

+ 1 - 2
lakesuperior/api/admin.py

@@ -55,8 +55,7 @@ def integrity_check(config_dir=None):
     be added and triggered by different argument flags.
     """
     if config_dir:
-        env.config = parse_config(config_dir)[0]
-        env.app_globals = AppGlobals(env.config)
+        env.app_globals = AppGlobals(parse_config(config_dir))
     else:
         import lakesuperior.env_setup
     with TxnManager(env.app_globals.rdfly.store):

+ 24 - 30
lakesuperior/config_parser.py

@@ -1,6 +1,6 @@
 import sys
 
-from os import path, environ
+from os import chdir, environ, getcwd, path
 
 import hiyapyco
 import yaml
@@ -8,8 +8,10 @@ import yaml
 import lakesuperior
 
 
-default_config_dir = environ.get('FCREPO_CONFIG_DIR', path.dirname(
-            path.abspath(lakesuperior.__file__)) + '/etc.defaults')
+default_config_dir = environ.get(
+        'FCREPO_CONFIG_DIR',
+        path.join(
+            path.dirname(path.abspath(lakesuperior.__file__)), 'etc.defaults'))
 """
 Default configuration directory.
 
@@ -53,38 +55,30 @@ def parse_config(config_dir=None):
     print('Reading configuration at {}'.format(config_dir))
 
     for cname in configs:
-        file = '{}/{}.yml'.format(config_dir , cname)
+        file = path.join(config_dir, '{}.yml'.format(cname))
         with open(file, 'r') as stream:
             _config[cname] = yaml.load(stream, yaml.SafeLoader)
 
-    error_msg = '''
-    **************
-    ** WARNING! **
-    **************
+    if not _config['application']['data_dir']:
+        _config['application']['data_dir'] = path.join(
+                lakesuperior.basedir, 'data')
 
-    Your test {} store location is set to be the same as the production
-    location. This means that if you run a test suite, your live data may be
-    wiped clean!
+    data_dir = _config['application']['data_dir']
+    _config['application']['store']['ldp_nr']['location'] = path.join(
+            data_dir, 'ldpnr_store')
+    _config['application']['store']['ldp_rs']['location'] = path.join(
+            data_dir, 'ldprs_store')
+    # If log handler file names are relative, they will be relative to the
+    # data dir.
+    oldwd = getcwd()
+    chdir(data_dir)
+    for handler in _config['logging']['handlers'].values():
+        if 'filename' in handler:
+            handler['filename'] = path.realpath(handler['filename'])
+    chdir(oldwd)
 
-    Please review your configuration before starting.
-    '''
-
-    # Merge default and test configurations.
-    _test_config = {'application': hiyapyco.load(
-            config_dir + '/application.yml',
-            config_dir + '/test.yml', method=hiyapyco.METHOD_MERGE)}
-
-    if _config['application']['store']['ldp_rs']['location'] \
-            == _test_config['application']['store']['ldp_rs']['location']:
-                raise RuntimeError(error_msg.format('RDF'))
-                sys.exit()
-
-    if _config['application']['store']['ldp_nr']['path'] \
-            == _test_config['application']['store']['ldp_nr']['path']:
-                raise RuntimeError(error_msg.format('binary'))
-                sys.exit()
-    return _config, _test_config
+    return _config
 
 
 # Load default configuration.
-config, test_config = parse_config()
+config = parse_config()

+ 0 - 0
data/bootstrap/rsrc_centric_layout.sparql → lakesuperior/data/bootstrap/rsrc_centric_layout.sparql


+ 15 - 6
lakesuperior/etc.defaults/application.yml

@@ -4,6 +4,21 @@
 # settings. Individual items can be selectively overridden as long as the YAML
 # hierarchical structure is kept.
 
+# Set app_mode to either 'prod', 'test' or 'dev'.
+# 'prod' is normal running mode. 'test' is used for running test suites.
+# 'dev' is similar to normal mode but with reload and debug enabled.
+app_mode: 'prod'
+
+# Base data directory. This contains both volatile files such as PID files,
+# and persistent ones, such as resource data. LDP-NRs will be stored under
+# <data_dir>/ldpnr_store and LDP-RSs under <data_dir>/ldprs_store.
+#
+# If different data files need to be running on different storage hardware,
+# the individual subdirectories can be mounted on different file systems.
+#
+# If unset, it will default to <lakesuperior package root>/data.
+data_dir:
+
 # Configuration for binary path and fixity check generation. The hash is a
 # checksum of the contents of the file.
 uuid:
@@ -18,9 +33,6 @@ store:
     # The semantic store used for persisting LDP-RS (RDF Source) resources.
     # MUST support SPARQL 1.1 query and update.
     ldp_rs:
-        # Directory where the RDF data files are stored.
-        location: data/ldprs_store
-
         # Store layout. At the moment, only `rsrc_centric_layout` is supported.
         layout: rsrc_centric_layout
 
@@ -47,9 +59,6 @@ store:
         # See store.ldp_rs.layout.
         layout: default_layout
 
-        # The filesystem path to the root of the binary store.
-        path: data/ldpnr_store
-
         # How to split the balanced pairtree to generate a path. The hash
         # string is defined by the uuid.algo parameter value.
         # This parameter defines how many characters are in each branch. 2-4 is

+ 2 - 6
lakesuperior/etc.defaults/gunicorn.yml

@@ -4,12 +4,8 @@
 # Commented values are the application defaults.
 
 # Directory where the WSGI server data are stored.
-data_dir: 'data'
-
-# Set app_mode to either 'prod', 'test' or 'dev'.
-# 'prod' is normal running mode. 'test' is used for running test suites.
-# 'dev' is similar to normal mode but with reload and debug enabled.
-app_mode: 'dev'
+# Relative paths are relative to the `data_dir` value in `application.yml`.
+data_dir: .
 
 #listen_addr: '0.0.0.0'
 #listen_port: 8000

+ 4 - 2
lakesuperior/etc.defaults/logging.yml

@@ -13,8 +13,10 @@ formatters:
 handlers:
   logfile:
     class: logging.handlers.RotatingFileHandler
-    # Change this.
-    filename: /tmp/lakesuperior.log
+    # Relative paths are relative to the `data_dir` value in `application.yml`.
+    # You can change this value to an absolute path or leave it alone and
+    # symlink the location to a different directory.
+    filename: log/lakesuperior.log
     maxBytes: 10485760
     backupCount: 3
     formatter: default_fmt

+ 2 - 3
lakesuperior/migrator.py

@@ -105,7 +105,7 @@ class Migrator:
                 '{}/etc.defaults'.format(cur_dir), self.config_dir)
 
         # Modify and overwrite destination configuration.
-        orig_config, _ = parse_config(self.config_dir)
+        orig_config = parse_config(self.config_dir)
         orig_config['application']['store']['ldp_rs']['location'] = self.dbpath
         orig_config['application']['store']['ldp_nr']['path'] = self.fpath
 
@@ -113,8 +113,7 @@ class Migrator:
                 as config_file:
             config_file.write(yaml.dump(orig_config['application']))
 
-        env.config = parse_config(self.config_dir)[0]
-        env.app_globals = AppGlobals(env.config)
+        env.app_globals = AppGlobals(parse_config(self.config_dir))
 
         self.rdfly = env.app_globals.rdfly
         self.nonrdfly = env.app_globals.nonrdfly

+ 1 - 1
lakesuperior/store/ldp_nr/base_non_rdf_layout.py

@@ -20,7 +20,7 @@ class BaseNonRdfLayout(metaclass=ABCMeta):
         Initialize the base non-RDF store layout.
         """
         self.config = config
-        self.root = config['path']
+        self.root = config['location']
 
 
     ## INTERFACE METHODS ##

+ 5 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -2,6 +2,7 @@ import logging
 
 from collections import defaultdict
 from itertools import chain
+from os import path
 from string import Template
 from urllib.parse import urldefrag
 
@@ -13,6 +14,7 @@ from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.store import Store
 
+from lakesuperior import basedir, env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
@@ -180,8 +182,10 @@ class RsrcCentricLayout:
 
         logger.info('Initializing the graph store with system data.')
         store.open()
+        fname = path.join(
+                basedir, 'data', 'bootstrap', 'rsrc_centric_layout.sparql')
         with TxnManager(store, True):
-            with open('data/bootstrap/rsrc_centric_layout.sparql', 'r') as f:
+            with open(fname, 'r') as f:
                 data = Template(f.read())
                 self.ds.update(data.substitute(timestamp=arrow.utcnow()))
 

+ 12 - 8
lakesuperior/wsgi.py

@@ -1,15 +1,16 @@
 import multiprocessing
 import yaml
 
-from os import environ, makedirs, path
+from os import chdir, environ, makedirs, getcwd, path
 
 import gunicorn.app.base
 
+from lakesuperior import env, env_setup
 from lakesuperior.config_parser import default_config_dir
 from lakesuperior.env import env
 
 
-config_file = '{}/gunicorn.yml'.format(default_config_dir)
+config_file = path.join(default_config_dir, 'gunicorn.yml')
 
 with open(config_file, 'r') as fh:
     config = yaml.load(fh, yaml.SafeLoader)
@@ -17,11 +18,14 @@ with open(config_file, 'r') as fh:
 listen_addr = config.get('listen_addr', '0.0.0.0')
 listen_port = config.get('listen_port', 8000)
 preload_app = config.get('preload_app', True)
-app_mode = config.get('app_mode', 'prod')
+app_mode = env.app_globals.config['application'].get('app_mode', 'prod')
 
+oldwd = getcwd()
+chdir(env.app_globals.config['application']['data_dir'])
 data_dir = path.realpath(config.get('data_dir'))
-run_dir = '{}/run'.format(data_dir)
-log_dir = '{}/log'.format(data_dir)
+chdir(oldwd)
+run_dir = path.join(data_dir, 'run')
+log_dir = path.join(data_dir, 'log')
 makedirs(log_dir, exist_ok=True)
 makedirs(run_dir, exist_ok=True)
 
@@ -43,9 +47,9 @@ options = {
     'daemon': app_mode=='prod',
     'reload': app_mode=='dev' and not preload_app,
 
-    'pidfile': '{}/fcrepo.pid'.format(run_dir),
-    'accesslog': '{}/gunicorn-access.log'.format(log_dir),
-    'errorlog': '{}/gunicorn-error.log'.format(log_dir),
+    'pidfile': path.join(run_dir, 'fcrepo.pid'),
+    'accesslog': path.join(log_dir, 'gunicorn-access.log'),
+    'errorlog': path.join(log_dir, 'gunicorn-error.log'),
 }
 env.wsgi_options = options
 

+ 2 - 4
setup.py

@@ -27,7 +27,7 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
 
 setup(
     name='lakesuperior',
-    version='1.0.0a12',
+    version='1.0.0a13',
 
     description='A Linked Data Platform repository sever.',
     long_description=long_description,
@@ -106,9 +106,7 @@ setup(
     #extras_require={},
     #package_data={
     #},
-    data_files=[
-        ('data/bootstrap', glob('data/bootstrap/*')),
-    ],
+    #data_files=[],
 
     entry_points={
         'console_scripts': [