Переглянути джерело

Configuration overhaul.

* Remove fully custom locations for resource data paths
* More sane defaults
* Allow relative paths for some directives
* Consistently set default folder within `lakesuperior/data`
* Remove additional test configuration
Stefano Cossu 6 роки тому
батько
коміт
7bd26e6f6c

+ 1 - 2
.gitignore

@@ -104,5 +104,4 @@ venv.bak/
 .mypy_cache/
 .mypy_cache/
 
 
 # Default LAKEsuperior data directories
 # Default LAKEsuperior data directories
-data/ldpnr_store
-data/ldprs_store
+/data

+ 1 - 0
MANIFEST.in

@@ -1,4 +1,5 @@
 include README.rst
 include README.rst
 include LICENSE
 include LICENSE
+graft lakesuperior/data/bootstrap
 graft lakesuperior/endpoints/templates
 graft lakesuperior/endpoints/templates
 graft lakesuperior/etc.defaults
 graft lakesuperior/etc.defaults

+ 15 - 8
conftest.py

@@ -2,24 +2,31 @@ import sys
 
 
 import pytest
 import pytest
 
 
-sys.path.append('.')
-from lakesuperior.config_parser import test_config
-from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
+from shutil import rmtree
+from tempfile import gettempdir
 
 
-env.config = test_config
-env.app_globals = AppGlobals(test_config)
+from lakesuperior import env_setup, env
 from lakesuperior.app import create_app
 from lakesuperior.app import create_app
 from lakesuperior.util.generators import random_image
 from lakesuperior.util.generators import random_image
 
 
-env.config = test_config
-
 @pytest.fixture(scope='module')
 @pytest.fixture(scope='module')
 def app():
 def app():
+    # Override data directory locations.
+    data_dir = path.join(gettempdir(), 'lsup_test', 'data')
+    env.config['application']['data_dir'] = data_dir
+    env.config['application']['store']['ldp_nr']['location'] = path.join(
+            data_dir, 'ldpnr_store')
+    env.config['application']['store']['ldp_rs']['location'] = path.join(
+            data_dir, 'ldprs_store')
     app = create_app(env.config['application'])
     app = create_app(env.config['application'])
 
 
     yield app
     yield app
 
 
+    # TODO improve this by using tempfile.TemporaryDirectory as a context
+    # manager.
+    print('Removing fixture data directory.')
+    rmtree(data_dir)
+
 
 
 @pytest.fixture(scope='module')
 @pytest.fixture(scope='module')
 def db(app):
 def db(app):

+ 0 - 0
data/log/.keep


+ 0 - 0
data/run/.keep


+ 14 - 6
docs/setup.rst

@@ -79,9 +79,14 @@ Configuration
 
 
 The app should run for testing and evaluation purposes without any
 The app should run for testing and evaluation purposes without any
 further configuration. All the application data are stored by default in
 further configuration. All the application data are stored by default in
-the ``data`` directory.
+the ``data`` directory of the Python package.
 
 
-To change the default configuration you should:
+This setup is not recommended for anything more than a quick look at the
+application. If more complex interaction is needed, or upgrades to the package
+are foreseen, it is strongly advised to set up proper locations for
+configuration and data.
+
+To change the default configuration you need to:
 
 
 #. Copy the ``etc.defaults`` folder to a separate location
 #. Copy the ``etc.defaults`` folder to a separate location
 #. Set the configuration folder location in the environment:
 #. Set the configuration folder location in the environment:
@@ -94,10 +99,13 @@ To change the default configuration you should:
 
 
 The configuration options are documented in the files.
 The configuration options are documented in the files.
 
 
-**Note:** ``test.yml`` must specify a different location for the graph
-and for the binary stores than the default one, otherwise running a test
-suite will destroy your main data store. The application will issue an
-error message and refuse to start if these locations overlap.
+One thing worth noting is that some locations can be specified as relative
+paths. These paths will be relative to the ``data_dir`` location specified in
+the ``application.yml`` file.
+
+If ``data_dir`` is empty, as it is in the default configuration, it defaults
+to the ``data`` directory inside the Python package. This is the option that
+one may want to change before anything else.
 
 
 Production deployment
 Production deployment
 ---------------------
 ---------------------

+ 1 - 1
docs/usage.rst

@@ -125,7 +125,7 @@ Or, to specify an alternative configuration::
 
 
     >>> from lakesuperior.config_parser import parse_config
     >>> from lakesuperior.config_parser import parse_config
     >>> from lakesuperior.globals import AppGlobals
     >>> from lakesuperior.globals import AppGlobals
-    >>> env.config, test_config = parse_config('/my/custom/config_dir')
+    >>> env.config = parse_config('/my/custom/config_dir')
     Reading configuration at /my/custom/config_dir
     Reading configuration at /my/custom/config_dir
     >>> env.app_globals = AppGlobals(env.config)
     >>> env.app_globals = AppGlobals(env.config)
 
 

+ 49 - 0
lakesuperior/__init__.py

@@ -0,0 +1,49 @@
+import threading
+
+from os import path
+
+basedir = path.dirname(path.realpath(__file__))
+"""
+Base directory for the module.
+
+This can be used by modules looking for configuration and data files to be
+referenced or copied with a known path relative to the package root.
+
+:rtype: str
+"""
+
+class Env:
+    pass
+
+env = Env()
+"""
+A pox on "globals are evil".
+
+All-purpose bucket for storing global variables. Different environments
+(e.g. webapp, test suite) put the appropriate value in it.
+The most important values to be stored are config (obtained from
+lakesuperior.config_parser.config, or from a custom directory via
+lakesuperior.config_parser.parse_config) and app_globals (obtained by an
+instance of lakesuperior.globals.AppGlobals).
+
+e.g.::
+
+    >>> from lakesuperior.config_parser import config
+    >>> from lakesuperior.globals import AppGlobals
+    >>> from lakesuperior import env
+    >>> env.app_globals = AppGlobals(config)
+
+This is automated in non-test environments by importing
+`lakesuperior.env_setup`.
+
+:rtype: Object
+"""
+
+thread_env = threading.local()
+"""
+Thread-local environment.
+
+This is used to store thread-specific variables such as start/end request
+timestamps.
+
+:rtype: threading.local
+"""

+ 1 - 2
lakesuperior/api/admin.py

@@ -55,8 +55,7 @@ def integrity_check(config_dir=None):
     be added and triggered by different argument flags.
     be added and triggered by different argument flags.
     """
     """
     if config_dir:
     if config_dir:
-        env.config = parse_config(config_dir)[0]
-        env.app_globals = AppGlobals(env.config)
+        env.app_globals = AppGlobals(parse_config(config_dir))
     else:
     else:
         import lakesuperior.env_setup
         import lakesuperior.env_setup
     with TxnManager(env.app_globals.rdfly.store):
     with TxnManager(env.app_globals.rdfly.store):

+ 24 - 30
lakesuperior/config_parser.py

@@ -1,6 +1,6 @@
 import sys
 import sys
 
 
-from os import path, environ
+from os import chdir, environ, getcwd, path
 
 
 import hiyapyco
 import hiyapyco
 import yaml
 import yaml
@@ -8,8 +8,10 @@ import yaml
 import lakesuperior
 import lakesuperior
 
 
 
 
-default_config_dir = environ.get('FCREPO_CONFIG_DIR', path.dirname(
-            path.abspath(lakesuperior.__file__)) + '/etc.defaults')
+default_config_dir = environ.get(
+        'FCREPO_CONFIG_DIR',
+        path.join(
+            path.dirname(path.abspath(lakesuperior.__file__)), 'etc.defaults'))
 """
 """
 Default configuration directory.
 Default configuration directory.
 
 
@@ -53,38 +55,30 @@ def parse_config(config_dir=None):
     print('Reading configuration at {}'.format(config_dir))
     print('Reading configuration at {}'.format(config_dir))
 
 
     for cname in configs:
     for cname in configs:
-        file = '{}/{}.yml'.format(config_dir , cname)
+        file = path.join(config_dir, '{}.yml'.format(cname))
         with open(file, 'r') as stream:
         with open(file, 'r') as stream:
             _config[cname] = yaml.load(stream, yaml.SafeLoader)
             _config[cname] = yaml.load(stream, yaml.SafeLoader)
 
 
-    error_msg = '''
-    **************
-    ** WARNING! **
-    **************
+    if not _config['application']['data_dir']:
+        _config['application']['data_dir'] = path.join(
+                lakesuperior.basedir, 'data')
 
 
-    Your test {} store location is set to be the same as the production
-    location. This means that if you run a test suite, your live data may be
-    wiped clean!
+    data_dir = _config['application']['data_dir']
+    _config['application']['store']['ldp_nr']['location'] = path.join(
+            data_dir, 'ldpnr_store')
+    _config['application']['store']['ldp_rs']['location'] = path.join(
+            data_dir, 'ldprs_store')
+    # If log handler file names are relative, they will be relative to the
+    # data dir.
+    oldwd = getcwd()
+    chdir(data_dir)
+    for handler in _config['logging']['handlers'].values():
+        if 'filename' in handler:
+            handler['filename'] = path.realpath(handler['filename'])
+    chdir(oldwd)
 
 
-    Please review your configuration before starting.
-    '''
-
-    # Merge default and test configurations.
-    _test_config = {'application': hiyapyco.load(
-            config_dir + '/application.yml',
-            config_dir + '/test.yml', method=hiyapyco.METHOD_MERGE)}
-
-    if _config['application']['store']['ldp_rs']['location'] \
-            == _test_config['application']['store']['ldp_rs']['location']:
-                raise RuntimeError(error_msg.format('RDF'))
-                sys.exit()
-
-    if _config['application']['store']['ldp_nr']['path'] \
-            == _test_config['application']['store']['ldp_nr']['path']:
-                raise RuntimeError(error_msg.format('binary'))
-                sys.exit()
-    return _config, _test_config
+    return _config
 
 
 
 
 # Load default configuration.
 # Load default configuration.
-config, test_config = parse_config()
+config = parse_config()

+ 0 - 0
data/bootstrap/rsrc_centric_layout.sparql → lakesuperior/data/bootstrap/rsrc_centric_layout.sparql


+ 15 - 6
lakesuperior/etc.defaults/application.yml

@@ -4,6 +4,21 @@
 # settings. Individual items can be selectively overridden as long as the YAML
 # settings. Individual items can be selectively overridden as long as the YAML
 # hierarchical structure is kept.
 # hierarchical structure is kept.
 
 
+# Set app_mode to either 'prod', 'test' or 'dev'.
+# 'prod' is normal running mode. 'test' is used for running test suites.
+# 'dev' is similar to normal mode but with reload and debug enabled.
+app_mode: 'prod'
+
+# Base data directory. This contains both volatile files such as PID files,
+# and persistent ones, such as resource data. LDP-NRs will be stored under
+# <data_dir>/ldpnr_store and LDP-RSs under <data_dir>/ldprs_store.
+#
+# If different data files need to be running on different storage hardware,
+# the individual subdirectories can be mounted on different file systems.
+#
+# If unset, it will default to <lakesuperior package root>/data.
+data_dir:
+
 # Configuration for binary path and fixity check generation. The hash is a
 # Configuration for binary path and fixity check generation. The hash is a
 # checksum of the contents of the file.
 # checksum of the contents of the file.
 uuid:
 uuid:
@@ -18,9 +33,6 @@ store:
     # The semantic store used for persisting LDP-RS (RDF Source) resources.
     # The semantic store used for persisting LDP-RS (RDF Source) resources.
     # MUST support SPARQL 1.1 query and update.
     # MUST support SPARQL 1.1 query and update.
     ldp_rs:
     ldp_rs:
-        # Directory where the RDF data files are stored.
-        location: data/ldprs_store
-
         # Store layout. At the moment, only `rsrc_centric_layout` is supported.
         # Store layout. At the moment, only `rsrc_centric_layout` is supported.
         layout: rsrc_centric_layout
         layout: rsrc_centric_layout
 
 
@@ -47,9 +59,6 @@ store:
         # See store.ldp_rs.layout.
         # See store.ldp_rs.layout.
         layout: default_layout
         layout: default_layout
 
 
-        # The filesystem path to the root of the binary store.
-        path: data/ldpnr_store
-
         # How to split the balanced pairtree to generate a path. The hash
         # How to split the balanced pairtree to generate a path. The hash
         # string is defined by the uuid.algo parameter value.
         # string is defined by the uuid.algo parameter value.
         # This parameter defines how many characters are in each branch. 2-4 is
         # This parameter defines how many characters are in each branch. 2-4 is

+ 2 - 6
lakesuperior/etc.defaults/gunicorn.yml

@@ -4,12 +4,8 @@
 # Commented values are the application defaults.
 # Commented values are the application defaults.
 
 
 # Directory where the WSGI server data are stored.
 # Directory where the WSGI server data are stored.
-data_dir: 'data'
-
-# Set app_mode to either 'prod', 'test' or 'dev'.
-# 'prod' is normal running mode. 'test' is used for running test suites.
-# 'dev' is similar to normal mode but with reload and debug enabled.
-app_mode: 'dev'
+# Relative paths are relative to the `data_dir` value in `application.yml`.
+data_dir: .
 
 
 #listen_addr: '0.0.0.0'
 #listen_addr: '0.0.0.0'
 #listen_port: 8000
 #listen_port: 8000

+ 4 - 2
lakesuperior/etc.defaults/logging.yml

@@ -13,8 +13,10 @@ formatters:
 handlers:
 handlers:
   logfile:
   logfile:
     class: logging.handlers.RotatingFileHandler
     class: logging.handlers.RotatingFileHandler
-    # Change this.
-    filename: /tmp/lakesuperior.log
+    # Relative paths are relative to the `data_dir` value in `application.yml`.
+    # You can change this value to an absolute path or leave it alone and
+    # symlink the location to a different directory.
+    filename: log/lakesuperior.log
     maxBytes: 10485760
     maxBytes: 10485760
     backupCount: 3
     backupCount: 3
     formatter: default_fmt
     formatter: default_fmt

+ 2 - 3
lakesuperior/migrator.py

@@ -105,7 +105,7 @@ class Migrator:
                 '{}/etc.defaults'.format(cur_dir), self.config_dir)
                 '{}/etc.defaults'.format(cur_dir), self.config_dir)
 
 
         # Modify and overwrite destination configuration.
         # Modify and overwrite destination configuration.
-        orig_config, _ = parse_config(self.config_dir)
+        orig_config = parse_config(self.config_dir)
         orig_config['application']['store']['ldp_rs']['location'] = self.dbpath
         orig_config['application']['store']['ldp_rs']['location'] = self.dbpath
         orig_config['application']['store']['ldp_nr']['path'] = self.fpath
         orig_config['application']['store']['ldp_nr']['path'] = self.fpath
 
 
@@ -113,8 +113,7 @@ class Migrator:
                 as config_file:
                 as config_file:
             config_file.write(yaml.dump(orig_config['application']))
             config_file.write(yaml.dump(orig_config['application']))
 
 
-        env.config = parse_config(self.config_dir)[0]
-        env.app_globals = AppGlobals(env.config)
+        env.app_globals = AppGlobals(parse_config(self.config_dir))
 
 
         self.rdfly = env.app_globals.rdfly
         self.rdfly = env.app_globals.rdfly
         self.nonrdfly = env.app_globals.nonrdfly
         self.nonrdfly = env.app_globals.nonrdfly

+ 1 - 1
lakesuperior/store/ldp_nr/base_non_rdf_layout.py

@@ -20,7 +20,7 @@ class BaseNonRdfLayout(metaclass=ABCMeta):
         Initialize the base non-RDF store layout.
         Initialize the base non-RDF store layout.
         """
         """
         self.config = config
         self.config = config
-        self.root = config['path']
+        self.root = config['location']
 
 
 
 
     ## INTERFACE METHODS ##
     ## INTERFACE METHODS ##

+ 5 - 1
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -2,6 +2,7 @@ import logging
 
 
 from collections import defaultdict
 from collections import defaultdict
 from itertools import chain
 from itertools import chain
+from os import path
 from string import Template
 from string import Template
 from urllib.parse import urldefrag
 from urllib.parse import urldefrag
 
 
@@ -13,6 +14,7 @@ from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.resource import Resource
 from rdflib.store import Store
 from rdflib.store import Store
 
 
+from lakesuperior import basedir, env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
@@ -180,8 +182,10 @@ class RsrcCentricLayout:
 
 
         logger.info('Initializing the graph store with system data.')
         logger.info('Initializing the graph store with system data.')
         store.open()
         store.open()
+        fname = path.join(
+                basedir, 'data', 'bootstrap', 'rsrc_centric_layout.sparql')
         with TxnManager(store, True):
         with TxnManager(store, True):
-            with open('data/bootstrap/rsrc_centric_layout.sparql', 'r') as f:
+            with open(fname, 'r') as f:
                 data = Template(f.read())
                 data = Template(f.read())
                 self.ds.update(data.substitute(timestamp=arrow.utcnow()))
                 self.ds.update(data.substitute(timestamp=arrow.utcnow()))
 
 

+ 12 - 8
lakesuperior/wsgi.py

@@ -1,15 +1,16 @@
 import multiprocessing
 import multiprocessing
 import yaml
 import yaml
 
 
-from os import environ, makedirs, path
+from os import chdir, environ, makedirs, getcwd, path
 
 
 import gunicorn.app.base
 import gunicorn.app.base
 
 
+from lakesuperior import env, env_setup
 from lakesuperior.config_parser import default_config_dir
 from lakesuperior.config_parser import default_config_dir
 from lakesuperior.env import env
 from lakesuperior.env import env
 
 
 
 
-config_file = '{}/gunicorn.yml'.format(default_config_dir)
+config_file = path.join(default_config_dir, 'gunicorn.yml')
 
 
 with open(config_file, 'r') as fh:
 with open(config_file, 'r') as fh:
     config = yaml.load(fh, yaml.SafeLoader)
     config = yaml.load(fh, yaml.SafeLoader)
@@ -17,11 +18,14 @@ with open(config_file, 'r') as fh:
 listen_addr = config.get('listen_addr', '0.0.0.0')
 listen_addr = config.get('listen_addr', '0.0.0.0')
 listen_port = config.get('listen_port', 8000)
 listen_port = config.get('listen_port', 8000)
 preload_app = config.get('preload_app', True)
 preload_app = config.get('preload_app', True)
-app_mode = config.get('app_mode', 'prod')
+app_mode = env.app_globals.config['application'].get('app_mode', 'prod')
 
 
+oldwd = getcwd()
+chdir(env.app_globals.config['application']['data_dir'])
 data_dir = path.realpath(config.get('data_dir'))
 data_dir = path.realpath(config.get('data_dir'))
-run_dir = '{}/run'.format(data_dir)
-log_dir = '{}/log'.format(data_dir)
+chdir(oldwd)
+run_dir = path.join(data_dir, 'run')
+log_dir = path.join(data_dir, 'log')
 makedirs(log_dir, exist_ok=True)
 makedirs(log_dir, exist_ok=True)
 makedirs(run_dir, exist_ok=True)
 makedirs(run_dir, exist_ok=True)
 
 
@@ -43,9 +47,9 @@ options = {
     'daemon': app_mode=='prod',
     'daemon': app_mode=='prod',
     'reload': app_mode=='dev' and not preload_app,
     'reload': app_mode=='dev' and not preload_app,
 
 
-    'pidfile': '{}/fcrepo.pid'.format(run_dir),
-    'accesslog': '{}/gunicorn-access.log'.format(log_dir),
-    'errorlog': '{}/gunicorn-error.log'.format(log_dir),
+    'pidfile': path.join(run_dir, 'fcrepo.pid'),
+    'accesslog': path.join(log_dir, 'gunicorn-access.log'),
+    'errorlog': path.join(log_dir, 'gunicorn-error.log'),
 }
 }
 env.wsgi_options = options
 env.wsgi_options = options
 
 

+ 2 - 4
setup.py

@@ -27,7 +27,7 @@ with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
 
 
 setup(
 setup(
     name='lakesuperior',
     name='lakesuperior',
-    version='1.0.0a12',
+    version='1.0.0a13',
 
 
     description='A Linked Data Platform repository sever.',
     description='A Linked Data Platform repository sever.',
     long_description=long_description,
     long_description=long_description,
@@ -106,9 +106,7 @@ setup(
     #extras_require={},
     #extras_require={},
     #package_data={
     #package_data={
     #},
     #},
-    data_files=[
-        ('data/bootstrap', glob('data/bootstrap/*')),
-    ],
+    #data_files=[],
 
 
     entry_points={
     entry_points={
         'console_scripts': [
         'console_scripts': [