Merge pull request #55 from scossu/development

Alpha 13.
Stefano Cossu 7 years ago
parent
commit
e71a1324ba
43 changed files with 504 additions and 242 deletions
  1. +25  -0   .github/release_template.md
  2. +1   -2   .gitignore
  3. +1   -0   .travis.yml
  4. +1   -0   MANIFEST.in
  5. +1   -0   VERSION
  6. +27  -15  conftest.py
  7. +0   -0   data/log/.keep
  8. +0   -0   data/run/.keep
  9. +1   -1   docs/apidoc/lakesuperior.rst
  10. +67  -9   docs/cli.rst
  11. +6   -7   docs/conf.py
  12. +14  -6   docs/setup.rst
  13. +3   -2   docs/usage.rst
  14. +49  -0   lakesuperior/__init__.py
  15. +3   -9   lakesuperior/api/admin.py
  16. +1   -1   lakesuperior/api/query.py
  17. +6   -5   lakesuperior/api/resource.py
  18. +24  -30  lakesuperior/config_parser.py
  19. +0   -0   lakesuperior/data/bootstrap/rsrc_centric_layout.sparql
  20. +3   -1   lakesuperior/endpoints/ldp.py
  21. +9   -1   lakesuperior/endpoints/main.py
  22. +1   -1   lakesuperior/endpoints/query.py
  23. +1   -0   lakesuperior/endpoints/templates/index.html
  24. +0   -25  lakesuperior/env.py
  25. +2   -3   lakesuperior/env_setup.py
  26. +15  -6   lakesuperior/etc.defaults/application.yml
  27. +2   -6   lakesuperior/etc.defaults/gunicorn.yml
  28. +4   -2   lakesuperior/etc.defaults/logging.yml
  29. +22  -8   lakesuperior/globals.py
  30. +51  -10  lakesuperior/lsup_admin.py
  31. +53  -36  lakesuperior/migrator.py
  32. +1   -1   lakesuperior/model/ldp_factory.py
  33. +1   -1   lakesuperior/model/ldp_nr.py
  34. +1   -1   lakesuperior/model/ldp_rs.py
  35. +19  -12  lakesuperior/model/ldpr.py
  36. +1   -1   lakesuperior/profiler.py
  37. +5   -6   lakesuperior/server.py
  38. +1   -1   lakesuperior/store/ldp_nr/base_non_rdf_layout.py
  39. +24  -14  lakesuperior/store/ldp_rs/lmdb_store.py
  40. +16  -6   lakesuperior/store/ldp_rs/rsrc_centric_layout.py
  41. +14  -9   lakesuperior/wsgi.py
  42. +8   -4   setup.py
  43. +20  -0   tests/endpoints/test_ldp.py

+ 25 - 0
.github/release_template.md

@@ -0,0 +1,25 @@
+# What Changed In This Release
+
+[High-level, colloquial summary of changes—mandatory]
+
+## New Features
+
+-
+
+## Enhancements
+
+-
+
+## Bug Fixes
+
+-
+
+## Other Changes
+
+-
+
+## Notes & Caveats
+
+-
+
+## Acknowledgments

+ 1 - 2
.gitignore

@@ -104,5 +104,4 @@ venv.bak/
 .mypy_cache/
 
 # Default LAKEsuperior data directories
-data/ldpnr_store
-data/ldprs_store
+/data

+ 1 - 0
.travis.yml

@@ -15,5 +15,6 @@ deploy:
     on:
         tags: true
         branch: master
+        python: "3.5"
     distributions: "bdist_wheel"
 

+ 1 - 0
MANIFEST.in

@@ -1,4 +1,5 @@
 include README.rst
 include LICENSE
+graft lakesuperior/data/bootstrap
 graft lakesuperior/endpoints/templates
 graft lakesuperior/etc.defaults

+ 1 - 0
VERSION

@@ -0,0 +1 @@
+1.0.0a13

+ 27 - 15
conftest.py

@@ -1,22 +1,31 @@
-import sys
-
 import pytest
 
-sys.path.append('.')
-from lakesuperior.config_parser import test_config
+from os import makedirs, path
+from shutil import rmtree
+from tempfile import gettempdir
+
+from lakesuperior import env
+from lakesuperior.config_parser import parse_config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
+from lakesuperior.util.generators import random_image
 
-env.config = test_config
-env.app_globals = AppGlobals(test_config)
+
+# Override data directory locations.
+config = parse_config()
+data_dir = path.join(gettempdir(), 'lsup_test', 'data')
+config['application']['data_dir'] = data_dir
+config['application']['store']['ldp_nr']['location'] = (
+        path.join(data_dir, 'ldpnr_store'))
+config['application']['store']['ldp_rs']['location'] = (
+        path.join(data_dir, 'ldprs_store'))
+
+env.app_globals = AppGlobals(config)
 from lakesuperior.app import create_app
-from lakesuperior.util.generators import random_image
 
-env.config = test_config
 
 @pytest.fixture(scope='module')
 def app():
-    app = create_app(env.config['application'])
+    app = create_app(env.app_globals.config['application'])
 
     yield app
 
@@ -26,14 +35,17 @@ def db(app):
     '''
     Set up and tear down test triplestore.
     '''
-    rdfly = env.app_globals.rdfly
-    rdfly.bootstrap()
+    makedirs(data_dir, exist_ok=True)
+    env.app_globals.rdfly.bootstrap()
     env.app_globals.nonrdfly.bootstrap()
+    print('Initialized data store.')
 
-    yield rdfly
+    yield env.app_globals.rdfly
 
-    print('Tearing down fixture graph store.')
-    rdfly.store.destroy(rdfly.store.path)
+    # TODO improve this by using tempfile.TemporaryDirectory as a context
+    # manager.
+    print('Removing fixture data directory.')
+    rmtree(data_dir)
 
 
 @pytest.fixture
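
The TODO above about ``tempfile.TemporaryDirectory`` could look roughly like
this; a minimal sketch assuming the same bootstrap calls and an ``env`` that
is already pointed at the temporary directory::

    import pytest

    from tempfile import TemporaryDirectory

    from lakesuperior import env

    @pytest.fixture(scope='module')
    def db(app):
        # The context manager removes the directory on exit, replacing the
        # explicit rmtree() call used above.
        with TemporaryDirectory(prefix='lsup_test_') as data_dir:
            env.app_globals.rdfly.bootstrap()
            env.app_globals.nonrdfly.bootstrap()
            yield env.app_globals.rdfly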

+ 0 - 0
data/log/.keep


+ 0 - 0
data/run/.keep


+ 1 - 1
docs/apidoc/lakesuperior.rst

@@ -34,7 +34,7 @@ lakesuperior\.config\_parser module
 lakesuperior\.env module
 ------------------------
 
-.. automodule:: lakesuperior.env
+.. automodule:: lakesuperior
     :members:
     :undoc-members:
     :show-inheritance:

+ 67 - 9
docs/cli.rst

@@ -1,13 +1,39 @@
-LAKEsuperior Command Line Reference
-===================================
+Command Line Reference
+======================
 
-The LAKEsuperior command line tool is used for maintenance and
-administration purposes.
+LAKEsuperior comes with command-line tools for several purposes.
 
-The script should be in your executable path if you install LAKEsuperior with
-``pip``. The tool is self-documented, so this is just a redundant overview::
+If LAKEsuperior is installed via ``pip``, all tools can be invoked as normal
+commands (i.e. they are in the virtualenv ``PATH``). 
 
-    $ lsup_admin
+The tools are currently not directly available on Docker instances (*TODO add
+instructions and/or code changes to access them*).
+
+``fcrepo``
+----------
+
+This is the main server command. It has no parameters. The command spawns
+Gunicorn workers (as many as set up in the configuration) and can be sent to
+the background, or started via an init script.
+
+The tool must be run in the same virtual environment in which LAKEsuperior
+was installed (if one was used), i.e.::
+
+    source <virtualenv root>/bin/activate
+
+must be run before running the server.
+
+If an init script is used, ``coilmq`` (from a third-party package) needs to
+be launched as well, unless a message broker is already set up or messaging
+is disabled in the configuration.
+
+``lsup-admin``
+--------------
+
+``lsup-admin`` is the principal repository management tool. It is
+self-documented, so this is just a redundant overview::
+
+    $ lsup-admin
     Usage: lsup-admin [OPTIONS] COMMAND [ARGS]...
 
     Options:
@@ -21,8 +47,6 @@ The script should be in your executable path if you install LAKEsuperior with
       migrate       Migrate an LDP repository to LAKEsuperior.
       stats         Print repository statistics.
 
-*TODO: Add instructions to access from Docker.*
-
 All entries marked ``[STUB]`` are not yet implemented; however, the
 ``lsup-admin <command> --help`` command will issue a description of what
 the command is meant to do. Check the
@@ -31,3 +55,37 @@ the radar.
 
 All of the above commands are also available via, and based upon, the
 native Python API.
+
+``lsup-benchmark``
+------------------
+
+``lsup-benchmark`` is used to run performance tests in a predictable way.
+
+The command has no options but prompts the user for a few settings
+interactively (N.B. this may change in favor of parameters).
+
+The benchmark tool can create RDF sources, non-RDF ones, or an equal mix of
+the two, via POST or PUT, against the currently running LAKEsuperior server.
+It runs single-threaded.
+
+The RDF sources are randomly generated graphs of consistent size and
+complexity. They include a mix of in-repository references, literals, and
+external URIs. Each graph has 200 triples.
+
+The non-RDF sources are randomly generated 1024x1024 pixel PNG images.
+
+You are warmly encouraged to run the script and share the performance results
+(*TODO: add template for posting results*).
+
+``profiler``
+------------
+
+This command launches a single-threaded HTTP server (Flask) on port 5000 that
+logs profiling information. This is useful for analyzing application
+performance.
+
+For more information, consult the `Python profilers guide
+<https://docs.python.org/3/library/profile.html>`__.
+
+Do not launch this while a WSGI server (``fcrepo``) is already running, because
+that also launches a Flask server on port 5000.
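
For context, the ``profiler`` command wraps the Flask app in Werkzeug's
``ProfilerMiddleware`` (see ``lakesuperior/profiler.py`` further down in this
diff). A minimal standalone sketch of the same setup, with an illustrative
restriction value::

    from flask import Flask
    from werkzeug.contrib.profiler import ProfilerMiddleware

    app = Flask(__name__)
    # Report the 30 most expensive calls for each request.
    app.wsgi_app = ProfilerMiddleware(app.wsgi_app, restrictions=[30])

    if __name__ == '__main__':
        app.run(port=5000)  # the port the warning above refers to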

+ 6 - 7
docs/conf.py

@@ -17,13 +17,11 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-import os
 import sys
 
+from os import path
 from unittest.mock import MagicMock
 
-#sys.path.append(os.path.abspath('../'))
-
 class MockModule(MagicMock):
     @classmethod
     def __getattr__(cls, name):
@@ -37,6 +35,8 @@ sys.modules.update((mod_name, MockModule()) for mod_name in MOCK_MODULES)
 import lakesuperior.env_setup
 
 
+here = path.abspath(path.dirname(__file__))
+
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -74,10 +74,9 @@ author = 'Stefano Cossu'
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
-# The short X.Y version.
-version = '1.0-alpha'
-# The full version, including alpha/beta/rc tags.
-release = '1.0.0-alpha.8'
+# Version and release are the same.
+with open(path.realpath(path.join(here, '..', 'VERSION'))) as fh:
+    version = release = fh.readlines()[0]
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

+ 14 - 6
docs/setup.rst

@@ -79,9 +79,14 @@ Configuration
 
 The app should run for testing and evaluation purposes without any
 further configuration. All the application data are stored by default in
-the ``data`` directory.
+the ``data`` directory of the Python package.
 
-To change the default configuration you should:
+This setup is not recommended for anything more than a quick look at the
+application. If more complex interaction is needed, or upgrades to the package
+are foreseen, it is strongly advised to set up proper locations for
+configuration and data.
+
+To change the default configuration you need to:
 
 #. Copy the ``etc.default`` folder to a separate location
 #. Set the configuration folder location in the environment:
@@ -94,10 +99,13 @@ To change the default configuration you should:
 
 The configuration options are documented in the files.
 
-**Note:** ``test.yml`` must specify a different location for the graph
-and for the binary stores than the default one, otherwise running a test
-suite will destroy your main data store. The application will issue an
-error message and refuse to start if these locations overlap.
+One thing worth noting is that some locations can be specified as relative
+paths. These paths will be relative to the ``data_dir`` location specified in
+the ``application.yml`` file.
+
+If ``data_dir`` is empty, as it is in the default configuration, it defaults
+to the ``data`` directory inside the Python package. This is the option that
+one may want to change before anything else.
 
 Production deployment
 ---------------------
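
The ``data_dir`` fallback described in this hunk amounts to roughly the
following sketch (``configured`` stands for the raw value read from
``application.yml``; ``parse_config()`` applies this default internally)::

    from os import path

    import lakesuperior

    configured = None  # i.e. data_dir is left empty in application.yml
    data_dir = configured or path.join(lakesuperior.basedir, 'data')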

+ 3 - 2
docs/usage.rst

@@ -123,11 +123,12 @@ Before using the API, either do::
 
 Or, to specify an alternative configuration::
 
+    >>> from lakesuperior import env
     >>> from lakesuperior.config_parser import parse_config
     >>> from lakesuperior.globals import AppGlobals
-    >>> env.config, test_config = parse_config('/my/custom/config_dir')
+    >>> config = parse_config('/my/custom/config_dir')
     Reading configuration at /my/custom/config_dir
-    >>> env.app_globals = AppGlobals(env.config)
+    >>> env.app_globals = AppGlobals(config)
 
 Create and replace resources
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+ 49 - 0
lakesuperior/__init__.py

@@ -0,0 +1,49 @@
+import threading
+
+from os import path
+
+basedir = path.dirname(path.realpath(__file__))
+"""
+Base directory for the module.
+
+This can be used by modules looking for configuration and data files to be
+referenced or copied with a known path relative to the package root.
+
+:rtype: str
+"""
+
+class Env:
+    pass
+
+env = Env()
+"""
+A pox on "globals are evil".
+
+All-purpose bucket for storing global variables. Different environments
+(e.g. webapp, test suite) put the appropriate value in it.
+The most important values to be stored are app_conf (from
+lakesuperior.config_parser.config or the return value of parse_config())
+and app_globals (an instance of lakesuperior.globals.AppGlobals).
+
+e.g.::
+
+    >>> from lakesuperior.config_parser import config
+    >>> from lakesuperior.globals import AppGlobals
+    >>> from lakesuperior import env
+    >>> env.app_globals = AppGlobals(config)
+
+This is automated in non-test environments by importing
+`lakesuperior.env_setup`.
+
+:rtype: Object
+"""
+
+thread_env = threading.local()
+"""
+Thread-local environment.
+
+This is used to store thread-specific variables such as start/end request
+timestamps.
+
+:rtype: threading.local
+"""

+ 3 - 9
lakesuperior/api/admin.py

@@ -1,8 +1,7 @@
 import logging
 
+from lakesuperior import env
 from lakesuperior.config_parser import parse_config
-from lakesuperior.env import env
-from lakesuperior.globals import AppGlobals
 from lakesuperior.migrator import Migrator
 from lakesuperior.store.ldp_nr.default_layout import DefaultLayout as FileLayout
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -47,17 +46,12 @@ def migrate(src, dest, start_pts=None, list_file=None, **kwargs):
     return Migrator(src, dest, **kwargs).migrate(start_pts, list_file)
 
 
-def integrity_check(config_dir=None):
+def integrity_check():
     """
     Check integrity of the data set.
 
     At the moment this is limited to referential integrity. Other checks can
     be added and triggered by different argument flags.
     """
-    if config_dir:
-        env.config = parse_config(config_dir)[0]
-        env.app_globals = AppGlobals(env.config)
-    else:
-        import lakesuperior.env_setup
     with TxnManager(env.app_globals.rdfly.store):
-        return { t for t in env.app_globals.rdfly.find_refint_violations()}
+        return set(env.app_globals.rdfly.find_refint_violations())

+ 1 - 1
lakesuperior/api/query.py

@@ -2,9 +2,9 @@ import logging
 
 from io import BytesIO
 
+from lakesuperior import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
-from lakesuperior.env import env
 from lakesuperior.store.ldp_rs.lmdb_store import LmdbStore, TxnManager
 
 

+ 6 - 5
lakesuperior/api/resource.py

@@ -13,7 +13,7 @@ from rdflib.namespace import XSD
 from lakesuperior.config_parser import config
 from lakesuperior.exceptions import (
         InvalidResourceError, ResourceNotExistsError, TombstoneError)
-from lakesuperior.env import env
+from lakesuperior import env, thread_env
 from lakesuperior.globals import RES_DELETED, RES_UPDATED
 from lakesuperior.model.ldp_factory import LDP_NR_TYPE, LdpFactory
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -72,15 +72,16 @@ def transaction(write=False):
         def _wrapper(*args, **kwargs):
             # Mark transaction begin timestamp. This is used for create and
             # update timestamps on resources.
-            env.timestamp = arrow.utcnow()
-            env.timestamp_term = Literal(env.timestamp, datatype=XSD.dateTime)
+            thread_env.timestamp = arrow.utcnow()
+            thread_env.timestamp_term = Literal(
+                    thread_env.timestamp, datatype=XSD.dateTime)
             with TxnManager(env.app_globals.rdf_store, write=write) as txn:
                 ret = fn(*args, **kwargs)
             if len(env.app_globals.changelog):
                 job = Thread(target=_process_queue)
                 job.start()
-            delattr(env, 'timestamp')
-            delattr(env, 'timestamp_term')
+            delattr(thread_env, 'timestamp')
+            delattr(thread_env, 'timestamp_term')
             return ret
         return _wrapper
     return _transaction_deco
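
The pattern above (per-thread timestamps set around a transaction) boils down
to this self-contained sketch; everything except the ``thread_env`` idiom is
simplified away::

    import threading

    from datetime import datetime
    from functools import wraps

    thread_env = threading.local()

    def transaction(fn):
        @wraps(fn)
        def _wrapper(*args, **kwargs):
            # Stamp the transaction start; each worker thread gets its own.
            thread_env.timestamp = datetime.utcnow()
            try:
                return fn(*args, **kwargs)
            finally:
                # Clean up so a stale timestamp never leaks into the next call.
                del thread_env.timestamp
        return _wrapper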

+ 24 - 30
lakesuperior/config_parser.py

@@ -1,6 +1,6 @@
 import sys
 
-from os import path, environ
+from os import chdir, environ, getcwd, path
 
 import hiyapyco
 import yaml
@@ -8,8 +8,10 @@ import yaml
 import lakesuperior
 
 
-default_config_dir = environ.get('FCREPO_CONFIG_DIR', path.dirname(
-            path.abspath(lakesuperior.__file__)) + '/etc.defaults')
+default_config_dir = environ.get(
+        'FCREPO_CONFIG_DIR',
+        path.join(
+            path.dirname(path.abspath(lakesuperior.__file__)), 'etc.defaults'))
 """
 Default configuration directory.
 
@@ -53,38 +55,30 @@ def parse_config(config_dir=None):
     print('Reading configuration at {}'.format(config_dir))
 
     for cname in configs:
-        file = '{}/{}.yml'.format(config_dir , cname)
+        file = path.join(config_dir, '{}.yml'.format(cname))
         with open(file, 'r') as stream:
             _config[cname] = yaml.load(stream, yaml.SafeLoader)
 
-    error_msg = '''
-    **************
-    ** WARNING! **
-    **************
+    if not _config['application']['data_dir']:
+        _config['application']['data_dir'] = path.join(
+                lakesuperior.basedir, 'data')
 
-    Your test {} store location is set to be the same as the production
-    location. This means that if you run a test suite, your live data may be
-    wiped clean!
+    data_dir = _config['application']['data_dir']
+    _config['application']['store']['ldp_nr']['location'] = path.join(
+            data_dir, 'ldpnr_store')
+    _config['application']['store']['ldp_rs']['location'] = path.join(
+            data_dir, 'ldprs_store')
+    # If log handler file names are relative, they will be relative to the
+    # data dir.
+    oldwd = getcwd()
+    chdir(data_dir)
+    for handler in _config['logging']['handlers'].values():
+        if 'filename' in handler:
+            handler['filename'] = path.realpath(handler['filename'])
+    chdir(oldwd)
 
-    Please review your configuration before starting.
-    '''
-
-    # Merge default and test configurations.
-    _test_config = {'application': hiyapyco.load(
-            config_dir + '/application.yml',
-            config_dir + '/test.yml', method=hiyapyco.METHOD_MERGE)}
-
-    if _config['application']['store']['ldp_rs']['location'] \
-            == _test_config['application']['store']['ldp_rs']['location']:
-                raise RuntimeError(error_msg.format('RDF'))
-                sys.exit()
-
-    if _config['application']['store']['ldp_nr']['path'] \
-            == _test_config['application']['store']['ldp_nr']['path']:
-                raise RuntimeError(error_msg.format('binary'))
-                sys.exit()
-    return _config, _test_config
+    return _config
 
 
 # Load default configuration.
-config, test_config = parse_config()
+config = parse_config()
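
The ``chdir`` round trip above resolves relative log handler paths against
``data_dir``; the same trick in isolation (paths are illustrative)::

    from os import chdir, getcwd, path

    def resolve_against(base_dir, filename):
        # Resolve filename relative to base_dir, as parse_config() does for
        # logging handlers.
        oldwd = getcwd()
        chdir(base_dir)
        try:
            return path.realpath(filename)
        finally:
            chdir(oldwd)

    # resolve_against('/var/lib/lsup/data', 'log/lakesuperior.log')
    # -> '/var/lib/lsup/data/log/lakesuperior.log'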

+ 0 - 0
data/bootstrap/rsrc_centric_layout.sparql → lakesuperior/data/bootstrap/rsrc_centric_layout.sparql


+ 3 - 1
lakesuperior/endpoints/ldp.py

@@ -251,7 +251,7 @@ def post_resource(parent_uid):
     hdr = {'Location' : uri}
 
     if mimetype and not is_rdf:
-        hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="<{0}>"'\
+        hdr['Link'] = '<{0}/fcr:metadata>; rel="describedby"; anchor="{0}"'\
                 .format(uri)
 
     out_headers.update(hdr)
@@ -312,6 +312,8 @@ def put_resource(uid):
 
 
 @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
+@ldp.route('/', defaults={'uid': '/'}, methods=['PATCH'],
+        strict_slashes=False)
 def patch_resource(uid, is_metadata=False):
     """
     https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH

+ 9 - 1
lakesuperior/endpoints/main.py

@@ -1,7 +1,11 @@
 import logging
 
+from os import path
+
 from flask import Blueprint, render_template
 
+from lakesuperior import basedir
+
 logger = logging.getLogger(__name__)
 
 # Blueprint for main pages. Not much here.
@@ -14,7 +18,11 @@ main = Blueprint('main', __name__, template_folder='templates',
 @main.route('/', methods=['GET'])
 def index():
     """Homepage."""
-    return render_template('index.html')
+    version_fname = path.abspath(
+            path.join(path.dirname(basedir), 'VERSION'))
+    with open(version_fname) as fh:
+        version = fh.readlines()[0]
+    return render_template('index.html', version=version)
 
 
 @main.route('/debug', methods=['GET'])

+ 1 - 1
lakesuperior/endpoints/query.py

@@ -3,7 +3,7 @@ import logging
 from flask import Blueprint, current_app, request, render_template, send_file
 from rdflib.plugin import PluginException
 
-from lakesuperior.env import env
+from lakesuperior import env
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.api import query as query_api
 

+ 1 - 0
lakesuperior/endpoints/templates/index.html

@@ -1,6 +1,7 @@
 {% extends 'base.html' %}
 {% block title %}LAKEsuperior{% endblock %}
 {% block content %}
+    <p>Version {{ version }}</p>
     <blockquote>
         Superior, they said, never gives up her dead<br />
         When the gales of November come early

+ 0 - 25
lakesuperior/env.py

@@ -1,25 +0,0 @@
-import threading
-
-'''
-Global bucket for switching configuration. Different environments
-(e.g. webapp, test suite) put the appropriate value in it.
-The most important values to be stored are app_conf (either from
-lakesuperior.config_parser.config or lakesuperior.config_parser.test_config)
-and app_globals (obtained by an instance of lakesuperior.globals.AppGlobals).
-
-e.g.:
-
->>> from lakesuperior.config_parser import config
->>> from lakesuperior.globals import AppGlobals
->>> from lakesuperior.env import env
->>> env.config = config
->>> env.app_globals = AppGlobals(config)
-
-This is automated in non-test environments by importing
-`lakesuperior.env_setup`.
-'''
-class Env:
-    pass
-
-env = Env()
-#env = threading.local()

+ 2 - 3
lakesuperior/env_setup.py

@@ -1,16 +1,15 @@
+from lakesuperior import env
 from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
 
 __doc__="""
 Default configuration.
 
 Import this module to initialize the configuration for a production setup::
 
-    >>>from lakesuperior import env_setup
+    >>> import lakesuperior.env_setup
 
 Will load the default configuration.
 """
 
-env.config = config
 env.app_globals = AppGlobals(config)

+ 15 - 6
lakesuperior/etc.defaults/application.yml

@@ -4,6 +4,21 @@
 # settings. Individual items can be selectively overridden as long as the YAML
 # hierarchical structure is kept.
 
+# Set app_mode to either 'prod', 'test' or 'dev'.
+# 'prod' is normal running mode. 'test' is used for running test suites.
+# 'dev' is similar to normal mode but with reload and debug enabled.
+app_mode: 'prod'
+
+# Base data directory. This contains both volatile files such as PID files,
+# and persistent ones, such as resource data. LDP-NRs will be stored under
+# <basedir>/ldpnr_store and LDP-RSs under <basedir>/ldprs_store.
+#
+# If different data files need to be running on different storage hardware,
+# the individual subdirectories can be mounted on different file systems.
+#
+# If unset, it will default to <lakesuperior package root>/data.
+data_dir:
+
 # Configuration for binary path and fixity check generation. The hash is a
 # checksum of the contents of the file.
 uuid:
@@ -18,9 +33,6 @@ store:
     # The semantic store used for persisting LDP-RS (RDF Source) resources.
     # MUST support SPARQL 1.1 query and update.
     ldp_rs:
-        # Directory where the RDF data files are stored.
-        location: data/ldprs_store
-
         # store layout. At the moment, only `rsrc_centric_layout` is supported.
         layout: rsrc_centric_layout
 
@@ -47,9 +59,6 @@ store:
         # See store.ldp_rs.layout.
         layout: default_layout
 
-        # The filesystem path to the root of the binary store.
-        path: data/ldpnr_store
-
         # How to split the balanced pairtree to generate a path. The hash
         # string is defined by the uuid.algo parameter value.
         # This parameter defines how many characters are in each branch. 2-4 is

+ 2 - 6
lakesuperior/etc.defaults/gunicorn.yml

@@ -4,12 +4,8 @@
 # Commented values are the application defaults.
 
 # Directory where the WSGI server data are stored.
-data_dir: 'data'
-
-# Set app_mode to either 'prod', 'test' or 'dev'.
-# 'prod' is normal running mode. 'test' is used for running test suites.
-# 'dev' is similar to normal mode but with reload and debug enabled.
-app_mode: 'dev'
+# Relative paths are relative to the `data_dir` value in `application.yml`.
+data_dir: .
 
 #listen_addr: '0.0.0.0'
 #listen_port: 8000

+ 4 - 2
lakesuperior/etc.defaults/logging.yml

@@ -13,8 +13,10 @@ formatters:
 handlers:
   logfile:
     class: logging.handlers.RotatingFileHandler
-    # Change this.
-    filename: /tmp/lakesuperior.log
+    # Relative paths are relative to the `data_dir` value in `application.yml`.
+    # You can change this value to an absolute path or leave it alone and
+    # symlink the location to a different directory.
+    filename: log/lakesuperior.log
     maxBytes: 10485760
     backupCount: 3
     formatter: default_fmt

+ 22 - 8
lakesuperior/globals.py

@@ -27,7 +27,7 @@ class AppGlobals:
 
     The variables are set on initialization by passing a configuration dict.
     Usually this is done when starting an application. The instance with the
-    loaded variables is then assigned to the :data:`lakesuperior.env.env`
+    loaded variables is then assigned to the :data:`lakesuperior.env`
     global variable.
 
     You can either load the default configuration::
@@ -36,20 +36,19 @@ class AppGlobals:
 
     Or set up an environment with a custom configuration::
 
-        >>>from lakesuperior.env import env
-        >>>from lakesuperior.app_globals import AppGlobals
-        >>>my_config = {'name': 'value', '...': '...'}
-        >>>env.config = my_config
-        >>>env.app_globals = AppGlobals(my_config)
+        >>> from lakesuperior import env
+        >>> from lakesuperior.app_globals import AppGlobals
+        >>> my_config = {'name': 'value', '...': '...'}
+        >>> env.app_globals = AppGlobals(my_config)
 
     """
-    def __init__(self, conf):
+    def __init__(self, config):
         """
         Generate global variables from configuration.
         """
         from lakesuperior.messaging.messenger import Messenger
 
-        app_conf = conf['application']
+        app_conf = config['application']
 
         # Initialize RDF layout.
         rdfly_mod_name = app_conf['store']['ldp_rs']['layout']
@@ -69,11 +68,26 @@ class AppGlobals:
         self._messenger  = Messenger(app_conf['messaging'])
 
         # Exposed globals.
+        self._config = config
         self._rdfly = rdfly_cls(app_conf['store']['ldp_rs'])
         self._nonrdfly = nonrdfly_cls(app_conf['store']['ldp_nr'])
         self._changelog = deque()
 
 
+    @property
+    def config(self):
+        """
+        Global configuration.
+
+        This is a collection of all configuration options **except** for the
+        WSGI configuration which is initialized at a different time and is
+        stored under :data:`lakesuperior.env.wsgi_options`.
+
+        *TODO:* Update class reference when the interface is separated from
+        the implementation.
+        """
+        return self._config
+
     @property
     def rdfly(self):
         """

+ 51 - 10
lakesuperior/lsup_admin.py

@@ -1,15 +1,30 @@
 import click
 import click_log
+import csv
 import json
 import logging
-import os
 import sys
 
+from os import getcwd, path
+
+import arrow
+
+from lakesuperior import env
 from lakesuperior.api import admin as admin_api
-from lakesuperior.config_parser import config
+from lakesuperior.config_parser import config, parse_config
-from lakesuperior.env import env
+from lakesuperior.globals import AppGlobals
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
+__doc__="""
+Utility to perform core maintenance tasks via console command-line.
+
+The command-line tool is self-documented. Type::
+
+    lsup-admin --help
+
+for a list of tools and options.
+"""
+
 logger = logging.getLogger(__name__)
 click_log.basic_config(logger)
 
@@ -53,7 +68,8 @@ def bootstrap():
     click.echo('Initializing binary store at {}'.format(nonrdfly.root))
     nonrdfly.bootstrap()
     click.echo('Binary store initialized.')
-    click.echo('Repository successfully set up. Go to town.')
+    click.echo('\nRepository successfully set up. Go to town.')
+    click.echo('If the HTTP server is running, it must be restarted.')
 
 
 @click.command()
@@ -88,8 +104,11 @@ def check_fixity(uid):
     '--config-folder', '-c', default=None, help='Alternative configuration '
     'folder to look up. If not set, the location set in the environment or '
     'the default configuration is used.')
+@click.option(
+    '--output', '-o', default=None, help='Output file. If not specified, a '
+    'timestamp-named file will be generated automatically.')
 @click.command()
-def check_refint(config_folder=None):
+def check_refint(config_folder=None, output=None):
     """
     Check referential integrity.
 
@@ -98,19 +117,41 @@ def check_refint(config_folder=None):
     resources. For repositories set up with the `referential_integrity` option
     (the default), this is a pre-condition for a consistent data set.
 
-    Note: this check is run regardless of whether the repository enforces
+    If inconsistencies are found, a report is generated in CSV format with the
+    following columns: `s`, `p`, `o` (respectively the terms of the
+    triple containing the dangling relationship) and `missing` which
+    indicates which term is the missing URI (currently always set to `o`).
+
+    Note: this check can be run regardless of whether the repository enforces
     referential integrity.
     """
-    check_results = admin_api.integrity_check(config_folder)
+    if config_folder:
+        env.app_globals = AppGlobals(parse_config(config_folder))
+    else:
+        import lakesuperior.env_setup
+
+    check_results = admin_api.integrity_check()
+
     click.echo('Integrity check results:')
     if len(check_results):
         click.echo(click.style('Inconsistencies found!', fg='red', bold=True))
-        click.echo('Missing object in the following triples:')
-        for trp in check_results:
-            click.echo(' '.join([str(t) for t in trp[0]]))
+        if not output:
+            output = path.join(getcwd(), 'refint_report-{}.csv'.format(
+                arrow.utcnow().format('YYYY-MM-DDTHH:mm:ss.S')))
+        elif not output.endswith('.csv'):
+            output += '.csv'
+
+        with open(output, 'w', newline='') as fh:
+            writer = csv.writer(fh)
+            writer.writerow(('s', 'p', 'o', 'missing'))
+            for trp in check_results:
+                # ``o`` is always hardcoded for now.
+                writer.writerow([t.n3() for t in trp[0]] + ['o'])
+
+        click.echo('Report generated at {}'.format(output))
     else:
         click.echo(click.style('Clean. ', fg='green', bold=True)
-                + 'No inconsistency found.')
+                + 'No inconsistency found. No report generated.')
 
 
 @click.command()

+ 53 - 36
lakesuperior/migrator.py

@@ -10,8 +10,9 @@ import yaml
 
 from rdflib import Graph, URIRef
 
+from lakesuperior import env, basedir
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
-from lakesuperior.env import env
+from lakesuperior.exceptions import InvalidResourceError
 from lakesuperior.globals import AppGlobals, ROOT_UID
 from lakesuperior.config_parser import parse_config
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
@@ -28,8 +29,7 @@ class StoreWrapper(ContextDecorator):
         self.store = store
 
     def __enter__(self):
-        self.store.open(
-                env.config['application']['store']['ldp_rs'])
+        self.store.open(env.app_globals.rdfly.config)
 
     def __exit__(self, *exc):
         self.store.close()
@@ -69,8 +69,8 @@ class Migrator:
 
 
     def __init__(
-            self, src, dest, zero_binaries=False, compact_uris=False,
-            skip_errors=False):
+            self, src, dest, clear=False, zero_binaries=False,
+            compact_uris=False, skip_errors=False):
         """
         Set up base paths and clean up existing directories.
 
@@ -82,8 +82,10 @@ class Migrator:
             it must be a writable directory. It will be deleted and recreated.
             If it does not exist, it will be created along with its parents if
             missing.
-        :param str binary_handling: One of ``include``, ``truncate`` or
-            ``split``.
+        :param bool clear: Whether to clear any pre-existing data at the
+            locations indicated.
+        :param bool zero_binaries: Whether to create zero-byte binary files
+            rather than copy the sources.
         :param bool compact_uris: NOT IMPLEMENTED. Whether the process should
             attempt to compact URIs generated with broken up path segments. If
             the UID matches a pattern such as ``/12/34/56/123456...`` it is
@@ -95,34 +97,36 @@ class Migrator:
         """
         # Set up repo folder structure and copy default configuration to
         # destination file.
-        cur_dir = path.dirname(path.dirname(path.abspath(__file__)))
         self.dbpath = '{}/data/ldprs_store'.format(dest)
         self.fpath = '{}/data/ldpnr_store'.format(dest)
         self.config_dir = '{}/etc'.format(dest)
 
-        shutil.rmtree(dest, ignore_errors=True)
-        shutil.copytree(
-                '{}/etc.defaults'.format(cur_dir), self.config_dir)
+        if clear:
+            shutil.rmtree(dest, ignore_errors=True)
+        if not path.isdir(self.config_dir):
+            shutil.copytree(
+                '{}/etc.defaults'.format(basedir), self.config_dir)
 
         # Modify and overwrite destination configuration.
-        orig_config, _ = parse_config(self.config_dir)
+        orig_config = parse_config(self.config_dir)
         orig_config['application']['store']['ldp_rs']['location'] = self.dbpath
         orig_config['application']['store']['ldp_nr']['path'] = self.fpath
 
-        with open('{}/application.yml'.format(self.config_dir), 'w') \
-                as config_file:
-            config_file.write(yaml.dump(orig_config['application']))
+        if clear:
+            with open('{}/application.yml'.format(self.config_dir), 'w') \
+                    as config_file:
+                config_file.write(yaml.dump(orig_config['application']))
 
-        env.config = parse_config(self.config_dir)[0]
-        env.app_globals = AppGlobals(env.config)
+        env.app_globals = AppGlobals(parse_config(self.config_dir))
 
         self.rdfly = env.app_globals.rdfly
         self.nonrdfly = env.app_globals.nonrdfly
 
-        with TxnManager(env.app_globals.rdf_store, write=True) as txn:
-            self.rdfly.bootstrap()
-            self.rdfly.store.close()
-        env.app_globals.nonrdfly.bootstrap()
+        if clear:
+            with TxnManager(env.app_globals.rdf_store, write=True) as txn:
+                self.rdfly.bootstrap()
+                self.rdfly.store.close()
+            env.app_globals.nonrdfly.bootstrap()
 
         self.src = src.rstrip('/')
         self.zero_binaries = zero_binaries
@@ -155,7 +159,7 @@ class Migrator:
                             'Starting point {} does not begin with a slash.'
                             .format(start))
 
-                    if start != ROOT_UID:
+                    if not rsrc_api.exists(start):
                         # Create the full hierarchy with link to the parents.
                         rsrc_api.create_or_replace(start)
                     # Then populate the new resource and crawl for more
@@ -165,8 +169,11 @@ class Migrator:
                 with open(list_file, 'r') as fp:
                     for uri in fp:
                         uid = uri.strip().replace(self.src, '')
-                        if uid != ROOT_UID:
-                            rsrc_api.create_or_replace(uid)
+                        if not rsrc_api.exists(uid):
+                            try:
+                                rsrc_api.create_or_replace(uid)
+                            except InvalidResourceError:
+                                pass
                         self._crawl(uid)
         logger.info('Dumped {} resources.'.format(self._ct))
 
@@ -189,12 +196,17 @@ class Migrator:
         # Internal URI of destination.
         iuri = ibase + uid
 
-        rsp = requests.head(uri)
-        if not self.skip_errors:
-            rsp.raise_for_status()
-        elif rsp.status_code > 399:
-            print('Error retrieving resource {} headers: {} {}'.format(
-                uri, rsp.status_code, rsp.text))
+        try:
+            rsp = requests.head(uri)
+        except requests.exceptions.RequestException:
+            logger.warning('Error retrieving resource {}'.format(uri))
+            return
+        if not self.skip_errors:
+            rsp.raise_for_status()
+        elif rsp.status_code > 399:
+            print('Error retrieving resource {} headers: {} {}'.format(
+                uri, rsp.status_code, rsp.text))
 
         # Determine LDP type.
         ldp_type = 'ldp_nr'
@@ -218,12 +230,17 @@ class Migrator:
         # links.
         get_uri = (
                 uri if ldp_type == 'ldp_rs' else '{}/fcr:metadata'.format(uri))
-        get_rsp = requests.get(get_uri)
-        if not self.skip_errors:
-            get_rsp.raise_for_status()
-        elif get_rsp.status_code > 399:
-            print('Error retrieving resource {} body: {} {}'.format(
-                uri, get_rsp.status_code, get_rsp.text))
+        try:
+            get_rsp = requests.get(get_uri)
+        except requests.exceptions.RequestException:
+            logger.warning('Error retrieving resource {}'.format(get_uri))
+            return
+        if not self.skip_errors:
+            get_rsp.raise_for_status()
+        elif get_rsp.status_code > 399:
+            print('Error retrieving resource {} body: {} {}'.format(
+                uri, get_rsp.status_code, get_rsp.text))
 
         data = get_rsp.content.replace(
                 self.src.encode('utf-8'), ibase.encode('utf-8'))
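
The two fetch guards above could be factored into a single helper; a sketch
(the helper and its name are not part of this changeset)::

    import logging

    import requests

    logger = logging.getLogger(__name__)

    def safe_fetch(uri, method='head', skip_errors=False):
        # Fetch a URI, honoring the migrator's skip_errors policy.
        try:
            rsp = requests.request(method, uri)
        except requests.exceptions.RequestException:
            logger.warning('Error retrieving resource {}'.format(uri))
            return None
        if not skip_errors:
            rsp.raise_for_status()
        elif rsp.status_code > 399:
            print('Error retrieving resource {}: {} {}'.format(
                uri, rsp.status_code, rsp.text))
        return rsp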

+ 1 - 1
lakesuperior/model/ldp_factory.py

@@ -7,11 +7,11 @@ from rdflib import Graph, parser, plugin, serializer
 from rdflib.resource import Resource
 from rdflib.namespace import RDF
 
+from lakesuperior import env
 from lakesuperior.model.ldpr import Ldpr
 from lakesuperior.model.ldp_nr import LdpNr
 from lakesuperior.model.ldp_rs import LdpRs, Ldpc, LdpDc, LdpIc
 from lakesuperior.config_parser import config
-from lakesuperior.env import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.exceptions import (
         IncompatibleLdpTypeError, InvalidResourceError, ResourceExistsError,

+ 1 - 1
lakesuperior/model/ldp_nr.py

@@ -6,7 +6,7 @@ from rdflib.namespace import RDF, XSD
 from rdflib.resource import Resource
 from rdflib.term import URIRef, Literal, Variable
 
-from lakesuperior.env import env
+from lakesuperior import env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.model.ldpr import Ldpr
 from lakesuperior.model.ldp_rs import LdpRs

+ 1 - 1
lakesuperior/model/ldp_rs.py

@@ -2,7 +2,7 @@ import logging
 
 from rdflib import Graph
 
-from lakesuperior.env import env
+from lakesuperior import env
 from lakesuperior.globals import RES_UPDATED
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.model.ldpr import Ldpr

+ 19 - 12
lakesuperior/model/ldpr.py

@@ -10,7 +10,7 @@ import arrow
 from rdflib import Graph, URIRef, Literal
 from rdflib.namespace import RDF
 
-from lakesuperior.env import env
+from lakesuperior import env, thread_env
 from lakesuperior.globals import (
     RES_CREATED, RES_DELETED, RES_UPDATED, ROOT_UID)
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
@@ -411,7 +411,7 @@ class Ldpr(metaclass=ABCMeta):
         else:
             add_trp = {
                 (self.uri, RDF.type, nsc['fcsystem'].Tombstone),
-                (self.uri, nsc['fcrepo'].created, env.timestamp_term),
+                (self.uri, nsc['fcrepo'].created, thread_env.timestamp_term),
             }
 
         self.modify(RES_DELETED, remove_trp, add_trp)
@@ -432,7 +432,7 @@ class Ldpr(metaclass=ABCMeta):
         Remove all traces of a resource and versions.
         """
         logger.info('Purging resource {}'.format(self.uid))
-        refint = env.config['store']['ldp_rs']['referential_integrity']
+        refint = rdfly.config['referential_integrity']
         inbound = True if refint else inbound
         rdfly.forget_rsrc(self.uid, inbound)
 
@@ -637,8 +637,8 @@ class Ldpr(metaclass=ABCMeta):
 
         :rtype: tuple(rdflib.Graph)
         :return: Remove and add graphs. These can be used
-        with ``BaseStoreLayout.update_resource`` and/or recorded as separate
-        events in a provenance tracking system.
+            with ``BaseStoreLayout.update_resource`` and/or recorded as separate
+            events in a provenance tracking system.
         """
         logger.debug('Provided SPARQL query: {}'.format(q))
         pre_gr = self.imr
@@ -683,7 +683,7 @@ class Ldpr(metaclass=ABCMeta):
         method.
 
         :param ev_type: The type of event (create, update,
-        delete) or None. In the latter case, no notification is sent.
+            delete) or None. In the latter case, no notification is sent.
         :type ev_type: str or None
         :param set remove_trp: Triples to be removed.
         :param set add_trp: Triples to be added.
@@ -692,7 +692,7 @@ class Ldpr(metaclass=ABCMeta):
 
         if (
                 ev_type is not None and
-                env.config['application'].get('messaging')):
+                env.app_globals.config['application'].get('messaging')):
             logger.debug('Enqueuing message for {}'.format(self.uid))
             self._enqueue_msg(ev_type, remove_trp, add_trp)
 
@@ -720,7 +720,7 @@ class Ldpr(metaclass=ABCMeta):
 
         env.app_globals.changelog.append((set(remove_trp), set(add_trp), {
             'ev_type': ev_type,
-            'timestamp': env.timestamp.format(),
+            'timestamp': thread_env.timestamp.format(),
             'rsrc_type': rsrc_type,
             'actor': actor,
         }))
@@ -769,7 +769,7 @@ class Ldpr(metaclass=ABCMeta):
         # Create and modify timestamp.
         if create:
             self.provided_imr.set((
-                self.uri, nsc['fcrepo'].created, env.timestamp_term))
+                self.uri, nsc['fcrepo'].created, thread_env.timestamp_term))
             self.provided_imr.set((
                 self.uri, nsc['fcrepo'].createdBy, self.DEFAULT_USER))
         else:
@@ -781,12 +781,12 @@ class Ldpr(metaclass=ABCMeta):
                     self.uri, nsc['fcrepo'].createdBy)))
 
         self.provided_imr.set((
-            self.uri, nsc['fcrepo'].lastModified, env.timestamp_term))
+            self.uri, nsc['fcrepo'].lastModified, thread_env.timestamp_term))
         self.provided_imr.set((
             self.uri, nsc['fcrepo'].lastModifiedBy, self.DEFAULT_USER))
 
 
-    def _containment_rel(self, create):
+    def _containment_rel(self, create, ignore_type=True):
         """Find the closest parent in the path indicated by the uid and
         establish a containment triple.
 
@@ -805,6 +805,11 @@ class Ldpr(metaclass=ABCMeta):
 
         :param bool create: Whether the resource is being created. If false,
         the parent container is not updated.
+        "param bool ignore_type: If False (the default), an exception is raised
+        if trying to create a resource under a non-container. This can be
+        overridden in special cases (e.g. when migrating a repository in which
+        a LDP-NR has "children" under ``fcr:versions``) by setting this to
+        True.
         """
         from lakesuperior.model.ldp_factory import LdpFactory
 
@@ -814,7 +819,9 @@ class Ldpr(metaclass=ABCMeta):
             cnd_parent_uid = '/' + '/'.join(path_components[:-1])
             if rdfly.ask_rsrc_exists(cnd_parent_uid):
                 parent_rsrc = LdpFactory.from_stored(cnd_parent_uid)
-                if nsc['ldp'].Container not in parent_rsrc.types:
+                if (
+                        not ignore_type
+                        and nsc['ldp'].Container not in parent_rsrc.types):
                     raise InvalidResourceError(
                         cnd_parent_uid, 'Parent {} is not a container.')
 

+ 1 - 1
lakesuperior/profiler.py

@@ -5,9 +5,9 @@ from werkzeug.contrib.profiler import ProfilerMiddleware
 # Environment must be set before importing the app factory function.
 import lakesuperior.env_setup
 
+from lakesuperior import env
 from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
 
 options = {
     'restrictions': [30],

+ 5 - 6
lakesuperior/server.py

@@ -4,21 +4,20 @@ from logging.config import dictConfig
 # Environment must be set before importing the app factory function.
 import lakesuperior.env_setup
 
+from lakesuperior import env
 from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
-from lakesuperior.env import env
 
 from lakesuperior.app import create_app
 
-dictConfig(env.config['logging'])
+dictConfig(env.app_globals.config['logging'])
 logger = logging.getLogger(__name__)
 
 logger.info('Graph store location: {}'.format(
-    env.config['application']['store']['ldp_rs']['location']))
-logger.info('Binary store location: {}'.format(
-    env.config['application']['store']['ldp_nr']['path']))
+    env.app_globals.rdfly.config['location']))
+logger.info('Binary store location: {}'.format(env.app_globals.nonrdfly.root))
 
-fcrepo = create_app(env.config['application'])
+fcrepo = create_app(env.app_globals.config['application'])
 
 if __name__ == "__main__":
     fcrepo.run(host='0.0.0.0')

+ 1 - 1
lakesuperior/store/ldp_nr/base_non_rdf_layout.py

@@ -20,7 +20,7 @@ class BaseNonRdfLayout(metaclass=ABCMeta):
         Initialize the base non-RDF store layout.
         """
         self.config = config
-        self.root = config['path']
+        self.root = config['location']
 
 
     ## INTERFACE METHODS ##

+ 24 - 14
lakesuperior/store/ldp_rs/lmdb_store.py

@@ -14,6 +14,7 @@ from rdflib import Graph, Namespace, URIRef, Variable
 from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as RDFLIB_DEFAULT_GRAPH_URI
 from rdflib.store import Store, VALID_STORE, NO_STORE
 
+from lakesuperior import env
 
 logger = logging.getLogger(__name__)
 
@@ -473,7 +474,7 @@ class LmdbStore(Store):
         pk_c = self._pickle(context)
 
         # Add new individual terms or gather keys for existing ones.
-        keys = [None, None, None, None]
+        keys = [None] * 4
         with self.cur('th:t') as icur:
             for i, pk_t in enumerate((pk_s, pk_p, pk_o, pk_c)):
                 thash = self._hash(pk_t)
@@ -856,10 +857,20 @@ class LmdbStore(Store):
             else:
                 return NO_STORE
 
-        self.data_env = lmdb.open(path + '/main', subdir=False, create=create,
-                map_size=self.MAP_SIZE, max_dbs=4, readahead=False)
-        self.idx_env = lmdb.open(path + '/index', subdir=False, create=create,
-                map_size=self.MAP_SIZE, max_dbs=6, readahead=False)
+        if getattr(env, 'wsgi_options', False):
+            self._workers = env.wsgi_options['workers']
+        else:
+            self._workers = 1
+        logger.info('Max LMDB readers: {}'.format(self._workers))
+
+        self.data_env = lmdb.open(
+                path + '/main', subdir=False, create=create,
+                map_size=self.MAP_SIZE, max_dbs=4,
+                max_spare_txns=self._workers, readahead=False)
+        self.idx_env = lmdb.open(
+                path + '/index', subdir=False, create=create,
+                map_size=self.MAP_SIZE, max_dbs=6,
+                max_spare_txns=self._workers, readahead=False)
 
         # Clear stale readers.
         data_stale_readers = self.data_env.reader_check()
@@ -1003,7 +1014,7 @@ class LmdbStore(Store):
                     yield from self._lookup_2bound({'s': s, 'o': o})
                 # s ? ?
                 else:
-                    yield from self._lookup_1bound('s', s)
+                    yield from self._lookup_1bound('s:po', s)
         else:
             if p is not None:
                 # ? p o
@@ -1011,11 +1022,11 @@ class LmdbStore(Store):
                     yield from self._lookup_2bound({'p': p, 'o': o})
                 # ? p ?
                 else:
-                    yield from self._lookup_1bound('p', p)
+                    yield from self._lookup_1bound('p:so', p)
             else:
                 # ? ? o
                 if o is not None:
-                    yield from self._lookup_1bound('o', o)
+                    yield from self._lookup_1bound('o:sp', o)
                 # ? ? ?
                 else:
                     # Get all triples in the database.
@@ -1023,21 +1034,20 @@ class LmdbStore(Store):
                         yield from cur.iternext_nodup()
 
 
-    def _lookup_1bound(self, label, term):
+    def _lookup_1bound(self, idx_name, term):
         """
         Lookup triples for a pattern with one bound term.
 
-        :param str label: Which term is being searched for. One of `s`,
-        `p`, or `o`.
+        :param str idx_name: The index to look up as one of the keys of
+            ``_lookup_ordering``.
         :param rdflib.URIRef term: Bound term to search for.
 
-        :rtype: iterator(bytes)
+        :rtype: Iterator(bytes)
         :return: SPO keys matching the pattern.
         """
         k = self._to_key(term)
         if not k:
             return iter(())
-        idx_name = '{}:{}'.format(label, 'spo'.replace(label, ''))
         term_order = self._lookup_ordering[idx_name]
         with self.cur(idx_name) as cur:
             if cur.set_key(k):
@@ -1045,7 +1055,7 @@ class LmdbStore(Store):
                     subkeys = self._split_key(match)
 
                     # Compose result.
-                    out = [None, None, None]
+                    out = [None] * 3
                     out[term_order[0]] = k
                     out[term_order[1]] = subkeys[0]
                     out[term_order[2]] = subkeys[1]
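
For orientation, the index names now passed to ``_lookup_1bound`` encode which
term is bound and the order of the two terms stored under it. The
``_lookup_ordering`` table (not shown in this diff) presumably maps them to
output positions along these lines::

    _lookup_ordering = {
        's:po': (0, 1, 2),  # bound s; stored value holds (p, o)
        'p:so': (1, 0, 2),  # bound p; stored value holds (s, o)
        'o:sp': (2, 0, 1),  # bound o; stored value holds (s, p)
    }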

+ 16 - 6
lakesuperior/store/ldp_rs/rsrc_centric_layout.py

@@ -2,7 +2,9 @@ import logging
 
 from collections import defaultdict
 from itertools import chain
+from os import path
 from string import Template
+from urllib.parse import urldefrag
 
 import arrow
 
@@ -12,13 +14,13 @@ from rdflib.query import ResultException
 from rdflib.resource import Resource
 from rdflib.store import Store
 
+from lakesuperior import basedir, env
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 from lakesuperior.dictionaries.namespaces import ns_mgr as nsm
 from lakesuperior.dictionaries.srv_mgd_terms import  srv_mgd_subjects, \
         srv_mgd_predicates, srv_mgd_types
 from lakesuperior.exceptions import (InvalidResourceError,
         ResourceNotExistsError, TombstoneError, PathSegmentError)
-from lakesuperior.env import env
 from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 
@@ -179,8 +181,10 @@ class RsrcCentricLayout:
 
         logger.info('Initializing the graph store with system data.')
         store.open()
+        fname = path.join(
+                basedir, 'data', 'bootstrap', 'rsrc_centric_layout.sparql')
         with TxnManager(store, True):
-            with open('data/bootstrap/rsrc_centric_layout.sparql', 'r') as f:
+            with open(fname, 'r') as f:
                 data = Template(f.read())
                 self.ds.update(data.substitute(timestamp=arrow.utcnow()))
 
@@ -553,14 +557,20 @@ class RsrcCentricLayout:
         :rtype: set
         :return: Triples referencing a repository URI that is not a resource.
         """
-        for obj in self.store.all_terms('o'):
+        for i, obj in enumerate(self.store.all_terms('o'), start=1):
             if (
                     isinstance(obj, URIRef)
-                    and str(obj).startswith(nsc['fcres'])
-                    and not self.ask_rsrc_exists(self.uri_to_uid(obj))):
-                print('Object not found: {}'.format(obj))
+                    and obj.startswith(nsc['fcres'])
+                    and not obj.endswith('fcr:fixity')
+                    and not obj.endswith('fcr:versions')
+                    and not self.ask_rsrc_exists(self.uri_to_uid(
+                        urldefrag(obj).url))):
+                logger.warning('Object not found: {}'.format(obj))
                 for trp in self.store.triples((None, None, obj)):
                     yield trp
+            if i % 100 == 0:
+                logger.info('{} terms processed.'.format(i))
 
 
     ## PROTECTED MEMBERS ##

+ 14 - 9
lakesuperior/wsgi.py

@@ -1,15 +1,15 @@
 import multiprocessing
 import yaml
 
-from os import environ, makedirs, path
+from os import chdir, environ, makedirs, getcwd, path
 
 import gunicorn.app.base
 
-from lakesuperior.server import fcrepo
+from lakesuperior import env, env_setup
 from lakesuperior.config_parser import default_config_dir
 
 
-config_file = '{}/gunicorn.yml'.format(default_config_dir)
+config_file = path.join(default_config_dir, 'gunicorn.yml')
 
 with open(config_file, 'r') as fh:
     config = yaml.load(fh, yaml.SafeLoader)
@@ -17,11 +17,14 @@ with open(config_file, 'r') as fh:
 listen_addr = config.get('listen_addr', '0.0.0.0')
 listen_port = config.get('listen_port', 8000)
 preload_app = config.get('preload_app', True)
-app_mode = config.get('app_mode', 'prod')
+app_mode = env.app_globals.config['application'].get('app_mode', 'prod')
 
+oldwd = getcwd()
+chdir(env.app_globals.config['application']['data_dir'])
 data_dir = path.realpath(config.get('data_dir'))
-run_dir = '{}/run'.format(data_dir)
-log_dir = '{}/log'.format(data_dir)
+chdir(oldwd)
+run_dir = path.join(data_dir, 'run')
+log_dir = path.join(data_dir, 'log')
 makedirs(log_dir, exist_ok=True)
 makedirs(run_dir, exist_ok=True)
 
@@ -43,10 +46,11 @@ options = {
     'daemon': app_mode=='prod',
     'reload': app_mode=='dev' and not preload_app,
 
-    'pidfile': '{}/fcrepo.pid'.format(run_dir),
-    'accesslog': '{}/gunicorn-access.log'.format(log_dir),
-    'errorlog': '{}/gunicorn-error.log'.format(log_dir),
+    'pidfile': path.join(run_dir, 'fcrepo.pid'),
+    'accesslog': path.join(log_dir, 'gunicorn-access.log'),
+    'errorlog': path.join(log_dir, 'gunicorn-error.log'),
 }
+env.wsgi_options = options
 
 class WsgiApp(gunicorn.app.base.BaseApplication):
 
@@ -64,6 +68,7 @@ class WsgiApp(gunicorn.app.base.BaseApplication):
 
 
 def run():
+    from lakesuperior.server import fcrepo
     WsgiApp(fcrepo, options).run()
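
``env.wsgi_options`` is what ``LmdbStore.open()`` reads (see the
``lmdb_store.py`` hunk above) to size ``max_spare_txns``. The handshake,
reduced to its essentials (the worker count is illustrative)::

    from lakesuperior import env

    env.wsgi_options = {'workers': 4}  # set by the WSGI launcher

    # In the store: default to a single reader outside a WSGI context.
    if getattr(env, 'wsgi_options', False):
        workers = env.wsgi_options['workers']
    else:
        workers = 1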
 
 

+ 8 - 4
setup.py

@@ -25,9 +25,14 @@ pytest_runner = ['pytest-runner'] if needs_pytest else []
 with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
     long_description = f.read()
 
+# Read release number.
+with open(path.realpath(path.join(here, 'VERSION'))) as fh:
+    version = fh.readlines()[0]
+
+
 setup(
     name='lakesuperior',
-    version='1.0.0a12',
+    version=version,
 
     description='A Linked Data Platform repository server.',
     long_description=long_description,
@@ -81,6 +86,7 @@ setup(
         'HiYaPyCo',
         'PyYAML',
         'arrow',
+        'cchardet',
         'click',
         'click-log',
         'gevent',
@@ -105,9 +111,7 @@ setup(
     #extras_require={},
     #package_data={
     #},
-    data_files=[
-        ('data/bootstrap', glob('data/bootstrap/*')),
-    ],
+    #data_files=[],
 
     entry_points={
         'console_scripts': [

+ 20 - 0
tests/endpoints/test_ldp.py

@@ -298,6 +298,26 @@ class TestLdp:
         assert self.client.post('/ldp/post_409').status_code == 409
 
 
+    def test_patch_root(self):
+        '''
+        Test patching root node.
+        '''
+        path = '/ldp/'
+        self.client.get(path)
+        uri = g.webroot + '/'
+
+        with open('tests/data/sparql_update/simple_insert.sparql') as data:
+            resp = self.client.patch(path,
+                    data=data,
+                    headers={'content-type' : 'application/sparql-update'})
+
+        assert resp.status_code == 204
+
+        resp = self.client.get(path)
+        gr = Graph().parse(data=resp.data, format='text/turtle')
+        assert gr[ URIRef(uri) : nsc['dc'].title : Literal('Hello') ]
+
+
     def test_patch(self):
         '''
         Test patching a resource.