
Merge branch 'development'

Stefano Cossu 6 years ago
commit 8b05025556
100 changed files with 2477 additions and 1417 deletions
  1. +2 -3      .travis.yml
  2. +1 -1      Dockerfile
  3. +3 -0      MANIFEST.in
  4. +0 -177    README.md
  5. +69 -0     README.rst
  6. +3 -1      conftest.py
  7. +0 -43     doc/notes/architecture.md
  8. +0 -33     doc/notes/cli.md
  9. +0 -213    doc/notes/fcrepo4_deltas.md
  10. +0 -260   doc/notes/indexing_strategy.md
  11. +0 -27    doc/notes/messaging.md
  12. +0 -57    doc/notes/migration.md
  13. +0 -63    doc/notes/model.md
  14. +0 -112   doc/notes/performance.md
  15. +0 -82    doc/notes/storage.md
  16. +3 -2     docker/docker_entrypoint
  17. +0 -31    docker/etc/gunicorn.py
  18. +24 -0    docker/etc/gunicorn.yml
  19. +23 -0    docs/Makefile
  20. +86 -0    docs/about.rst
  21. +25 -0    docs/api.rst
  22. +38 -0    docs/apidoc/lakesuperior.api.rst
  23. +46 -0    docs/apidoc/lakesuperior.endpoints.rst
  24. +38 -0    docs/apidoc/lakesuperior.messaging.rst
  25. +46 -0    docs/apidoc/lakesuperior.model.rst
  26. +89 -0    docs/apidoc/lakesuperior.rst
  27. +30 -0    docs/apidoc/lakesuperior.store.ldp_nr.rst
  28. +30 -0    docs/apidoc/lakesuperior.store.ldp_rs.rst
  29. +18 -0    docs/apidoc/lakesuperior.store.rst
  30. +7 -0     docs/apidoc/modules.rst
  31. +48 -0    docs/architecture.rst
  32. +0 -0     docs/assets/lakesuperior_arch.png
  33. +0 -0     docs/assets/lakesuperior_recommendations.pdf
  34. BIN       docs/assets/profile_1K_children_get.pdf
  35. +33 -0    docs/cli.rst
  36. +191 -0   docs/conf.py
  37. +40 -0    docs/contributing.rst
  38. +0 -0     docs/examples/store_layouts/graph_per_aspect.trig
  39. +0 -0     docs/examples/store_layouts/graph_per_resource+.trig
  40. +0 -0     docs/examples/store_layouts/graph_per_resource.trig
  41. +244 -0   docs/fcrepo4_deltas.rst
  42. +9 -0     docs/help.rst
  43. +52 -0    docs/index.rst
  44. +311 -0   docs/indexing_strategy.rst
  45. +30 -0    docs/messaging.rst
  46. +65 -0    docs/migration.rst
  47. +65 -0    docs/model.rst
  48. +0 -0     docs/notes/TODO.historic
  49. +133 -0   docs/performance.rst
  50. +101 -0   docs/setup.rst
  51. +0 -0     docs/src/lakesuperior_arch.graphml
  52. +0 -0     docs/src/lakesuperior_content_model.graphml
  53. +0 -0     docs/src/lakesuperior_recommendations.md
  54. +0 -0     docs/src/template.latex
  55. +0 -0     docs/src/use_cases_transactions.md
  56. +94 -0    docs/storage.rst
  57. +117 -0   docs/usage.rst
  58. +0 -32    etc.defaults/gunicorn.py
  59. +24 -0    etc.defaults/gunicorn.yml
  60. +0 -5     fcrepo
  61. +0 -0     lakesuperior/api/__init__.py
  62. +9 -8     lakesuperior/api/query.py
  63. +65 -62   lakesuperior/api/resource.py
  64. +2 -2     lakesuperior/config_parser.py
  65. +0 -0     lakesuperior/dictionaries/__init__.py
  66. +0 -0     lakesuperior/endpoints/__init__.py
  67. +8 -8     lakesuperior/endpoints/admin.py
  68. +50 -47   lakesuperior/endpoints/ldp.py
  69. +3 -7     lakesuperior/endpoints/main.py
  70. +5 -5     lakesuperior/endpoints/query.py
  71. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.css
  72. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.css.map
  73. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.min.css
  74. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.min.css.map
  75. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap.css
  76. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap.css.map
  77. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap.min.css
  78. +0 -0     lakesuperior/endpoints/templates/static/assets/css/bootstrap.min.css.map
  79. +0 -0     lakesuperior/endpoints/templates/static/assets/css/yasgui.min.css
  80. +0 -0     lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.eot
  81. +0 -0     lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.svg
  82. +0 -0     lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.ttf
  83. +0 -0     lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.woff
  84. +0 -0     lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.woff2
  85. +0 -0     lakesuperior/endpoints/templates/static/assets/js/bootstrap.js
  86. +0 -0     lakesuperior/endpoints/templates/static/assets/js/bootstrap.min.js
  87. +0 -0     lakesuperior/endpoints/templates/static/assets/js/jquery-3.2.1.min.js
  88. +0 -0     lakesuperior/endpoints/templates/static/assets/js/npm.js
  89. +0 -0     lakesuperior/endpoints/templates/static/assets/js/yasgui.min.js
  90. +10 -3    lakesuperior/env_setup.py
  91. +59 -9    lakesuperior/globals.py
  92. +0 -1     lakesuperior/lsup_admin.py
  93. +0 -0     lakesuperior/messaging/__init__.py
  94. +14 -18   lakesuperior/messaging/formatters.py
  95. +24 -16   lakesuperior/messaging/handlers.py
  96. +6 -8     lakesuperior/messaging/messenger.py
  97. +27 -25   lakesuperior/migrator.py
  98. +28 -26   lakesuperior/model/ldp_factory.py
  99. +14 -13   lakesuperior/model/ldp_nr.py
  100. +15 -17  lakesuperior/model/ldp_rs.py

+ 2 - 3
.travis.yml

@@ -3,7 +3,6 @@ python:
  - "3.5"
  - "3.6"
 install:
-  - pip install -r requirements.txt
-  - coilmq&
+  - pip install -e .
 script:
-- pytest tests
+  - python setup.py test
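The switch from `pip install -r requirements.txt` to `pip install -e .` and `python setup.py test` moves install and test metadata into packaging configuration. A minimal sketch of the `setup.py` fields this flow relies on (the field values here are illustrative assumptions, not the project's actual metadata):

```python
# setup.py -- illustrative sketch; names and values are assumptions.
from setuptools import setup, find_packages

setup(
    name='lakesuperior',
    packages=find_packages(),
    install_requires=[
        # runtime dependencies formerly pinned in requirements.txt
    ],
    test_suite='tests',  # what `python setup.py test` discovers and runs
)
```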

+ 1 - 1
Dockerfile

@@ -5,7 +5,7 @@ WORKDIR     /usr/local
 ADD         . lakesuperior
 WORKDIR     /usr/local/lakesuperior
 RUN         cp ./docker/etc/* ./etc.defaults/
-RUN         pip install -r requirements.txt
+RUN         pip install -e .
 CMD         ./docker/docker_entrypoint
 EXPOSE      8000
 HEALTHCHECK --interval=30s --timeout=5s \

+ 3 - 0
MANIFEST.in

@@ -0,0 +1,3 @@
+include README.rst
+include LICENSE
+graft lakesuperior/endpoints/templates

+ 0 - 177
README.md

@@ -1,177 +0,0 @@
-# LAKEsuperior
-
-[![build status](
-  http://img.shields.io/travis/scossu/lakesuperior/master.svg?style=flat)](
- https://travis-ci.org/scossu/lakesuperior)
-
-LAKEsuperior is an alternative [Fedora Repository](http://fedorarepository.org)
-implementation.
-
-## Guiding Principles
-
-LAKEsuperior aims at being an uncomplicated, efficient Fedora 4 implementation.
-
-Its main goals are:
-
-- **Reliability:** Based on solid technologies with stability in mind.
-- **Efficiency:** Small memory and CPU footprint, high scalability.
-- **Ease of management:** Tools to perform monitoring and maintenance included.
-- **Simplicity of design:** Straight-forward architecture, robustness over
-  features.
-
-## Key features
-
-- Drop-in replacement for Fedora4 (with some
-  [caveats](doc/notes/fcrepo4_deltas.md)); currently being tested with Hyrax 2
-- Very stable persistence layer based on [LMDB](https://symas.com/lmdb/) and
-  filesystem. Fully ACID-compliant writes guarantee consistency of data.
-- Term-based search (*planned*) and SPARQL Query API + UI
-- No performance penalty for storing many resources under the same container; no
-  [kudzu](https://www.nature.org/ourinitiatives/urgentissues/land-conservation/forests/kudzu.xml)
-  pairtree segmentation <sup id="a1">[1](#f1)</sup>
-- Extensible [provenance metadata](doc/notes/model.md) tracking
-- [Multi-modal access](doc/notes/architecture.md#multi-modal-access): HTTP
-  (REST), command line interface and native Python API.
-- Fits in a pocket: you can carry 50M triples in an 8Gb memory stick.
-
-Implementation of the official [Fedora API specs](https://fedora.info/spec/)
-(Fedora 5.x and beyond) is not
-foreseen in the short term, however it would be a natural evolution of this
-project if it gains support.
-
-Please make sure you read the [Delta document](doc/notes/fcrepo4_deltas.md) for
-divergences with the official Fedora4 implementation.
-
-## Target Audience
-
-LAKEsuperior is for anybody who cares about preserving data in the long term.
-
-Less vaguely, LAKEsuperior is targeted at anyone who needs to store large
-quantities of highly linked metadata and documents.
-
-Its Python/C environment and API make it particularly well suited for academic
-and scientific environments that can embed it in a Python
-application as a library or extend it via plug-ins.
-
-LAKEsuperior is able to be exposed to the Web as a
-[Linked Data Platform](https://www.w3.org/TR/ldp-primer/) server. It also acts
-as a SPARQL query (read-only) endpoint, however it is not meant to be used as
-a full-fledged triplestore at the moment.
-
-In its current status, LAKEsuperior is aimed at developers and
-hands-on managers who are interested in evaluating this project.
-
-## Quick Install: Running in Docker
-
-Thanks to @mbklein for the Docker image and instructions.
-
-You can run LAKEsuperior in Docker for a hands-off quickstart.
-
-[Docker](http://docker.com/) is a containerization platform that allows you to run
-services in lightweight virtual machine environments without having to worry about
-installing all of the prerequisites on your host machine.
-
-1. Install the correct [Docker Community Edition](https://www.docker.com/community-edition)
-   for your operating system.
-1. Clone this repo: `git clone https://github.com/scossu/lakesuperior.git`
-1. `cd` into repo folder
-1. Run `docker-compose up`
-
-LAKEsuperior should now be available at `http://localhost:8000/`.
-
-The provided Docker configuration includes persistent storage as a self-contained
-Docker volume, meaning your data will persist between runs. If you want to clear
-the decks, simply run `docker-compose down -v`.
-
-## Manual Install (a bit less quick, a bit more power)
-
-**Note:** These instructions have been tested on Linux. They may work on Darwin
-with little modification, and possibly on Windows with some
-modifications. Feedback is welcome.
-
-### Dependencies
-
-1. Python 3.5 or greater.
-1. A message broker supporting the STOMP protocol. For testing and evaluation
-purposes, [CoilMQ](https://github.com/hozn/coilmq) is included with the
-dependencies and should be automatically installed.
-
-### Installation steps
-
-1. Create a virtualenv in a project folder:
-   `python3 -m venv <virtualenv folder>`
-1. Activate the virtualenv: `source <path_to_virtualenv>/bin/activate`
-1. Clone this repo: `git clone https://github.com/scossu/lakesuperior.git`
-1. `cd` into repo folder
-1. Install dependencies: `pip install -r requirements.txt`
-1. Start your STOMP broker, e.g.: `coilmq &`. If you have another queue manager
-   listening to port 61613 you can either configure a different port on the
-   application configuration, or use the existing message queue.
-1. Run `./lsup-admin bootstrap` to initialize the binary and graph stores
-1. Run `./fcrepo`.
-
-### Configuration
-
-The app should run for testing and evaluation purposes without any further
-configuration. All the application data are stored by default in the `data`
-directory.
-
-To change the default configuration you should:
-
-1. Copy the `etc.skeleton` folder to a separate location
-1. Set the configuration folder location in the environment:
-   `export FCREPO_CONFIG_DIR=<your config dir location>` (you can
-   add this line at the end of your virtualenv `activate` script)
-1. Configure the application
-1. Bootstrap the app or copy the original data folders to the new location if
-   any location options changed
-1. (Re)start the server: `./fcrepo`
-
-The configuration options are documented in the files.
-
-**Note:** `test.yml` must specify a different location for the graph and for
-the binary stores than the default one, otherwise running a test suite will
-destroy your main data store. The application will issue an error message and
-refuse to start if these locations overlap.
-
-### Production deployment
-
-If you like fried repositories for lunch, deploy before 11AM.
-
-## Status and development
-
-LAKEsuperior is in **alpha** status. Please see the
-[project issues](https://github.com/scossu/lakesuperior/issues) list for a
-rudimentary road map.
-
-## Contributing
-
-This has been so far a single person's off-hours project (with much input from
-several sides). In order to turn into anything close to a Beta release and
-eventually to a production-ready implementation, it needs some community love.
-
-Contributions are welcome in all forms, including ideas, issue reports, or
-even just spinning up the software and providing some feedback. LAKEsuperior is
-meant to live as a community project.
-
-## Technical documentation
-
-[Architecture Overview](doc/notes/architecture.md)
-
-[Content Model](doc/notes/model.md)
-
-[Messaging](doc/notes/messaging.md)
-
-[Migration, Backup & Restore](doc/notes/migration.md)
-
-[Command-Line Reference](doc/notes/cli.md)
-
-[Storage Implementation](doc/notes/storage.md)
-
-[Performance Benchmarks](doc/notes/performance.md)
-
----
-
-<b id="f1">1</b> However if your client splits pairtrees upstream, such as
-Hyrax does, that obviously needs to change to get rid of the path
-segments. [↩](#a1)

+ 69 - 0
README.rst

@@ -0,0 +1,69 @@
+LAKEsuperior
+============
+
+|build status| |docs|
+
+LAKEsuperior is an alternative `Fedora
+Repository <http://fedorarepository.org>`__ implementation.
+
+Documentation
+-------------
+
+The full documentation is maintained in `Read The Docs
+<http://lakesuperior.readthedocs.io/>`__. Please refer to that for more info.
+
+Installation
+------------
+
+The following instructions are aimed at a manual install using this git
+repository. For a hands-off install using Docker, see
+`the setup documentation
+<http://lakesuperior.readthedocs.io/en/latest/setup.html>`__.
+
+Dependencies
+~~~~~~~~~~~~
+
+1. Python 3.5 or greater.
+2. A message broker supporting the STOMP protocol. For testing and
+   evaluation purposes, `CoilMQ <https://github.com/hozn/coilmq>`__ is
+   included with the dependencies and should be automatically installed.
+
+Installation steps
+~~~~~~~~~~~~~~~~~~
+
+#. Create a virtualenv in a project folder:
+   ``python3 -m venv <virtualenv folder>``
+#. Activate the virtualenv: ``source <path_to_virtualenv>/bin/activate``
+#. Install dependencies: ``pip install -r requirements.txt``
+#. Start your STOMP broker, e.g.: ``coilmq &``. If you have another
+   queue manager listening to port 61613 you can either configure a
+   different port on the application configuration, or use the existing
+   message queue.
+#. Run ``lsup-admin bootstrap`` to initialize the binary and graph
+   stores.
+#. Run ``fcrepo``.
+
+Contributing
+------------
+
+This has been so far a single person’s off-hours project (with much
+input from several sides). In order to turn into anything close to a
+Beta release and eventually to a production-ready implementation, it
+needs some community love.
+
+Contributions are welcome in all forms, including ideas, issue reports,
+or even just spinning up the software and providing some feedback.
+LAKEsuperior is meant to live as a community project.
+
+See `Contributing Guidelines
+<http://lakesuperior.readthedocs.io/en/latest/contributing.html>`__
+for further details on how to fork, improve, document and test the project.
+
+.. |build status| image:: http://img.shields.io/travis/scossu/lakesuperior/master.svg?style=flat
+   :alt: Build Status
+   :target: https://travis-ci.org/scossu/lakesuperior
+
+.. |docs| image:: https://readthedocs.org/projects/lakesuperior/badge/
+    :alt: Documentation Status
+    :scale: 100%
+    :target: https://lakesuperior.readthedocs.io/en/latest/?badge=latest

+ 3 - 1
conftest.py

@@ -6,9 +6,11 @@ sys.path.append('.')
 from lakesuperior.config_parser import test_config
 from lakesuperior.globals import AppGlobals
 from lakesuperior.env import env
+
+env.config = test_config
 env.app_globals = AppGlobals(test_config)
 from lakesuperior.app import create_app
-from util.generators import random_image
+from lakesuperior.util.generators import random_image
 
 env.config = test_config
 

+ 0 - 43
doc/notes/architecture.md

@@ -1,43 +0,0 @@
-# LAKEsuperior Architecture
-
-LAKEsuperior is written in Python. Parts of the code may eventually be
-rewritten in [Cython](http://cython.readthedocs.io/) for performance.
-
-
-## Multi-Modal Access
-
-LAKEsuperior services and data are accessible in multiple ways:
-
-- Via HTTP. This is the canonical way to interact with LDP resources and
-  conforms quite closely to the Fedora specs (currently v4).
-- Via command line. This method includes long-running admin tasks which are not
-  available via HTTP.
-- Via a Python API. This method allows Python scripts to access the same
-  methods available to the two methods above in a programmatic way. It is
-  possible to write Python plugins or even to embed LAKEsuperior in a
-  Python application, even without running a web server.
-
-
-## Architecture Overview
-
-![LAKEsuperior Architecture](../assets/lakesuperior_arch.png)
-
-The LAKEsuperior REST API provides access to the underlying Python API. All
-REST and CLI operations can be replicated by a Python program accessing this
-API.
-
-The main advantage of the Python API is that it makes it very easy to manipulate
-graph and binary data without the need to serialize or deserialize native data
-structures. This matters when handling large ETL jobs for example.
-
-The Python API is divided into three main areas:
-
-- [Resource API](../../lakesuperior/api/resource.py). This API is in charge of
-  all the resource CRUD operations and implements the majority of the Fedora
-  specs.
-- [Admin API](../../lakesuperior/api/admin.py). This exposes utility methods,
-  mostly long-running maintenance jobs.
-- [Query API](../../lakesuperior/api/query.py). This provides several
-  facilities for querying repository data.
-
-
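As an illustration of the Python API access mode described above, a minimal sketch (the resource path `/a/b/c` is hypothetical; the import pattern follows the Content Model notes):

```python
# Minimal sketch of the native Python API; '/a/b/c' is a hypothetical path.
import lakesuperior.env_setup  # wires up configuration and app globals
from lakesuperior.api import resource

rsrc = resource.get('/a/b/c')  # Resource API: retrieve an LDP resource
print(rsrc.uid)                # '/a/b/c'
print(rsrc.uri)                # rdflib.term.URIRef('info:fcres/a/b/c')
```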

+ 0 - 33
doc/notes/cli.md

@@ -1,33 +0,0 @@
-# LAKEsuperior Command Line Reference
-
-The LAKEsuperior command line tool is used for maintenance and administration
-purposes.
-
-The script is invoked from the main install directory. The tool is
-self-documented, so this is just a redundant overview:
-
-```
-$ ./lsup-admin
-Usage: lsup-admin [OPTIONS] COMMAND [ARGS]...
-
-Options:
-  --help  Show this message and exit.
-
-  bootstrap     Bootstrap binary and graph stores.
-  check_fixity  [STUB] Check fixity of a resource.
-  check_refint  [STUB] Check referential integrity.
-  cleanup       [STUB] Clean up orphan database items.
-  copy          [STUB] Copy (backup) repository data.
-  dump          [STUB] Dump repository to disk.
-  load          [STUB] Load serialized repository data.
-  stats         Print repository statistics.
-
-```
-
-All entries marked `[STUB]` are not yet implemented; however, the
-`lsup-admin <command> --help` command will issue a description of what the
-command is meant to do. Please see the [TODO](TODO) document for a rough road
-map.
-
-All of the above commands are also available via, and based upon, the native
-Python API.

+ 0 - 213
doc/notes/fcrepo4_deltas.md

@@ -1,213 +0,0 @@
-# Divergencies between lakesuperior and FCREPO4
-
-This is a (vastly incomplete) list of discrepancies between the current FCREPO4
-implementation and LAKEsuperior. More will be added as more clients use it.
-
-
-## Not yet implemented (but in the plans)
-
-- Various headers handling
-- Versioning (incomplete)
-- AuthN/Z
-- Fixity check
-- Blank nodes
-
-
-## Potentially breaking changes
-
-The following divergences may lead to incompatibilities with some clients.
-
-### Atomicity
-
-FCREPO4 supports batch atomic operations whereas a transaction can be opened
-and a number of operations (i.e. multiple R/W requests to the repository) can
-be performed. The operations are persisted in the repository only if and when
-the transaction is committed.
-
-LAKEsuperior only supports atomicity for a single HTTP request, i.e. a single
-HTTP request that should result in multiple write operations to the storage
-layer is only persisted if no exception is thrown. Otherwise, the operation is
-rolled back in order to prevent resources from being left in an inconsistent
-state.
-
-### Tombstone methods
-
-If a client requests a tombstone resource in
-FCREPO4 with a method other than DELETE, the server will return `405 Method Not
-Allowed` regardless of whether the tombstone exists or not.
-
-LAKEsuperior will return `405` only if the tombstone actually exists, `404`
-otherwise.
-
-### Web UI
-
-FCREPO4 includes a web UI for simple CRUD operations.
-
-Such a UI is not in the immediate LAKEsuperior development plans. However, a
-basic UI is available for read-only interaction: LDP resource browsing, SPARQL
-query and other search facilities, and administrative tools. Some of the latter
-*may* involve write operations, such as clean-up tasks.
-
-### Automatic path segment generation
-
-A `POST` request without a slug in FCREPO4 results in a pairtree consisting of
-several intermediate nodes leading to the automatically minted identifier. E.g.
-
-    POST /rest
-
-results in `/rest/8c/9a/07/4e/8c9a074e-dda3-5256-ea30-eec2dd4fcf61` being
-created.
-
-The same request in LAKEsuperior would create
-`/rest/8c9a074e-dda3-5256-ea30-eec2dd4fcf61` (obviously the identifiers will be
-different).
-
-This seems to break Hyrax at some point, but might have been fixed. This needs
-to be verified further.
-
-
-## Non-standard client breaking changes
-
-The following changes may be incompatible with clients relying on some FCREPO4
-behavior not endorsed by LDP or other specifications.
-
-### Pairtrees
-
-FCREPO4 generates "pairtree" resources if a resource is created in a path whose
-segments are missing. E.g. when creating `/a/b/c/d`, if `/a/b` and `/a/b/c` do
-not exist, FCREPO4 will create two Pairtree resources. POSTing and PUTting into
-Pairtrees is not allowed. Also, a containment triple is established between the
-closest LDPC and the created resource, e.g. if `a` exists, a `</a> ldp:contains
-</a/b/c/d>` triple is created.
-
-LAKEsuperior does not employ Pairtrees. In the example above LAKEsuperior would
-create a fully qualified LDPC for each missing segment, which can be POSTed and
-PUT to. Containment triples are created between each link in the path, i.e.
-`</a> ldp:contains </a/b>`, `</a/b> ldp:contains </a/b/c>` etc. This may
-potentially break clients relying on the direct containment model.
-
-The rationale behind this change is that Pairtrees are the byproduct of a
-limitation imposed by Modeshape and introduce complexity in the software stack
-and confusion for the client. LAKEsuperior aligns with the more intuitive UNIX
-filesystem model, where each segment of a path is a "folder" or container
-(except for the leaf nodes that can be either folders or files). In any
-case, clients are discouraged from generating deep paths in LAKEsuperior
-without a specific purpose because these resources create unnecessary data.
-
-### Non-mandatory, non-authoritative slug in version POST
-
-FCREPO4 requires a `Slug` header to POST to `fcr:versions` to create a new
-version.
-
-LAKEsuperior adheres to the more general FCREPO POST rule and if no slug is
-provided, an automatic ID is generated instead. The ID is a UUID4.
-
-Note that internally this ID is not called "label" but "uid" since it
-is treated as a fully qualified identifier. The `fcrepo:hasVersionLabel`
-predicate, however ambiguous in this context, will be kept until the adoption
-of Memento, which will change the retrieval mechanisms.
-
-Also, if a POST is issued on the same resource `fcr:versions` location using
-a version ID that already exists, LAKEsuperior will just mint a random
-identifier rather than returning an error.
-
-
-## Deprecation track
-
-LAKEsuperior offers some "legacy" options to replicate the FCREPO4 behavior;
-however, it encourages new development to use a different approach for some
-types of interaction.
-
-### Endpoints
-
-The FCREPO root endpoint is `/rest`. The LAKEsuperior root endpoint is `/ldp`.
-
-This should not pose a problem if a client does not have `rest` hard-coded in
-its code, but in any event, the `/rest` endpoint is provided for backwards
-compatibility.
-
-LAKEsuperior adds the (currently stub) `query` endpoint. Other endpoints for
-non-LDP services may be opened in the future.
-
-### Automatic LDP class assignment
-
-Since LAKEsuperior rejects client-provided server-managed triples, and since
-the LDP types are among them, the LDP container type is inferred from the
-provided properties: if the `ldp:hasMemberRelation` and
-`ldp:membershipResource` properties are provided, the resource is a Direct
-Container. If in addition to these the `ldp:insertedContentRelation` property
-is present, the resource is an Indirect Container. If any of the first two are
-missing, the resource is a Container (@TODO discuss: shall it be a Basic
-Container?)
-
-Clients are encouraged to omit LDP types in PUT, POST and PATCH requests.
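The inference rule above can be sketched in a few lines. The following illustrates the logic only and is not the actual factory code; it assumes `props` is the set of predicates in the client payload:

```python
# Illustrative sketch of the container-type inference described above.
from rdflib.namespace import Namespace

LDP = Namespace('http://www.w3.org/ns/ldp#')

def infer_ldp_class(props):
    """`props` is assumed to be the set of predicates sent by the client."""
    if LDP.hasMemberRelation in props and LDP.membershipResource in props:
        if LDP.insertedContentRelation in props:
            return LDP.IndirectContainer
        return LDP.DirectContainer
    return LDP.Container  # or ldp:BasicContainer, pending the @TODO above
```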
-
-### Lenient handling
-
-FCREPO4 requires server-managed triples to be expressly indicated in a PUT
-request, unless the `Prefer` header is set to
-`handling=lenient; received="minimal"`, in which case the RDF payload must not
-have any server-managed triples.
-
-LAKEsuperior works under the assumption that clients should never provide
-server-managed triples. It automatically handles PUT requests sent to existing
-resources by returning a 412 if any server managed triples are included in the
-payload. This is the same as setting `Prefer` to `handling=strict`, which is
-the default.
-
-If `Prefer` is set to `handling=lenient`, all server-managed triples sent with
-the payload are ignored.
-
-Clients using the `Prefer` header to control PUT behavior as advertised by the
-specs should not notice any difference.
-
-
-## Optional improvements
-
-The following are improvements in performance or usability that can only be taken
-advantage of if client code is adjusted.
-
-### LDP-NR content and metadata
-
-FCREPO4 relies on the `/fcr:metadata` identifier to retrieve RDF metadata about
-an LDP-NR. LAKEsuperior supports this as a legacy option, but encourages the
-use of content negotiation to do the same while offering explicit endpoints
-for RDF and non-RDF content retrieval.
-
-Any request to an LDP-NR with an `Accept` header set to one of the supported
-RDF serialization formats will yield the RDF metadata of the resource instead
-of the binary contents.
-
-The `fcr:metadata` URI returns the RDF metadata of an LDP-NR.
-
-The `fcr:content` URI returns the non-RDF content.
-
-The two options above return an HTTP error if requested for an LDP-RS.
-
-### "Include" and "Omit" options for children
-
-LAKEsuperior offers an additional `Prefer` header option to exclude all
-references to child resources (i.e. by removing all the `ldp:contains` triples)
-while leaving the other server-managed triples when retrieving a resource:
-
-    Prefer: return=representation; [include | omit]="http://fedora.info/definitions/v4/repository#Children"
-
-The default behavior is to include all children URIs.
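For illustration, a request omitting child references might look like the following sketch using the `requests` library (the resource URL is hypothetical; the `Prefer` value is the one documented above):

```python
import requests

# Retrieve a large container without its ldp:contains triples.
resp = requests.get(
    'http://localhost:8000/ldp/my-container',  # hypothetical resource
    headers={
        'Prefer': ('return=representation; '
            'omit="http://fedora.info/definitions/v4/repository#Children"'),
        'Accept': 'text/turtle',
    },
)
print(resp.status_code, resp.text)
```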
-
-### Soft-delete and purge
-
-**NOTE**: The implementation of this section is incomplete and debated.
-
-In FCREPO4 a deleted resource leaves a tombstone deleting all traces of the
-previous resource.
-
-In LAKEsuperior, a normal DELETE creates a new version snapshot of the resource
-and puts a tombstone in its place. The resource versions are still available
-in the `fcr:versions` location. The resource can be "resurrected" by
-issuing a POST to its tombstone. This will result in a `201`.
-
-If a tombstone is deleted, the resource and its versions are completely deleted
-(purged).
-
-Moreover, setting the `Prefer:no-tombstone` header option on DELETE deletes
-a resource and its versions directly, without leaving a tombstone.

+ 0 - 260
doc/notes/indexing_strategy.md

@@ -1,260 +0,0 @@
-# LMDB Store design for RDFLib
-
-This is a log of subsequent strategies employed to store triples in LMDB.
-
-Strategy #5a is the one currently used. The rest is kept for historic reasons
-and academic curiosity (and also because it was too much work to just wipe out
-of memory).
-
-## Storage approach
-
-- Pickle quad and create MD5 or SHA1 hash.
-- Store triples in one database paired with key; store indices separately.
-
-Different strategies involve layout and number of databases.
-
-## Strategy #1
-
-- kq: key: serialized triple (1:1)
-- sk: Serialized subject: key (1:m)
-- pk: Serialized predicate: key (1:m)
-- ok: Serialized object: key (1:m)
-- (optional) lok: Serialized literal object: key (1:m)
-- (optional) tok: Serialized RDF type: key (1:m)
-- ck: Serialized context: key (1:m)
-
-### Retrieval approach
-
-To find all matches for a quad:
-
-- If all terms in the quad are bound, generate the key from the pickled
-quad and look up the triple in `kq`
-- If all terms are unbound, return an iterator of all values in `kq`.
-- If some values are bound and some unbound (most common query):
-  - Get a base list of keys associated with the first bound term
-  - For each subsequent bound term, check if each key associated with the term
-  matches a key in the base list
-  - Continue through all the bound terms. If a match is not found at any point,
-  continue to the next term
-  - If a match is found in all the bound term databases, look up the pickled quad
-  matching the key in `kq` and yield it
-
-More optimization can be introduced later, e.g. separating literal and RDF
-type objects in separate databases. Literals can have very long values and a
-database with a longer key setting may be useful. RDF terms can be indexed
-separately because they are the most common bound term.
-
-### Example lookup
-
-Keys and Triples (should actually be quads but this is a simplified version):
-
-A: s1 p1 o1
-B: s1 p2 o2
-C: s2 p3 o1
-D: s2 p3 o3
-
-Indices:
-
-- SK:
-  - s1: A, B
-  - s2: C, D
-- PK:
-  - p1: A
-  - p2: B
-  - p3: C, D
-- OK:
-  - o1: A, C
-  - o2: B
-  - o3: D
-
-Queries:
-
-- s1 ?p ?o → {A, B}
-- s1 p2 ?o → {A, B} & {B} = {B}
-- ?s ?p o3 → {D}
-- s1 p2 o5 → {} (Exit at OK: no term matches 'o5')
-- s2 p3 o2 → {C, D} & {C, D} & {B} = {}
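A sketch of the intersection lookup described under "Retrieval approach", using the toy SK/PK/OK indices above (in-memory dicts stand in for the LMDB databases):

```python
# Strategy #1 lookup sketch: intersect the key sets of the bound terms.
SK = {'s1': {'A', 'B'}, 's2': {'C', 'D'}}
PK = {'p1': {'A'}, 'p2': {'B'}, 'p3': {'C', 'D'}}
OK = {'o1': {'A', 'C'}, 'o2': {'B'}, 'o3': {'D'}}

def lookup(s=None, p=None, o=None):
    keys = None
    for index, term in ((SK, s), (PK, p), (OK, o)):
        if term is None:
            continue                      # unbound term: no filtering
        matches = index.get(term, set())
        if not matches:
            return set()                  # exit early: no key for this term
        keys = matches if keys is None else keys & matches
    return keys

assert lookup(s='s1', p='p2') == {'B'}
assert lookup(s='s2', p='p3', o='o2') == set()
```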
-
-
-## Strategy #2
-
-Separate data and indices in two environments.
-
-### Main data store
-
-Key to quad; main keyspace; all unique.
-
-### Indices
-
-None of these databases is of critical preservation concern. They can be
-rebuilt from the main data store.
-
-All dupsort and dupfixed.
-
-@TODO The first three may not be needed if computing term hash is fast enough.
-
-- t2k (term to term key)
-- lt2k (literal to term key: longer keys)
-- k2t (term key to term)
-
-- s2k (subject key to quad key)
-- p2k (pred key to quad key)
-- o2k (object key to quad key)
-- c2k (context key to quad key)
-
-- sc2qk (subject + context keys to quad key)
-- po2qk (predicate + object keys to quad key)
-
-- sp2qk (subject + predicate keys to quad key)
-- oc2qk (object + context keys to quad key)
-
-- so2qk (subject + object keys to quad key)
-- pc2qk (predicate + context keys to quad key)
-
-
-## Strategy #3
-
-Contexts are much fewer than triples (even in graph-per-aspect layout, 5-10 triples per graph)
-
-### Main data store
-
-Preservation-worthy data
-
-- tk:t (triple key: triple; dupsort, dupfixed)
-- tk:c (context key: triple; unique)
-
-### Indices
-
-Rebuildable from main data store
-
-- s2k (subject key: triple key)
-- p2k (pred key: triple key)
-- o2k (object key: triple key)
-- sp2k
-- so2k
-- po2k
-- spo2k
-
-### Lookup
-
-1. Look up triples by s, p, o, sp, so, po and get keys
-2. If a context is specified, for each key try to seek to (context, key) in ct
-   to verify it exists
-3. Intersect sets
-4. Match triple keys with data using kt
-
-#### Shortcuts
-
-- Get all contexts: return list of keys from ct
-- Get all triples for a context: get all values for a context from ct and match
-  triple data with kt
-- Get one triple match for all contexts: look up in triple indices and match
-  triple data with kt
-
-
-## Strategy #4
-
-Terms are entered individually in main data store. Also, shorter keys are
-used rather than hashes. These two aspects save a great deal of space and I/O,
-but require an additional index to put the terms together in a triple.
-
-### Main Data Store
-
-- t:st (term key: serialized term; 1:1)
-- spo:c (joined S, P, O keys: context key; 1:m)
-- c: (context keys only, values are the empty bytestring)
-
-Storage total: variable
-
-### Indices
-
-- th:t (term hash: term key; 1:1)
-- c:spo (context key: joined triple keys; 1:m)
-- s:po (S key: P + O key; 1:m)
-- p:so (P key: S + O keys; 1:m)
-- o:sp (O key: S + P keys; 1:m)
-- sp:o (S + P keys: O key; 1:m)
-- so:p (S + O keys: P key; 1:m)
-- po:s (P + O keys: S key; 1:m)
-
-Storage total: 143 bytes per triple
-
-### Disadvantages
-
-- Lots of indices
-- Terms can get orphaned:
-  - No easy way to know if a term is used anywhere in a quad
-  - Needs some routine cleanup
-  - On the other hand, terms are relatively light-weight and can be reused
-  - Almost surely not reusable are UUIDs, message digests, timestamps etc.
-
-
-## Strategy #5
-
-Reduce number of indices and rely on parsing and splitting keys to find triples
-with two bound parameters.
-
-This is especially important for keeping indexing synchronous to achieve fully
-ACID writes.
-
-### Main data store
-
-Same as Strategy #4:
-
-- t:st (term key: serialized term; 1:1)
-- spo:c (joined S, P, O keys: context key; dupsort, dupfixed)
-- c: (context keys only, values are the empty bytestring; 1:1)
-
-Storage total: variable (same as #4)
-
-### Indices
-
-- th:t (term hash: term key; 1:1)
-- s:po (S key: joined P, O keys; dupsort, dupfixed)
-- p:so (P key: joined S, O keys; dupsort, dupfixed)
-- o:sp (O key: joined S, P keys; dupsort, dupfixed)
-- c:spo (context → triple association; dupsort, dupfixed)
-
-Storage total: 95 bytes per triple
-
-### Lookup strategy
-
-- ? ? ? c: [c:spo] all SPO for C → split key → [t:st] term from term key
-- s p o c: [c:spo] exact SPO & C match → split key → [t:st] term from term key
-- s ? ?: [s:po] All PO for S → split key → [t:st] term from term key
-- s p ?: [s:po] All PO for S → filter result by P in split key
-    → [t:st] term from term key
-
-### Advantages
-
-- Less indices: smaller index size and less I/O
-
-### Disadvantages
-
-- Possibly slower retrieval for queries with 2 bound terms (run metrics)
-
-### Further optimization
-
-In order to minimize traversing and splitting results, the first retrieval
-should be made on the term with the fewest average values per key. Search
-order can be balanced by establishing a lookup order for indices.
-
-This can be achieved by calling stats on the index databases and looking up the
-database with *most* keys. Since there is an equal number of entries in each of
-the (s:po, p:so, o:sp) indices, the one with most keys will have the least
-average number of values per key. If that lookup is done first, the initial
-data set to traverse and filter will be smaller.
-
-
-## Strategy #5a
-
-This is a slightly different implementation of #5 that somewhat simplifies and
-perhaps speeds up things a bit. It is the currently employed solution.
-
-The indexing and lookup strategy is the same; but instead of using a separator
-byte for splitting compound keys, the logic relies on the fact that keys have
-a fixed length and are sliced instead. This *should* result in faster key
-manipulation, also because in most cases `memoryview` buffers can be used
-directly instead of being copied from memory.
-
-Index storage is 90 bytes per triple.
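A sketch of the fixed-length slicing described in #5a; the 5-byte key length is an assumption for illustration, not necessarily the store's actual size:

```python
# Fixed-length compound-key slicing (strategy #5a sketch).
KEY_LEN = 5  # assumed term-key size, for illustration only

def split_triple_key(spo_key: bytes):
    """Slice a joined S+P+O key into its three fixed-length term keys."""
    assert len(spo_key) == 3 * KEY_LEN
    v = memoryview(spo_key)  # avoids copying, as noted above
    return v[:KEY_LEN], v[KEY_LEN:2 * KEY_LEN], v[2 * KEY_LEN:]

def filter_po_by_p(po_values, p_key: bytes):
    """Filter s:po index values by P purely by position, no separator byte."""
    for po in po_values:                  # each value is a joined P+O key
        if bytes(po[:KEY_LEN]) == p_key:
            yield po[KEY_LEN:]            # the O key
```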

+ 0 - 27
doc/notes/messaging.md

@@ -1,27 +0,0 @@
-# LAKEsuperior Messaging
-
-LAKEsuperior implements a messaging system based on ActivityStreams, as
-indicated by the
-[Fedora API specs](https://fedora.info/2017/06/30/spec/#notifications).
-The metadata set provided is currently quite minimal but can be easily
-enriched by extending the
-[default formatter class](https://github.com/scossu/lakesuperior/blob/master/lakesuperior/messaging/messenger.py).
-
-STOMP is the only supported protocol at the moment. More protocols may be made
-available at a later time.
-
-LAKEsuperior can send messages to any number of destinations: see
-[configuration](https://github.com/scossu/lakesuperior/blob/master/etc.defaults/application.yml#L79).
-By default, CoilMQ is provided for testing purposes and listens to
-`localhost:61613`. The default route sends messages to `/topic/fcrepo`.
-
-A small command-line utility, also provided with the Python dependencies,
-makes it possible to watch incoming messages. To monitor messages, enter the
-following *after activating your virtualenv*:
-
-```
-stomp -H localhost -P 61613 -L /topic/fcrepo
-```
-
-See the [stomp.py library reference page](https://github.com/jasonrbriggs/stomp.py/wiki/Command-Line-Access)
-for details.
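For programmatic monitoring, a sketch using the stomp.py library (the listener signature assumes stomp.py 8.x; CoilMQ's default anonymous access is assumed):

```python
import time
import stomp

class FcrepoListener(stomp.ConnectionListener):
    def on_message(self, frame):
        print(frame.body)  # the ActivityStreams message payload

conn = stomp.Connection([('localhost', 61613)])
conn.set_listener('', FcrepoListener())
conn.connect(wait=True)
conn.subscribe(destination='/topic/fcrepo', id='1', ack='auto')
time.sleep(60)  # watch messages for a minute
conn.disconnect()
```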

+ 0 - 57
doc/notes/migration.md

@@ -1,57 +0,0 @@
-# Migration, Backup & Restore
-
-The LAKEsuperior dataset is by default fully contained in a folder. This means
-that only the data, configuration and code are needed for it to run.
-No Postgres, Redis, or such. These folders can be moved around as needed.
-
-## Migration Tool
-
-Migration is the process of importing and converting data from a different
-Fedora or LDP implementation into a new LAKEsuperior instance. This process
-uses the HTTP/LDP API of the original repository. A command-line utility is
-available as part of the `lsup-admin` suite to assist in such operation.
-
-A repository can be migrated with a one-line command such as:
-
-```
-./lsup-admin migrate http://source-repo.edu/rest /local/dest/folder
-```
-
-For more options, enter
-
-```
-./lsup-admin migrate --help
-```
-
-The script will crawl through the resources and follow outbound links
-within them. In order to do this, resources are added as raw triples (i.e.
-no consistency checks are made).
-
-**Note:** the consistency check tool has not been implemented yet,
-but its release should follow shortly. This will ensure that all the links
-between resources are consistent in regard to referential integrity.
-
-This script will create a full dataset in the specified destination folder,
-complete with a default configuration that allows the LAKEsuperior server to
-be started immediately after the migration is complete.
-
-Two approaches to migration are possible:
-
-1. By providing a starting point on the source repository. E.g. if the
-   repository you want to migrate is at `http://repo.edu/rest/prod` you can add
-   the `-s /prod` option to the script to avoid migrating irrelevant branches.
-   Note that the script will still reach outside of the starting point if
-   resources are referencing other resources outside of it.
-2. By providing a file containing a list of resources to migrate. This is
-   useful if a source repository cannot produce a full list (e.g. the root node
-   has more children than the server can handle) but a list of individual
-   resources is available via an external index (Solr, triplestore, etc.).
-   The resources can be indicated by their fully qualified URIs or paths
-   relative to the repository root. (*TODO latter option needs testing*)
-
-## Backup & Restore
-
-A backup of a LAKEsuperior repository consists of copying the RDF and non-RDF
-data folders. The location of these folders is indicated in the application
-configuration. The default commands provided by your OS (`cp`, `rsync`,
-`tar` etc. for Unix) are all that is needed.

+ 0 - 63
doc/notes/model.md

@@ -1,63 +0,0 @@
-# LAKEsuperior Content Model Rationale
-
-## Internal and Public URIs; Identifiers
-
-Resource URIs are stored internally in LAKEsuperior as domain-agnostic URIs
-with the scheme `info:fcres<resource UID>`. This allows resources to be
-portable across systems. E.g. a resource with an internal URI of
-`info:fcres/a/b/c`, when accessed via the `http://localhost:8000/ldp`
-endpoint, will be found at `http://localhost:8000/ldp/a/b/c`.
-
-The resource UID making up the internal URI looks like a UNIX
-filesystem path, i.e. it always starts with a forward slash and can be made up
-of multiple segments separated by slashes. E.g. `/` is the root node UID,
-`/a` is a resource UID just below root. Their internal URIs are `info:fcres/`
-and `info:fcres/a` respectively.
-
-In the Python API, the UID and internal URI of an LDP resource can be accessed
-via the `uid` and `uri` properties respectively:
-
-```
->>> import lakesuperior.env_setup
->>> from lakesuperior.api import resource
->>> rsrc = resource.get('/a/b/c')
->>> rsrc.uid
-/a/b/c
->>> rsrc.uri
-rdflib.terms.URIRef('info:fcres/a/b/c')
-```
-
-## Store Layout
-
-One of the key concepts in LAKEsuperior is the store layout. This is a
-module built with a
-specific purpose in mind, i.e. allowing fine-grained recording of provenance
-metadata while providing reasonable performance.
-
-Store layout modules could be replaceable (work needs to
-be done to develop an interface to allow that). The default (and only at the
-moment) layout shipped with LAKEsuperior is the
-[resource-centric layout](../../lakesuperior/store/ldp_rs/rsrc_centric_layout).
-This layout implements a so-called
-[graph-per-aspect pattern](http://patterns.dataincubator.org/book/graph-per-aspect.html)
-which stores different sets of statements about a resource in separate named
-graphs.
-
-The named graphs used for each resource are:
-
-- An admin graph (`info:fcsystem/graph/admin<resource UID>`) which stores
-  administrative metadata, mostly server-managed triples such as LDP types,
-  system create/update timestamps and agents, etc.
-- A structure graph (`info:fcsystem/graph/structure<resource UID>`) reserved for
-  containment triples. The reason
-  for this separation is purely convenience, since it makes it easy to retrieve
-  all the properties of a large container without its child references.
-- One (and, possibly, in the future, more user-defined) named graph for
-  user-provided data (`info:fcsystem/graph/userdata/_main<resource UID>`).
-
-Each of these graphs can be annotated with provenance metadata. The layout
-decides which triples go in which graph based on the predicate or RDF type
-contained in the triple. Adding logic to support arbitrary named graphs based
-e.g. on user agent, or to add more provenance information, should be relatively
-simple.
-
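To make the layout concrete, a sketch of the three named graphs with rdflib (the resource `/a`, its child and the title triple are illustrative):

```python
from rdflib import Dataset, Literal, URIRef
from rdflib.namespace import RDF, Namespace

LDP = Namespace('http://www.w3.org/ns/ldp#')
DCTERMS = Namespace('http://purl.org/dc/terms/')

uid = '/a'  # hypothetical resource UID
rsrc = URIRef('info:fcres' + uid)
ds = Dataset()

# Admin graph: server-managed triples such as LDP types.
ds.graph(URIRef('info:fcsystem/graph/admin' + uid)).add(
        (rsrc, RDF.type, LDP.Container))
# Structure graph: containment triples only.
ds.graph(URIRef('info:fcsystem/graph/structure' + uid)).add(
        (rsrc, LDP.contains, URIRef('info:fcres/a/b')))
# User data graph: client-provided triples.
ds.graph(URIRef('info:fcsystem/graph/userdata/_main' + uid)).add(
        (rsrc, DCTERMS.title, Literal('An example resource')))
```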

+ 0 - 112
doc/notes/performance.md

@@ -1,112 +0,0 @@
-# Performance Benchmark Report
-
-## Environment
-
-### Hardware
-
-#### ‘Rather Snappy’ Laptop
-
-- Dell Precison M3800 Laptop
-- 4x Intel(R) Core(TM) i7-4712HQ CPU @ 2.30GHz
-- 12Gb RAM
-- SSD
-
-#### ‘Ole Workhorse’ server
-
-- 8x Intel(R) Xeon(R) CPU X5550 @ 2.67GHz
-- 16Gb RAM
-- Magnetic drive, XXX RPM
-
-### Software
-
-- Arch Linux OS
-- glibc 2.26-11
-- python 3.5.4
-- lmdb 0.9.21-1
-
-### Benchmark script
-
-[Generator script](../../util/benchmark.py)
-
-The script was run with default values: 10,000 children under the same parent,
-PUT requests.
-
-### Data Set
-
-Synthetic graph created by the benchmark script. The graph is unique for each
-request and consists of 200 triples which are partly random data, with a
-consistent size and variation:
-
-- 50 triples have an object that is a URI of an external resource (50 unique
-  predicates; 5 unique objects).
-- 50 triples have an object that is a URI of a repository-managed resource
-  (50 unique predicates; 5 unique objects).
-- 100 triples have an object that is a 64-character random Unicode string
-  (50 unique predicates; 100 unique objects).
-
-## Results
-
-### ‘Rather Snappy’ Laptop
-
-#### FCREPO/Modeshape 4.7.5
-
-15'45" running time
-
-0.094" per resource (100%—reference point)
-
-3.4M triples total in repo at the end of the process
-
-Retrieval of parent resource (~10000 triples), pipe to /dev/null: 3.64" (100%)
-
-Peak memory usage: 2.47Gb
-
-Database size: 3.3 Gb
-
-
-#### LAKEsuperior Alpha 6, LMDB Back End
-
-25' running time
-
-0.152" per resource (161%)
-
-*Some gaps every ~40-50 requests, probably disk flush*
-
-Retrieval of parent resource (10K triples), pipe to /dev/null: 2.13" (58%)
-
-Peak memory usage: ~650 Mb (3 idle workers, 1 active)
-
-Database size: 523 Mb (16%)
-
-### ‘Ole Workhorse’ server
-
-#### FCREPO
-
-0:47:38 running time
-
-0.285" per resource (100%)
-
-Retrieval of parent resource: 9.6" (100%)
-
-#### LAKEsuperior
-
-1:14:19 running time
-
-0.446" per resource (156%)
-
-Retrieval of parent resource: 5.58" (58%)
-
-## Conclusions
-
-LAKEsuperior appears to be markedly slower on writes and markedly faster on
-reads. Both these factors are very likely related to the underlying LMDB store
-which is optimized for read performance.
-
-Comparison of results between the laptop and the server demonstrates that the
-read and write performance gaps are nearly identical in the two environments.
-Disk speed severely affects the absolute numbers.
-
-**Note:** As you can guess, these are only very partial and specific results. They
-should not be taken as a thorough performance assessment. Such an assessment
-may be impossible and pointless to make given the very different nature of
-the storage models, which may behave radically differently depending on many
-variables.

+ 0 - 82
doc/notes/storage.md

@@ -1,82 +0,0 @@
-# Storage Implementation
-
-LAKEsuperior stores non-RDF ("binary") data in the filesystem and RDF data in
-an embedded key-value store, [LMDB](https://symas.com/lmdb/).
-
-## RDF Storage design
-
-LMDB is a very fast, very lightweight C library. It is inspired by BerkeleyDB
-but introduces significant improvements in terms of efficiency and stability.
-
-The LAKEsuperior RDF store consists of two files: the main data store and the
-indices (plus two lock files that are generated at runtime). A good amount of
-effort has been put to develop an indexing strategy that is balanced between
-write performance, read performance, and data size, with no compromise made on
-consistency.
-
-The main data
-store is the one containing the preservation-worthy data. While the indices are
-necessary for LAKEsuperior to function, they can be entirely rebuilt from the
-main data store in case of file corruption (recovery tools are on the TODO
-list).
-
-Detailed notes about the various strategies researched can be found
-[here](indexing_strategy.md).
-
-## Scalability
-
-Since LAKEsuperior is focused on design simplicity, efficiency and reliability,
-its RDF store is embedded and not horizontally scalable. However, LAKEsuperior
-is quite frugal with disk space. About 55 million triples can be
-stored in 8Gb of space (mileage can vary depending on how heterogeneous the
-triples are). This makes it easier to use expensive SSD drives for
-the RDF store, in order to improve performance. A single LMDB environment can
-reportedly scale up to 128 terabytes.
-
-## Maintenance
-
-LMDB has a very simple configuration, and all options are hardcoded
-in LAKEsuperior in order to exploit its features. A database automatically
-recovers from a crash.
-
-The LAKEsuperior RDF store abstraction maintains a registry of unique terms.
-These terms are not deleted if a triple is deleted, even if no triple is using
-them, because it would be too expensive to look for orphaned terms during a
-delete request. While these terms are relatively lightweight, it would be good
-to run a periodical clean-up job. Tools will be developed in the near future to
-facilitate this maintenance task.
-
-## Consistency
-
-LAKEsuperior wraps each LDP operation in a transaction. The indices are updated
-synchronously within the same transaction in order to guarantee
-consistency. If a system loses power or crashes, only the last transaction is
-lost, and the last successful write will include primary and index data.
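A sketch of this pattern with the Python lmdb binding (the path and database names here are illustrative, not the store's actual layout):

```python
import lmdb

# Illustrative names; the real store defines its own databases.
env = lmdb.open('/tmp/lsup_sketch', max_dbs=2, map_size=2 ** 30)
data_db = env.open_db(b'spo:c')
index_db = env.open_db(b's:po')

# One write transaction covers primary data and index alike: on a crash,
# either both puts are persisted or neither is.
with env.begin(write=True) as txn:
    txn.put(b'somekey', b'somevalue', db=data_db)
    txn.put(b'idxkey', b'idxvalue', db=index_db)
```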
-
-## Concurrency
-
-LMDB employs
-[MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
-to achieve fully ACID transactions. This implies that during
-a write, the whole database is locked. Multiple writes can be initiated
-concurrently, but the performance gain of doing so may be little because
-only one write operation can be performed at a time. Reasonable efforts have
-been put to make write transactions as short as possible (and more can be
-done). Also, this excludes a priori the option to implement long-running atomic
-operations, unless one is willing to block writes on the application for an
-indefinite length of time. On the other hand, read operations never block and
-are never blocked, so an application with a high read-to-write ratio may still
-benefit from multi-threaded requests.
-
-## Performance
-
-The [Performance Benchmark Report](performance.md) contains benchmark results.
-
-Write performance is lower than Modeshape/Fedora4; this may be mostly due to
-the fact that indices are written synchronously in a blocking transaction;
-also, the LMDB B+Tree structure is optimized for read performance rather than
-write performance. Some optimizations on the application layer could be made.
-
-Reads are faster than Modeshape/Fedora.
-
-All tests so far have been performed in a single thread.

+ 3 - 2
docker/docker_entrypoint

@@ -2,8 +2,9 @@
 
 mkdir -p /data/log /data/run /data/bootstrap
 cp ./data/bootstrap/* /data/bootstrap
+pip install -e .
 coilmq &
 if [ ! -d /data/ldpnr_store ] && [ ! -d /data/ldprs_store ]; then
-  echo yes | ./lsup-admin bootstrap
+  echo yes | lsup-admin bootstrap
 fi
-exec ./fcrepo
+exec fcrepo

+ 0 - 31
docker/etc/gunicorn.py

@@ -1,31 +0,0 @@
-# See: http://docs.gunicorn.org/en/stable/settings.html
-
-# Directory where to store logs, PIDfile, etc.
-_data_dir = '/data/'
-
-# Set app_mode to either 'prod', 'test' or 'dev'.
-# 'prod' is normal running mode. 'test' is used for running test suites.
-# 'dev' is similar to normal mode but with reload and debug enabled.
-_app_mode = 'dev'
-
-
-bind = "0.0.0.0:8000"
-workers = 4
-worker_class = 'gevent'
-max_requests = 512
-
-#user = "user"
-#group = "group"
-
-raw_env = 'APP_MODE={}'.format(_app_mode)
-
-# Set this to the directory containing logs, etc.
-# The path must end with a slash.
-#chdir = "/usr/local/lakesuperior/"
-
-daemon = _app_mode=='prod'
-pidfile = _data_dir + "run/fcrepo.pid"
-reload = _app_mode=='dev'
-
-accesslog = _data_dir + "log/gunicorn-access.log"
-errorlog = _data_dir + "log/gunicorn-error.log"

+ 24 - 0
docker/etc/gunicorn.yml

@@ -0,0 +1,24 @@
+# Set up main Gunicorn options.
+# See: http://docs.gunicorn.org/en/stable/settings.html
+
+# Commented values are the application defaults.
+
+# Directory where the WSGI server data are stored.
+data_dir: 'data'
+
+# Set app_mode to either 'prod', 'test' or 'dev'.
+# 'prod' is normal running mode. 'test' is used for running test suites.
+# 'dev' is similar to normal mode but with reload and debug enabled.
+app_mode: 'dev'
+
+#listen_addr: '0.0.0.0'
+#listen_port: 8000
+#workers: 4
+#worker_class: 'gevent'
+#max_requests: 512
+
+#user: ''
+#group: ''
+
+#preload_app: True
+

+ 23 - 0
docs/Makefile

@@ -0,0 +1,23 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+# Workaround to prevent lmdb from throwing an exception when loaded from Sphinx
+# Does not work with readthedocs unfortunately.
+#SPHINXBUILD   = python -m sphinx
+SPHINXBUILD   = sphinx-build
+SPHINXPROJ    = lakesuperior
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

+ 86 - 0
docs/about.rst

@@ -0,0 +1,86 @@
+About LAKEsuperior
+==================
+
+LAKEsuperior is an alternative `Fedora
+Repository <http://fedorarepository.org>`__ implementation.
+
+Fedora is a mature repository software system historically adopted by
+major cultural heritage institutions. It exposes an
+`LDP <https://www.w3.org/TR/ldp-primer/>`__ endpoint to manage
+any type of binary files and their metadata in Linked Data format.
+
+Guiding Principles
+------------------
+
+LAKEsuperior aims at being an uncomplicated, efficient Fedora 4
+implementation.
+
+Its main goals are:
+
+-  **Reliability:** Based on solid technologies with stability in mind.
+-  **Efficiency:** Small memory and CPU footprint, high scalability.
+-  **Ease of management:** Tools to perform monitoring and maintenance
+   included.
+-  **Simplicity of design:** Straight-forward architecture, robustness
+   over features.
+
+Key features
+------------
+
+-  Drop-in replacement for Fedora4 (with some
+   :doc:`caveats <fcrepo4_deltas>`); currently being tested
+   with Hyrax 2
+-  Very stable persistence layer based on
+   `LMDB <https://symas.com/lmdb/>`__ and filesystem. Fully
+   ACID-compliant writes guarantee consistency of data.
+-  Term-based search (*planned*) and SPARQL Query API + UI
+-  No performance penalty for storing many resources under the same
+   container; no
+   `kudzu <https://www.nature.org/ourinitiatives/urgentissues/land-conservation/forests/kudzu.xml>`__
+   pairtree segmentation [#]_ 
+-  Extensible :doc:`provenance metadata <model>` tracking
+-  :doc:`Multi-modal access <architecture>`: HTTP
+   (REST), command line interface and native Python API.
+-  Fits in a pocket: you can carry 50M triples in an 8Gb memory stick.
+
+Implementation of the official `Fedora API
+specs <https://fedora.info/spec/>`__ (Fedora 5.x and beyond) is not
+foreseen in the short term, however it would be a natural evolution of
+this project if it gains support.
+
+Please make sure you read the :doc:`Delta
+document <fcrepo4_deltas>` for divergences with the
+official Fedora4 implementation.
+
+Target Audience
+---------------
+
+LAKEsuperior is for anybody who cares about preserving data in the long
+term.
+
+Less vaguely, LAKEsuperior is targeted at anyone who needs to store large
+quantities of highly linked metadata and documents.
+
+Its Python/C environment and API make it particularly well suited for
+academic and scientific environments that can embed it in a
+Python application as a library or extend it via plug-ins.
+
+LAKEsuperior is able to be exposed to the Web as a `Linked Data
+Platform <https://www.w3.org/TR/ldp-primer/>`__ server. It also acts as
+a SPARQL query (read-only) endpoint, however it is not meant to be used
+as a full-fledged triplestore at the moment.
+
+In its current status, LAKEsuperior is aimed at developers and hands-on
+managers who are interested in evaluating this project.
+
+Status and development
+----------------------
+
+LAKEsuperior is in **alpha** status. Please see the `project
+issues <https://github.com/scossu/lakesuperior/issues>`__ list for a
+rudimentary road map.
+
+--------------
+
+.. [#] However if your client splits pairtrees upstream, such as Hyrax does,
+   that obviously needs to change to get rid of the path segments.

+ 25 - 0
docs/api.rst

@@ -0,0 +1,25 @@
+==================
+API Documentation
+==================
+
+Main Interface
+==============
+
+The LAKEsuperior API modules of most interest for a client are:
+
+- :mod:`Resource API <lakesuperior.api.resource>`
+- :mod:`Query API <lakesuperior.api.query>`
+- :mod:`Admin API <lakesuperior.api.admin>`
+
+When manipulating resources the
+:mod:`Resource module <lakesuperior.model.ldpr>` is used.
+
+The full API docs are listed below.
+
+Full API Documentation
+======================
+
+.. toctree::
+   :caption: Modules
+
+   apidoc/modules

+ 38 - 0
docs/apidoc/lakesuperior.api.rst

@@ -0,0 +1,38 @@
+lakesuperior\.api package
+=========================
+
+Submodules
+----------
+
+lakesuperior\.api\.admin module
+-------------------------------
+
+.. automodule:: lakesuperior.api.admin
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.api\.query module
+-------------------------------
+
+.. automodule:: lakesuperior.api.query
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.api\.resource module
+----------------------------------
+
+.. automodule:: lakesuperior.api.resource
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.api
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 46 - 0
docs/apidoc/lakesuperior.endpoints.rst

@@ -0,0 +1,46 @@
+lakesuperior\.endpoints package
+===============================
+
+Submodules
+----------
+
+lakesuperior\.endpoints\.admin module
+-------------------------------------
+
+.. automodule:: lakesuperior.endpoints.admin
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.endpoints\.ldp module
+-----------------------------------
+
+.. automodule:: lakesuperior.endpoints.ldp
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.endpoints\.main module
+------------------------------------
+
+.. automodule:: lakesuperior.endpoints.main
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.endpoints\.query module
+-------------------------------------
+
+.. automodule:: lakesuperior.endpoints.query
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.endpoints
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 38 - 0
docs/apidoc/lakesuperior.messaging.rst

@@ -0,0 +1,38 @@
+lakesuperior\.messaging package
+===============================
+
+Submodules
+----------
+
+lakesuperior\.messaging\.formatters module
+------------------------------------------
+
+.. automodule:: lakesuperior.messaging.formatters
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.messaging\.handlers module
+----------------------------------------
+
+.. automodule:: lakesuperior.messaging.handlers
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.messaging\.messenger module
+-----------------------------------------
+
+.. automodule:: lakesuperior.messaging.messenger
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.messaging
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 46 - 0
docs/apidoc/lakesuperior.model.rst

@@ -0,0 +1,46 @@
+lakesuperior\.model package
+===========================
+
+Submodules
+----------
+
+lakesuperior\.model\.ldp\_factory module
+----------------------------------------
+
+.. automodule:: lakesuperior.model.ldp_factory
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.model\.ldp\_nr module
+-----------------------------------
+
+.. automodule:: lakesuperior.model.ldp_nr
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.model\.ldp\_rs module
+-----------------------------------
+
+.. automodule:: lakesuperior.model.ldp_rs
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.model\.ldpr module
+--------------------------------
+
+.. automodule:: lakesuperior.model.ldpr
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.model
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 89 - 0
docs/apidoc/lakesuperior.rst

@@ -0,0 +1,89 @@
+lakesuperior package
+====================
+
+Subpackages
+-----------
+
+.. toctree::
+
+    lakesuperior.api
+    lakesuperior.endpoints
+    lakesuperior.messaging
+    lakesuperior.model
+    lakesuperior.store
+
+Submodules
+----------
+
+lakesuperior\.app module
+------------------------
+
+.. automodule:: lakesuperior.app
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.config\_parser module
+-----------------------------------
+
+.. automodule:: lakesuperior.config_parser
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.env module
+------------------------
+
+.. automodule:: lakesuperior.env
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.env\_setup module
+-------------------------------
+
+.. automodule:: lakesuperior.env_setup
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.exceptions module
+-------------------------------
+
+.. automodule:: lakesuperior.exceptions
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.globals module
+----------------------------
+
+.. automodule:: lakesuperior.globals
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.migrator module
+-----------------------------
+
+.. automodule:: lakesuperior.migrator
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.toolbox module
+----------------------------
+
+.. automodule:: lakesuperior.toolbox
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 30 - 0
docs/apidoc/lakesuperior.store.ldp_nr.rst

@@ -0,0 +1,30 @@
+lakesuperior\.store\.ldp\_nr package
+====================================
+
+Submodules
+----------
+
+lakesuperior\.store\.ldp\_nr\.base\_non\_rdf\_layout module
+-----------------------------------------------------------
+
+.. automodule:: lakesuperior.store.ldp_nr.base_non_rdf_layout
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.store\.ldp\_nr\.default\_layout module
+----------------------------------------------------
+
+.. automodule:: lakesuperior.store.ldp_nr.default_layout
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.store.ldp_nr
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 30 - 0
docs/apidoc/lakesuperior.store.ldp_rs.rst

@@ -0,0 +1,30 @@
+lakesuperior\.store\.ldp\_rs package
+====================================
+
+Submodules
+----------
+
+lakesuperior\.store\.ldp\_rs\.lmdb\_store module
+------------------------------------------------
+
+.. automodule:: lakesuperior.store.ldp_rs.lmdb_store
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+lakesuperior\.store\.ldp\_rs\.rsrc\_centric\_layout module
+----------------------------------------------------------
+
+.. automodule:: lakesuperior.store.ldp_rs.rsrc_centric_layout
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.store.ldp_rs
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 18 - 0
docs/apidoc/lakesuperior.store.rst

@@ -0,0 +1,18 @@
+lakesuperior\.store package
+===========================
+
+Subpackages
+-----------
+
+.. toctree::
+
+    lakesuperior.store.ldp_nr
+    lakesuperior.store.ldp_rs
+
+Module contents
+---------------
+
+.. automodule:: lakesuperior.store
+    :members:
+    :undoc-members:
+    :show-inheritance:

+ 7 - 0
docs/apidoc/modules.rst

@@ -0,0 +1,7 @@
+lakesuperior
+============
+
+.. toctree::
+   :maxdepth: 4
+
+   lakesuperior

+ 48 - 0
docs/architecture.rst

@@ -0,0 +1,48 @@
+LAKEsuperior Architecture
+=========================
+
+LAKEsuperior is written in Python. Parts of the code may be rewritten
+in `Cython <http://cython.readthedocs.io/>`__ for performance in the
+future.
+
+Multi-Modal Access
+------------------
+
+LAKEsuperior services and data are accessible in multiple ways:
+
+-  Via HTTP. This is the canonical way to interact with LDP resources
+   and conforms quite closely to the Fedora specs (currently v4).
+-  Via command line. This method includes long-running admin tasks which
+   are not available via HTTP.
+-  Via a Python API. This method allows Python scripts to access the
+   same methods available to the two modes above in a programmatic
+   way. It is possible to write Python plugins or even to embed
+   LAKEsuperior in a Python application, even without running a web
+   server.
+
+Architecture Overview
+---------------------
+
+.. figure:: assets/lakesuperior_arch.png
+   :alt: LAKEsuperior Architecture
+
+   LAKEsuperior Architecture
+
+The LAKEsuperior REST API provides access to the underlying Python API.
+All REST and CLI operations can be replicated by a Python program
+accessing this API.
+
+The main advantage of the Python API is that it makes it very easy to
+manipulate graph and binary data without the need to serialize or
+deserialize native data structures. This matters when handling large
+ETL jobs, for example.
+
+The Python API is divided into three main areas:
+
+-  Resource API: this API is in charge of all the resource CRUD operations and
+   implements the majority of the Fedora specs.
+-  Admin API: exposes utility methods, mostly long-running maintenance jobs.
+-  Query API: provides several facilities for querying repository data.
+
+
+See the :doc:`API documentation <api>` for more details.

+ 0 - 0
doc/assets/lakesuperior_arch.png → docs/assets/lakesuperior_arch.png


+ 0 - 0
doc/assets/lakesuperior_recommendations.pdf → docs/assets/lakesuperior_recommendations.pdf


BIN
docs/assets/profile_1K_children_get.pdf


+ 33 - 0
docs/cli.rst

@@ -0,0 +1,33 @@
+LAKEsuperior Command Line Reference
+===================================
+
+The LAKEsuperior command line tool is used for maintenance and
+administration purposes.
+
+The script is invoked from the main install directory. The tool is
+self-documented, so the following is just a brief overview:
+
+::
+
+    $ ./lsup-admin
+    Usage: lsup-admin [OPTIONS] COMMAND [ARGS]...
+
+    Options:
+      --help  Show this message and exit.
+
+    Commands:
+      bootstrap     Bootstrap binary and graph stores.
+      check_fixity  [STUB] Check fixity of a resource.
+      check_refint  [STUB] Check referential integrity.
+      cleanup       [STUB] Clean up orphan database items.
+      copy          [STUB] Copy (backup) repository data.
+      dump          [STUB] Dump repository to disk.
+      load          [STUB] Load serialized repository data.
+      stats         Print repository statistics.
+
+All entries marked ``[STUB]`` are not yet implemented; however, the
+``lsup-admin <command> --help`` command will print a description of what
+the command is meant to do. Please see the `TODO <TODO>`__ document for
+a rough road map.
+
+All of the above commands are also available via, and based upon, the
+native Python API.

+ 191 - 0
docs/conf.py

@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# lakesuperior documentation build configuration file, created by
+# sphinx-quickstart on Sat Mar 24 23:05:46 2018.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+from unittest.mock import MagicMock
+
+#sys.path.insert(0, os.path.abspath('../'))
+sys.path.append(os.path.abspath('../'))
+
+class MockModule(MagicMock):
+    @classmethod
+    def __getattr__(cls, name):
+        return MagicMock()
+
+# LMDB raises an issue if imported by Sphinx. This bypasses the issue.
+# https://github.com/dw/py-lmdb/issues/172
+MOCK_MODULES = ['lmdb']
+sys.modules.update((mod_name, MockModule()) for mod_name in MOCK_MODULES)
+
+import lakesuperior.env_setup
+
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.todo',
+    'sphinx.ext.coverage',
+    'sphinx.ext.imgmath',
+    'sphinx.ext.viewcode']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = 'lakesuperior'
+copyright = '2018, Everybody & Nobody'
+author = 'Stefano Cossu'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '1.0-alpha'
+# The full version, including alpha/beta/rc tags.
+release = '1.0.0-alpha.8'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = True
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Custom sidebar templates, must be a dictionary that maps document names
+# to template names.
+#
+# This is required for the alabaster theme
+# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
+html_sidebars = {
+    '**': [
+        'relations.html',  # needs 'show_related': True theme option to display
+        'searchbox.html',
+    ]
+}
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'lakesuperiordoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+    # The paper size ('letterpaper' or 'a4paper').
+    #
+    # 'papersize': 'letterpaper',
+
+    # The font size ('10pt', '11pt' or '12pt').
+    #
+    # 'pointsize': '10pt',
+
+    # Additional stuff for the LaTeX preamble.
+    #
+    # 'preamble': '',
+
+    # Latex figure (float) alignment
+    #
+    # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'lakesuperior.tex', 'lakesuperior Documentation',
+     'Stefano Cossu', 'manual'),
+]
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'lakesuperior', 'lakesuperior Documentation',
+     [author], 1)
+]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'lakesuperior', 'lakesuperior Documentation',
+     author, 'lakesuperior', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'https://docs.python.org/': None}
+

+ 40 - 0
docs/contributing.rst

@@ -0,0 +1,40 @@
+Contributing to LAKEsuperior
+============================
+
+LAKEsuperior has so far been a single person’s off-hours project (with
+much valuable input from several sides). In order for it to turn into
+anything close to a Beta release and eventually to a production-ready
+implementation, it needs some community love.
+
+Contributions are welcome in all forms, including ideas, issue reports,
+or even just spinning up the software and providing some feedback.
+LAKEsuperior is meant to live as a community project.
+
+Development Setup
+-----------------
+
+To set up the software for developing code, documentation, or tests, follow the
+:ref:`manual installation <manual_install>` instructions.
+The additional packages indicated as optional need to be installed.
+
+Contribution Guidelines
+-----------------------
+
+You can contribute by (from least to most involved):
+
+- Installing the repository and reporting any issues
+- Testing on other platforms (OS X, Windows, other Linux distros)
+- Loading some real-world data set and sharing interesting results
+- Amending incorrect documentation or adding missing documentation
+- Adding test coverage (**HOT**)
+- Browsing the list of open issues and picking a ticket that you may find
+  interesting and within your reach
+- Suggesting new functionality or improvements and/or implementing them
+
+Please open a ticket and discuss the issue you are raising before opening a PR.
+
+Development is done on the development branch. If you have any suggested
+addition to the code, please fork the repo, create a new branch for your topic
+and open a pull request against development. In case you find a critical bug,
+a hotfix can be proposed against master if agreed in the related issue
+discussion.

+ 0 - 0
doc/examples/store_layouts/graph_per_aspect.trig → docs/examples/store_layouts/graph_per_aspect.trig


+ 0 - 0
doc/examples/store_layouts/graph_per_resource+.trig → docs/examples/store_layouts/graph_per_resource+.trig


+ 0 - 0
doc/examples/store_layouts/graph_per_resource.trig → docs/examples/store_layouts/graph_per_resource.trig


+ 244 - 0
docs/fcrepo4_deltas.rst

@@ -0,0 +1,244 @@
+Divergences between LAKEsuperior and FCREPO4
+=============================================
+
+This is a (vastly incomplete) list of discrepancies between the current
+FCREPO4 implementation and LAKEsuperior. More will be added as more
+clients use it.
+
+Not yet implemented (but in the plans)
+--------------------------------------
+
+-  Various headers handling
+-  Versioning (incomplete)
+-  AuthN/Z
+-  Fixity check
+-  Blank nodes
+
+Potentially breaking changes
+----------------------------
+
+The following divergences may lead into incompatibilities with some
+clients.
+
+Atomicity
+~~~~~~~~~
+
+FCREPO4 supports batch atomic operations whereby a transaction can be
+opened and a number of operations (i.e. multiple R/W requests to the
+repository) can be performed. The operations are persisted in the
+repository only if and when the transaction is committed.
+
+LAKEsuperior only supports atomicity for a single HTTP request, i.e. a
+single HTTP request that should result in multiple write operations to
+the storage layer is only persisted if no exception is thrown.
+Otherwise, the operation is rolled back in order to prevent resources
+from being left in an inconsistent state.
+
+Tombstone methods
+~~~~~~~~~~~~~~~~~
+
+If a client requests a tombstone resource in FCREPO4 with a method other
+than DELETE, the server will return ``405 Method Not Allowed``
+regardless of whether the tombstone exists or not.
+
+LAKEsuperior will return ``405`` only if the tombstone actually exists,
+``404`` otherwise.
+
+Web UI
+~~~~~~
+
+FCREPO4 includes a web UI for simple CRUD operations.
+
+Such a UI is not in the immediate LAKEsuperior development plans.
+However, a basic UI is available for read-only interaction: LDP resource
+browsing, SPARQL query and other search facilities, and administrative
+tools. Some of the latter *may* involve write operations, such as
+clean-up tasks.
+
+Automatic path segment generation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A ``POST`` request without a slug in FCREPO4 results in a pairtree
+consisting of several intermediate nodes leading to the automatically
+minted identifier. E.g.
+
+::
+
+    POST /rest
+
+results in ``/rest/8c/9a/07/4e/8c9a074e-dda3-5256-ea30-eec2dd4fcf61``
+being created.
+
+The same request in LAKEsuperior would create
+``/rest/8c9a074e-dda3-5256-ea30-eec2dd4fcf61`` (obviously the
+identifiers will be different).
+
+This seems to break Hyrax at some point, but might have been fixed. This
+needs to be verified further.
+
+Non-standard client breaking changes
+------------------------------------
+
+The following changes may be incompatible with clients relying on some
+FCREPO4 behavior not endorsed by LDP or other specifications.
+
+Pairtrees
+~~~~~~~~~
+
+FCREPO4 generates “pairtree” resources if a resource is created in a
+path whose segments are missing. E.g. when creating ``/a/b/c/d``, if
+``/a/b`` and ``/a/b/c`` do not exist, FCREPO4 will create two Pairtree
+resources. POSTing and PUTting into Pairtrees is not allowed. Also, a
+containment triple is established between the closest LDPC and the
+created resource, e.g. if ``a`` exists, a
+``</a> ldp:contains </a/b/c/d>`` triple is created.
+
+LAKEsuperior does not employ Pairtrees. In the example above
+LAKEsuperior would create a fully qualified LDPC for each missing
+segment, which can be POSTed and PUT to. Containment triples are created
+between each link in the path, i.e. ``</a> ldp:contains </a/b>``,
+``</a/b> ldp:contains </a/b/c>`` etc. This may potentially break clients
+relying on the direct containment model.
+
+The rationale behind this change is that Pairtrees are the byproduct of
+a limitation imposed by Modeshape and introduce complexity in the
+software stack and confusion for the client. LAKEsuperior aligns with
+the more intuitive UNIX filesystem model, where each segment of a path
+is a “folder” or container (except for the leaf nodes that can be either
+folders or files). In any case, clients are discouraged from generating
+deep paths in LAKEsuperior without a specific purpose because these
+resources create unnecessary data.
+
+Non-mandatory, non-authoritative slug in version POST
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+FCREPO4 requires a ``Slug`` header to POST to ``fcr:versions`` to create
+a new version.
+
+LAKEsuperior adheres to the more general FCREPO POST rule and if no slug
+is provided, an automatic ID is generated instead. The ID is a UUID4.
+
+Note that internally this ID is not called “label” but “uid” since it is
+treated as a fully qualified identifier. The ``fcrepo:hasVersionLabel``
+predicate, however ambiguous in this context, will be kept until the
+adoption of Memento, which will change the retrieval mechanisms.
+
+Another notable difference is that if a POST is issued on the same
+resource’s ``fcr:versions`` location using a version ID that already
+exists, LAKEsuperior will just mint a random identifier rather than
+returning an error.
+
+Deprecation track
+-----------------
+
+LAKEsuperior offers some “legacy” options to replicate the FCREPO4
+behavior; however, it encourages new development to use a different
+approach for some types of interaction.
+
+Endpoints
+~~~~~~~~~
+
+The FCREPO root endpoint is ``/rest``. The LAKEsuperior root endpoint is
+``/ldp``.
+
+This should not pose a problem if a client does not have ``rest``
+hard-coded in its code, but in any event, the ``/rest`` endpoint is
+provided for backwards compatibility.
+
+Future implementations of the Fedora API specs may employ a "versioned"
+endpoint scheme that allows multiple Fedora API versions to be available to the
+client, e.g. ``/ldp/fc4`` for the current LDP API version, ``/ldp/fc5`` for
+Fedora version 5.x, etc.
+
+Automatic LDP class assignment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since LAKEsuperior rejects client-provided server-managed triples, and
+since the LDP types are among them, the LDP container type is inferred
+from the provided properties: if the ``ldp:hasMemberRelation`` and
+``ldp:membershipResource`` properties are provided, the resource is a
+Direct Container. If in addition to these the
+``ldp:insertedContentRelation`` property is present, the resource is an
+Indirect Container. If any of the first two are missing, the resource is
+a Container.
+
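+As a hypothetical illustration (using the third-party ``requests``
+library; the membership target URI is made up), the following POST
+payload would yield a Direct Container::
+
+    import requests
+
+    ttl = '''
+    <> <http://www.w3.org/ns/ldp#membershipResource>
+           <http://localhost:8000/ldp/target> ;
+       <http://www.w3.org/ns/ldp#hasMemberRelation>
+           <http://purl.org/dc/terms/hasPart> .
+    '''
+    rsp = requests.post(
+        'http://localhost:8000/ldp',
+        headers={'Content-Type': 'text/turtle'},
+        data=ttl,
+    )
+    print(rsp.status_code, rsp.headers.get('Location'))
+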
+Clients are encouraged to omit LDP types in PUT, POST and PATCH
+requests.
+
+Lenient handling
+~~~~~~~~~~~~~~~~
+
+FCREPO4 requires server-managed triples to be expressly indicated in a
+PUT request, unless the ``Prefer`` header is set to
+``handling=lenient; received="minimal"``, in which case the RDF payload
+must not have any server-managed triples.
+
+LAKEsuperior works under the assumption that clients should never provide
+server-managed triples. It automatically handles PUT requests sent to
+existing resources by returning a 412 if any server-managed triples are
+included in the payload. This is the same as setting ``Prefer`` to
+``handling=strict``, which is the default.
+
+If ``Prefer`` is set to ``handling=lenient``, all server-managed triples
+sent with the payload are ignored.
+
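+For example, a lenient PUT might look like this (a sketch using the
+third-party ``requests`` library; the resource path is made up)::
+
+    import requests
+
+    rsp = requests.put(
+        'http://localhost:8000/ldp/res1',
+        headers={
+            'Content-Type': 'text/turtle',
+            # Any server-managed triples in the payload are ignored.
+            'Prefer': 'handling=lenient',
+        },
+        data='<> <urn:ns:p1> "hello" .',
+    )
+    print(rsp.status_code)
+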
+Clients using the ``Prefer`` header to control PUT behavior as
+advertised by the specs should not notice any difference.
+
+Optional improvements
+---------------------
+
+The following are improvements in performance or usability that can only
+be taken advantage of if client code is adjusted.
+
+LDP-NR content and metadata
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+FCREPO4 relies on the ``/fcr:metadata`` identifier to retrieve RDF
+metadata about an LDP-NR. LAKEsuperior supports this as a legacy option,
+but encourages the use of content negotiation to do the same while
+offering explicit endpoints for RDF and non-RDF content retrieval.
+
+Any request to an LDP-NR with an ``Accept`` header set to one of the
+supported RDF serialization formats will yield the RDF metadata of the
+resource instead of the binary contents.
+
+The ``fcr:metadata`` URI returns the RDF metadata of a LDP-NR.
+
+The ``fcr:content`` URI returns the non-RDF content.
+
+The two options above return an HTTP error if requested for an LDP-RS.
+
+“Include” and “Omit” options for children
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+LAKEsuperior offers an additional ``Prefer`` header option to exclude
+all references to child resources (i.e. by removing all the
+``ldp:contains`` triples) while leaving the other server-managed triples
+when retrieving a resource:
+
+::
+
+    Prefer: return=representation; [include | omit]="http://fedora.info/definitions/v4/repository#Children"
+
+The default behavior is to include all children URIs.
+
+Soft-delete and purge
+~~~~~~~~~~~~~~~~~~~~~
+
+**NOTE**: The implementation of this section is incomplete and debated.
+
+In FCREPO4 a deleted resource leaves a tombstone deleting all traces of
+the previous resource.
+
+In LAKEsuperior, a normal DELETE creates a new version snapshot of the
+resource and puts a tombstone in its place. The resource versions are
+still available in the ``fcr:versions`` location. The resource can be
+“resurrected” by issuing a POST to its tombstone. This will result in a
+``201``.
+
+If a tombstone is deleted, the resource and its versions are completely
+deleted (purged).
+
+Moreover, setting the ``Prefer:no-tombstone`` header option on DELETE
+allows a resource and its versions to be deleted directly, without
+leaving a tombstone.

+ 9 - 0
docs/help.rst

@@ -0,0 +1,9 @@
+Getting Help
+============
+
+Discussion is on the `lakesuperior
+<https://groups.google.com/forum/#!forum/lakesuperior>`__ Google group.
+
+You can report bugs or feature requests on the `Github issues page
+<https://github.com/scossu/lakesuperior/issues>`__. Please start a conversation
+in the Google group before filing an issue, especially for feature requests.

+ 52 - 0
docs/index.rst

@@ -0,0 +1,52 @@
+LAKEsuperior
+============
+
+|build status|
+
+LAKEsuperior is an alternative `Fedora
+Repository <http://fedorarepository.org>`__ implementation.
+
+Fedora is a mature repository software system historically adopted by
+major cultural heritage institutions. It exposes an
+`LDP <https://www.w3.org/TR/ldp-primer/>`__ endpoint to manage
+any type of binary files and their metadata in Linked Data format.
+
+.. |build status| image:: http://img.shields.io/travis/scossu/lakesuperior/master.svg?style=flat
+   :target: https://travis-ci.org/scossu/lakesuperior
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Getting Started
+
+    About LAKEsuperior <about>
+    Installation and Configuration <setup>
+    Sample Usage <usage>
+    Getting Help <help>
+
+.. toctree::
+   :maxdepth: 3
+   :caption: User Reference
+
+    Divergences from Fedora 4 <fcrepo4_deltas>
+    Messaging <messaging>
+    Migration Guide <migration>
+    Command Line Reference <cli>
+    Contributing <contributing>
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Technical Documents
+
+    API Documentation <api>
+    Architecture <architecture>
+    Performance Benchmarks <performance>
+    Content Model <model>
+    Storage Implementation <storage>
+    Indexing Strategy <indexing_strategy>
+
+Indices and tables
+------------------
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`

+ 311 - 0
docs/indexing_strategy.rst

@@ -0,0 +1,311 @@
+LMDB Store design for RDFLib
+============================
+
+This is a log of successive strategies employed to store triples in
+LMDB.
+
+Strategy #5a is the one currently used. The rest is kept for historic
+reasons and academic curiosity (and also because it was too much work to
+just wipe out of memory).
+
+Storage approach
+----------------
+
+-  Pickle quad and create MD5 or SHA1 hash.
+-  Store triples in one database paired with key; store indices
+   separately.
+
+Different strategies involve layout and number of databases.
+
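+As an illustration of the hashing approach above (a sketch, not
+LAKEsuperior code), a fixed-length key can be derived from a pickled
+quad like this::
+
+    import hashlib
+    import pickle
+
+    quad = ('urn:x:s1', 'urn:x:p1', 'urn:x:o1', 'urn:x:c1')
+    # 16-byte key derived from the pickled quad.
+    key = hashlib.md5(pickle.dumps(quad)).digest()
+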
+Strategy #1
+-----------
+
+-  kq: key: serialized triple (1:1)
+-  sk: Serialized subject: key (1:m)
+-  pk: Serialized predicate: key (1:m)
+-  ok: Serialized object: key (1:m)
+-  (optional) lok: Serialized literal object: key (1:m)
+-  (optional) tok: Serialized RDF type: key (1:m)
+-  ck: Serialized context: key (1:m)
+
+Retrieval approach
+~~~~~~~~~~~~~~~~~~
+
+To find all matches for a quad:
+
+-  If all terms in the quad are bound, generate the key from the pickled
+   quad and look up the triple in ``kq``
+-  If all terms are unbound, return an iterator of all values in ``kq``.
+-  If some values are bound and some unbound (most common query):
+
+   -  Get a base list of keys associated with the first bound term
+   -  For each subsequent bound term, check if each key associated with
+      the term matches a key in the base list
+   -  Continue through all the bound terms. If a match is not found at
+      any point, continue to the next term
+   -  If a match is found in all the bound term databases, look up the
+      pickled quad matching the key in ``kq`` and yield it
+
+More optimization can be introduced later, e.g. separating literal and
+RDF type objects in separate databases. Literals can have very long
+values and a database with a longer key setting may be useful. RDF terms
+can be indexed separately because they are the most common bound term.
+
+Example lookup
+~~~~~~~~~~~~~~
+
+Keys and triples (these should actually be quads, but this is a
+simplified version)::
+
+    A: s1 p1 o1
+    B: s1 p2 o2
+    C: s2 p3 o1
+    D: s2 p3 o3
+
+Indices:
+
+-  SK:
+
+   -  s1: A, B
+   -  s2: C, D
+
+-  PK:
+
+   -  p1: A
+   -  p2: B
+   -  p3: C, D
+
+-  OK:
+
+   -  o1: A, C
+   -  o2: B
+   -  o3: D
+
+Queries:
+
+-  s1 ?p ?o → {A, B}
+-  s1 p2 ?o → {A, B} & {B} = {B}
+-  ?s ?p o3 → {D}
+-  s1 p2 o5 → {} (Exit at OK: no term matches ‘o5’)
+-  s2 p3 o2 → {C, D} & {C, D} & {B} = {}
+
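+The intersection logic above can be sketched in Python, with in-memory
+dicts standing in for the databases (illustrative only, not LAKEsuperior
+code)::
+
+    kq = {
+        'A': ('s1', 'p1', 'o1'),
+        'B': ('s1', 'p2', 'o2'),
+        'C': ('s2', 'p3', 'o1'),
+        'D': ('s2', 'p3', 'o3'),
+    }
+    sk = {'s1': {'A', 'B'}, 's2': {'C', 'D'}}
+    pk = {'p1': {'A'}, 'p2': {'B'}, 'p3': {'C', 'D'}}
+    ok = {'o1': {'A', 'C'}, 'o2': {'B'}, 'o3': {'D'}}
+
+    def lookup(s=None, p=None, o=None):
+        keys = None
+        for index, term in ((sk, s), (pk, p), (ok, o)):
+            if term is None:
+                continue
+            matches = index.get(term, set())
+            keys = matches if keys is None else keys & matches
+            if not keys:
+                # Exit early: a bound term has no matches.
+                return set()
+        if keys is None:
+            # All terms unbound: return everything.
+            keys = set(kq)
+        return {kq[k] for k in keys}
+
+    lookup(s='s1', p='p2')  # {('s1', 'p2', 'o2')}
+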
+Strategy #2
+-----------
+
+Separate data and indices in two environments.
+
+Main data store
+~~~~~~~~~~~~~~~
+
+Key to quad; main keyspace; all unique.
+
+Indices
+~~~~~~~
+
+None of these databases is of critical preservation concern. They can be
+rebuilt from the main data store.
+
+All dupsort and dupfixed.
+
+@TODO The first three may not be needed if computing term hash is fast
+enough.
+
+-  t2k (term to term key)
+-  lt2k (literal to term key: longer keys)
+-  k2t (term key to term)
+
+-  s2k (subject key to quad key)
+-  p2k (pred key to quad key)
+-  o2k (object key to quad key)
+-  c2k (context key to quad key)
+
+-  sc2qk (subject + context keys to quad key)
+-  po2qk (predicate + object keys to quad key)
+
+-  sp2qk (subject + predicate keys to quad key)
+-  oc2qk (object + context keys to quad key)
+
+-  so2qk (subject + object keys to quad key)
+-  pc2qk (predicate + context keys to quad key)
+
+Strategy #3
+-----------
+
+Contexts are much fewer than triples (even in the graph-per-aspect
+layout, a graph contains only 5-10 triples).
+
+.. _main-data-store-1:
+
+Main data store
+~~~~~~~~~~~~~~~
+
+Preservation-worthy data
+
+-  tk:t (triple key: triple; dupsort, dupfixed)
+-  tk:c (context key: triple; unique)
+
+.. _indices-1:
+
+Indices
+~~~~~~~
+
+Rebuildable from main data store
+
+-  s2k (subject key: triple key)
+-  p2k (pred key: triple key)
+-  o2k (object key: triple key)
+-  sp2k
+-  so2k
+-  po2k
+-  spo2k
+
+Lookup
+~~~~~~
+
+1. Look up triples by s, p, o, sp, so, po and get keys
+2. If a context is specified, for each key try to seek to (context, key)
+   in ct to verify it exists
+3. Intersect sets
+4. Match triple keys with data using kt
+
+Shortcuts
+^^^^^^^^^
+
+-  Get all contexts: return list of keys from ct
+-  Get all triples for a context: get all values for a context from ct
+   and match triple data with kt
+-  Get one triple match for all contexts: look up in triple indices and
+   match triple data with kt
+
+Strategy #4
+-----------
+
+Terms are entered individually in the main data store. Also, shorter keys
+are used rather than hashes. These two aspects save a great deal of
+space and I/O, but require an additional index to put the terms together
+in a triple.
+
+.. _main-data-store-2:
+
+Main Data Store
+~~~~~~~~~~~~~~~
+
+-  t:st (term key: serialized term; 1:1)
+-  spo:c (joined S, P, O keys: context key; 1:m)
+-  c: (context keys only, values are the empty bytestring)
+
+Storage total: variable
+
+.. _indices-2:
+
+Indices
+~~~~~~~
+
+-  th:t (term hash: term key; 1:1)
+-  c:spo (context key: joined triple keys; 1:m)
+-  s:po (S key: P + O key; 1:m)
+-  p:so (P key: S + O keys; 1:m)
+-  o:sp (O key: S + P keys; 1:m)
+-  sp:o (S + P keys: O key; 1:m)
+-  so:p (S + O keys: P key; 1:m)
+-  po:s (P + O keys: S key; 1:m)
+
+Storage total: 143 bytes per triple
+
+Disadvantages
+~~~~~~~~~~~~~
+
+-  Lots of indices
+-  Terms can get orphaned:
+
+   -  No easy way to know if a term is used anywhere in a quad
+   -  Needs some routine cleanup
+   -  On the other hand, terms are relatively light-weight and can be
+      reused
+   -  Almost surely not reusable are UUIDs, message digests, timestamps
+      etc.
+
+Strategy #5
+-----------
+
+Reduce number of indices and rely on parsing and splitting keys to find
+triples with two bound parameters.
+
+This is especially important for keeping indexing synchronous to achieve
+fully ACID writes.
+
+.. _main-data-store-3:
+
+Main data store
+~~~~~~~~~~~~~~~
+
+Same as Strategy #4:
+
+-  t:st (term key: serialized term; 1:1)
+-  spo:c (joined S, P, O keys: context key; dupsort, dupfixed)
+-  c: (context keys only, values are the empty bytestring; 1:1)
+
+Storage total: variable (same as #4)
+
+.. _indices-3:
+
+Indices
+~~~~~~~
+
+-  th:t (term hash: term key; 1:1)
+-  s:po (S key: joined P, O keys; dupsort, dupfixed)
+-  p:so (P key: joined S, O keys; dupsort, dupfixed)
+-  o:sp (O key: joined S, P keys; dupsort, dupfixed)
+-  c:spo (context → triple association; dupsort, dupfixed)
+
+Storage total: 95 bytes per triple
+
+Lookup strategy
+~~~~~~~~~~~~~~~
+
+-  ? ? ? c: [c:spo] all SPO for C → split key → [t:st] term from term
+   key
+-  s p o c: [c:spo] exact SPO & C match → split key → [t:st] term from
+   term key
+-  s ? ?: [s:po] All PO for S → split key → [t:st] term from term key
+-  s p ?: [s:po] All PO for S → filter result by P in split key → [t:st]
+   term from term key
+
+Advantages
+~~~~~~~~~~
+
+-  Less indices: smaller index size and less I/O
+
+.. _disadvantages-1:
+
+Disadvantages
+~~~~~~~~~~~~~
+
+-  Possibly slower retrieval for queries with 2 bound terms (run
+   metrics)
+
+Further optimization
+~~~~~~~~~~~~~~~~~~~~
+
+In order to minimize traversing and splitting results, the first
+retrieval should be made on the term with the fewest average values
+per key. Search
+order can be balanced by establishing a lookup order for indices.
+
+This can be achieved by calling stats on the index databases and looking
+up the database with *most* keys. Since there is an equal number of
+entries in each of the (s:po, p:so, o:sp) indices, the one with most
+keys will have the least average number of values per key. If that
+lookup is done first, the initial data set to traverse and filter will
+be smaller.
+
+Strategy #5a
+------------
+
+This is a slightly different implementation of #5 that somewhat
+simplifies and perhaps speeds up things a bit. It is the currently
+employed solution.
+
+The indexing and lookup strategy is the same, but instead of using a
+separator byte for splitting compound keys, the logic relies on the fact
+that keys have a fixed length and are sliced instead. This *should*
+result in faster key manipulation, also because in most cases
+``memoryview`` buffers can be used directly instead of being copied from
+memory.
+
+Index storage is 90 bytes per triple.
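+
+For illustration, the slicing approach can be sketched as follows (the
+5-byte key length is an arbitrary assumption for this example)::
+
+    KLEN = 5
+    spo = b'aaaaabbbbbccccc'  # Joined S, P, O keys.
+    # Slice the compound key at fixed offsets: no separator is needed
+    # and, with memoryview, the underlying buffer is not copied.
+    mv = memoryview(spo)
+    s, p, o = mv[0:KLEN], mv[KLEN:2 * KLEN], mv[2 * KLEN:3 * KLEN]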

+ 30 - 0
docs/messaging.rst

@@ -0,0 +1,30 @@
+LAKEsuperior Messaging
+======================
+
+LAKEsuperior implements a messaging system based on ActivityStreams, as
+indicated by the `Fedora API
+specs <https://fedora.info/2017/06/30/spec/#notifications>`__. The
+metadata set provided is currently quite minimal but can be easily
+enriched by extending the `default formatter
+class <https://github.com/scossu/lakesuperior/blob/master/lakesuperior/messaging/messenger.py>`__.
+
+STOMP is the only supported protocol at the moment. More protocols may
+be made available at a later time.
+
+LAKEsuperior can send messages to any number of destinations: see
+`configuration <https://github.com/scossu/lakesuperior/blob/master/etc.defaults/application.yml#L79>`__.
+By default, CoilMQ is provided for testing purposes and listens to
+``localhost:61613``. The default route sends messages to
+``/topic/fcrepo``.
+
+A small command-line utility, also provided with the Python
+dependencies, allows you to watch incoming messages. To monitor
+messages, enter the following *after activating your virtualenv*:
+
+::
+
+    stomp -H localhost -P 61613 -L /topic/fcrepo
+
+See the `stomp.py library reference
+page <https://github.com/jasonrbriggs/stomp.py/wiki/Command-Line-Access>`__
+for details.
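+
+Messages can also be consumed programmatically. A minimal sketch using
+the stomp.py library (4.x-style API; adjust for your installed version)
+might look like::
+
+    import time
+
+    import stomp
+
+    class PrintListener(stomp.ConnectionListener):
+        def on_message(self, headers, message):
+            # Each message body is an ActivityStreams payload.
+            print(message)
+
+    conn = stomp.Connection([('localhost', 61613)])
+    conn.set_listener('', PrintListener())
+    conn.connect(wait=True)
+    conn.subscribe(destination='/topic/fcrepo', id=1, ack='auto')
+    time.sleep(60)  # Listen for a minute, then exit.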

+ 65 - 0
docs/migration.rst

@@ -0,0 +1,65 @@
+Migration, Backup & Restore
+===========================
+
+All LAKEsuperior data is by default fully contained in a folder. This
+means that only the data, configurations and code folders are needed for
+it to run. No Postgres, Redis, or such. Data and configuration folders
+can be moved around as needed.
+
+Migration Tool
+--------------
+
+Migration is the process of importing and converting data from a
+different Fedora or LDP implementation into a new LAKEsuperior instance.
+This process uses the HTTP/LDP API of the original repository. A
+command-line utility is available as part of the ``lsup-admin`` suite to
+assist in such operation.
+
+A repository can be migrated with a one-line command such as:
+
+::
+
+    ./lsup-admin migrate http://source-repo.edu/rest /local/dest/folder
+
+For more options, enter
+
+::
+
+    ./lsup-admin migrate --help
+
+The script will crawl through the resources and follow outbound
+links within them. In order to do this, resources are added as raw
+triples (i.e. no consistency checks are made).
+
+**Note:** the consistency check tool has not been implemented yet, but
+its release should follow shortly. This will ensure that all
+the links between resources are consistent in regard to referential
+integrity.
+
+This script will create a full dataset in the specified destination
+folder, complete with a default configuration that allows the
+LAKEsuperior server to be started immediately after the migration is
+complete.
+
+Two approaches to migration are possible:
+
+1. By providing a starting point on the source repository. E.g. if the
+   repository you want to migrate is at ``http://repo.edu/rest/prod``
+   you can add the ``-s /prod`` option to the script to avoid migrating
+   irrelevant branches. Note that the script will still reach outside of
+   the starting point if resources are referencing other resources
+   outside of it.
+2. By providing a file containing a list of resources to migrate. This
+   is useful if a source repository cannot produce a full list (e.g. the
+   root node has more children than the server can handle) but a list of
+   individual resources is available via an external index (Solr,
+   triplestore, etc.). The resources can be indicated by their fully
+   qualified URIs or paths relative to the repository root. (*TODO
+   latter option needs testing*)
+
+Backup And Restore
+------------------
+
+A backup of a LAKEsuperior repository consists of copying the RDF and
+non-RDF data folders. These folders are indicated in the application
+configuration. The default commands provided by your OS (``cp``,
+``rsync``, ``tar`` etc. for Unix) are all that is needed.

+ 65 - 0
docs/model.rst

@@ -0,0 +1,65 @@
+LAKEsuperior Content Model Rationale
+====================================
+
+Internal and Public URIs; Identifiers
+-------------------------------------
+
+Resource URIs are stored internally in LAKEsuperior as domain-agnostic
+URIs with the scheme ``info:fcres<resource UID>``. This allows resources
+to be portable across systems. E.g. a resource with an internal URI of
+``info:fcres/a/b/c``, when accessed via the
+``http://localhost:8000/ldp`` endpoint, will be found at
+``http://localhost:8000/ldp/a/b/c``.
+
+The resource UID making up the URI looks like a UNIX filesystem path,
+i.e. it always starts with a forward slash and can be made up of
+multiple segments separated by slashes. E.g. ``/`` is the root node UID,
+``/a`` is a resource UID just below root. Their internal URIs are
+``info:fcres/`` and ``info:fcres/a`` respectively.
+
+In the Python API, the UID and internal URI of an LDP resource can be
+accessed via the ``uid`` and ``uri`` properties respectively:
+
+::
+
+    >>> import lakesuperior.env_setup
+    >>> from lakesuperior.api import resource
+    >>> rsrc = resource.get('/a/b/c')
+    >>> rsrc.uid
+    '/a/b/c'
+    >>> rsrc.uri
+    rdflib.term.URIRef('info:fcres/a/b/c')
+
+Store Layout
+------------
+
+One of the key concepts in LAKEsuperior is the store layout. This is a
+module built with a specific purpose in mind, i.e. allowing fine-grained
+recording of provenance metadata while providing reasonable performance.
+
+Store layout modules could be replaceable (work needs to be done to
+develop an interface to allow that). The default (and only at the
+moment) layout shipped with LAKEsuperior is the :mod:`resource-centric
+layout <lakesuperior.store.ldp_rs.rsrc_centric_layout>`. This
+layout implements a so-called `graph-per-aspect
+pattern <http://patterns.dataincubator.org/book/graph-per-aspect.html>`__
+which stores different sets of statements about a resource in separate
+named graphs.
+
+The named graphs used for each resource are:
+
+-  An admin graph (``info:fcsystem/graph/admin<resource UID>``) which
+   stores administrative metadata, mostly server-managed triples such as
+   LDP types, system create/update timestamps and agents, etc.
+-  A structure graph (``info:fcsystem/graph/structure<resource UID>``)
+   reserved for containment triples. The reason for this separation is
+   purely convenience, since it makes it easy to retrieve all the
+   properties of a large container without its child references.
+-  One (and, possibly, in the future, more user-defined) named graph for
+   user-provided data (``info:fcsystem/graph/userdata/_main<resource UID>``).
+   For example, the user data graph for a resource with UID ``/a`` is
+   ``info:fcsystem/graph/userdata/_main/a``.
+
+Each of these graphs can be annotated with provenance metadata. The
+layout decides which triples go in which graph based on the predicate or
+RDF type contained in the triple. Adding logic to support arbitrary
+named graphs based e.g. on user agent, or to add more provenance
+information, should be relatively simple.

+ 0 - 0
doc/notes/TODO → docs/notes/TODO.historic


+ 133 - 0
docs/performance.rst

@@ -0,0 +1,133 @@
+Performance Benchmark Report
+============================
+
+Environment
+-----------
+
+Hardware
+~~~~~~~~
+
+‘Rather Snappy’ Laptop
+^^^^^^^^^^^^^^^^^^^^^^
+
+-  Dell Precison M3800 Laptop
+-  4x Intel(R) Core(TM) i7-4712HQ CPU @ 2.30GHz
+-  12Gb RAM
+-  SSD
+
+‘Ole Workhorse’ server
+^^^^^^^^^^^^^^^^^^^^^^
+
+-  8x Intel(R) Xeon(R) CPU X5550 @ 2.67GHz
+-  16Gb RAM
+-  Magnetic drive, XXX RPM
+
+Software
+~~~~~~~~
+
+-  Arch Linux OS
+-  glibc 2.26-11
+-  python 3.5.4
+-  lmdb 0.9.21-1
+
+Benchmark script
+~~~~~~~~~~~~~~~~
+
+`Generator script <../../util/benchmark.py>`__
+
+The script was run with default values: 10,000 children under the same
+parent, PUT requests.
+
+Data Set
+~~~~~~~~
+
+Synthetic graph created by the benchmark script. The graph is unique for
+each request and consists of 200 triples which are partly random data,
+with a consistent size and variation:
+
+-  50 triples have an object that is a URI of an external resource (50
+   unique predicates; 5 unique objects).
+-  50 triples have an object that is a URI of a repository-managed
+   resource (50 unique predicates; 5 unique objects).
+-  100 triples have an object that is a 64-character random Unicode
+   string (50 unique predicates; 100 unique objects).
+
+Results
+-------
+
+.. _rather-snappy-laptop-1:
+
+‘Rather Snappy’ Laptop
+~~~~~~~~~~~~~~~~~~~~~~
+
+FCREPO/Modeshape 4.7.5
+^^^^^^^^^^^^^^^^^^^^^^
+
+15’45" running time
+
+0.094" per resource (100%—reference point)
+
+3.4M triples total in repo at the end of the process
+
+Retrieval of parent resource (~10000 triples), pipe to /dev/null: 3.64"
+(100%)
+
+Peak memory usage: 2.47Gb
+
+Database size: 3.3 Gb
+
+LAKEsuperior Alpha 6, LMDB Back End
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+25’ running time
+
+0.152" per resource (161%)
+
+*Some gaps every ~40-50 requests, probably disk flush*
+
+Retrieval of parent resource (10K triples), pipe to /dev/null: 2.13"
+(58%)
+
+Peak memory usage: ~650 Mb (3 idle workers, 1 active)
+
+Database size: 523 Mb (16%)
+
+.. _ole-workhorse-server-1:
+
+‘Ole Workhorse’ server
+~~~~~~~~~~~~~~~~~~~~~~
+
+FCREPO
+^^^^^^
+
+0:47:38 running time
+
+0.285" per resource (100%)
+
+Retrieval of parent resource: 9.6" (100%)
+
+LAKEsuperior
+^^^^^^^^^^^^
+
+1:14:19 running time
+
+0.446" per resource (156%)
+
+Retrieval of parent resource: 5.58" (58%)
+
+Conclusions
+-----------
+
+LAKEsuperior appears to be markedly slower on writes and markedly faster
+on reads. Both these factors are very likely related to the underlying
+LMDB store which is optimized for read performance.
+
+Comparison of results between the laptop and the server demonstrates
+that the read and write performance gaps are consistent across the two
+environments. Disk speed severely affects the numbers.
+
+**Note:** As you can guess, these are only very partial and specific
+results. They should not be taken as a thorough performance assessment.
+Such an assessment may be impossible and pointless to make given the
+very different nature of the storage models, which may behave radically
+differently depending on many variables.

+ 101 - 0
docs/setup.rst

@@ -0,0 +1,101 @@
+Installation & Configuration
+============================
+
+Quick Install: Running in Docker
+--------------------------------
+
+You can run LAKEsuperior in Docker for a hands-off quickstart.
+
+`Docker <http://docker.com/>`__ is a containerization platform that
+allows you to run services in lightweight virtual machine environments
+without having to worry about installing all of the prerequisites on
+your host machine.
+
+1. Install the correct `Docker Community
+   Edition <https://www.docker.com/community-edition>`__ for your
+   operating system.
+2. Clone the LAKEsuperior git repository:
+   ``git clone https://github.com/scossu/lakesuperior.git``
+3. ``cd`` into repo folder
+4. Run ``docker-compose up``
+
+LAKEsuperior should now be available at ``http://localhost:8000/``.
+
+The provided Docker configuration includes persistent storage as a
+self-contained Docker volume, meaning your data will persist between
+runs. If you want to clear the decks, simply run
+``docker-compose down -v``.
+
+.. _manual_install:
+
+Manual Install (a bit less quick, a bit more power)
+---------------------------------------------------
+
+**Note:** These instructions have been tested on Linux. They may work on
+Darwin with little modification, and possibly on Windows with some
+modifications. Feedback is welcome.
+
+Dependencies
+~~~~~~~~~~~~
+
+#. Python 3.5 or greater.
+#. A message broker supporting the STOMP protocol. For testing and
+   evaluation purposes, `CoilMQ <https://github.com/hozn/coilmq>`__ is
+   included with the dependencies and should be automatically installed.
+
+Installation steps
+~~~~~~~~~~~~~~~~~~
+
+Start in an empty project folder. If you are feeling lazy you can copy
+and paste the lines below in your console.
+
+::
+
+    mkdir lsup_env # Or whatever you want to call it
+    cd lsup_env
+    python3 -m venv .
+    source bin/activate
+    pip install lakesuperior
+    # Start the message broker. If you have another
+    # queue manager listening to port 61613 you can either configure a
+    # different port on the application configuration, or use the existing
+    # message queue.
+    coilmq&
+    # Bootstrap the repo
+    echo yes | lsup-admin bootstrap
+    # Run the thing
+    fcrepo
+
+Test if it works::
+
+    curl http://localhost:8000/ldp/
+
+Configuration
+-------------
+
+The app should run for testing and evaluation purposes without any
+further configuration. All the application data are stored by default in
+the ``data`` directory.
+
+To change the default configuration you should:
+
+#. Copy the ``etc.skeleton`` folder to a separate location
+#. Set the configuration folder location in the environment:
+   ``export FCREPO_CONFIG_DIR=<your config dir location>`` (you can add
+   this line at the end of your virtualenv ``activate`` script)
+#. Configure the application
+#. Bootstrap the app or copy the original data folders to the new
+   location if any location options changed
+#. (Re)start the server: ``./fcrepo``
+
+The configuration options are documented in the files.
+
+**Note:** ``test.yml`` must specify a different location for the graph
+and for the binary stores than the default one, otherwise running a test
+suite will destroy your main data store. The application will issue an
+error message and refuse to start if these locations overlap.
+
+Production deployment
+---------------------
+
+If you like fried repositories for lunch, deploy before 11AM.

+ 0 - 0
doc/src/lakesuperior_arch.graphml → docs/src/lakesuperior_arch.graphml


+ 0 - 0
doc/src/lakesuperior_content_model.graphml → docs/src/lakesuperior_content_model.graphml


+ 0 - 0
doc/src/lakesuperior_recommendations.md → docs/src/lakesuperior_recommendations.md


+ 0 - 0
doc/src/template.latex → docs/src/template.latex


+ 0 - 0
doc/src/use_cases_transactions.md → docs/src/use_cases_transactions.md


+ 94 - 0
docs/storage.rst

@@ -0,0 +1,94 @@
+Storage Implementation
+======================
+
+LAKEsuperior stores non-RDF (“binary”) data in the filesystem and RDF
+data in an embedded key-value store, `LMDB <https://symas.com/lmdb/>`__.
+
+RDF Storage design
+------------------
+
+LMDB is a very fast, very lightweight C library. It is inspired by
+BerkeleyDB but introduces significant improvements in terms of
+efficiency and stability.
+
+The LAKEsuperior RDF store consists of two files: the main data store
+and the indices (plus two lock files that are generated at runtime). A
+good amount of effort has been put to develop an indexing strategy that
+is balanced between write performance, read performance, and data size,
+with no compromise made on consistency.
+
+The main data store is the one containing the preservation-worthy data.
+While the indices are necessary for LAKEsuperior to function, they can
+be entirely rebuilt from the main data store in case of file corruption
+(recovery tools are on the TODO list).
+
+Detailed notes about the various strategies researched can be found in
+the :doc:`indexing strategy document <indexing_strategy>`.
+
+Scalability
+-----------
+
+Since LAKEsuperior is focused on design simplicity, efficiency and
+reliability, its RDF store is embedded and not horizontally scalable.
+However, LAKEsuperior is quite frugal with disk space. About 55 million
+triples can be stored in 8Gb of space (mileage can vary depending on how
+heterogeneous the triples are). This makes it easier to use expensive
+SSD drives for the RDF store, in order to improve performance. A single
+LMDB environment can reportedly scale up to 128 terabytes.
+
+Maintenance
+-----------
+
+LMDB has a very simple configuration, and all options are hardcoded in
+LAKEsuperior in order to exploit its features. A database automatically
+recovers from a crash.
+
+The LAKEsuperior RDF store abstraction maintains a registry of unique
+terms. These terms are not deleted if a triple is deleted, even if no
+triple is using them, because it would be too expensive to look up
+orphaned terms during a delete request. While these terms are relatively
+lightweight, it would be good to run a periodical clean-up job. Tools
+will be developed in the near future to facilitate this maintenance
+task.
+
+Consistency
+-----------
+
+LAKEsuperior wraps each LDP operation in a transaction. The indices are
+updated synchronously within the same transaction in order to guarantee
+consistency. If a system loses power or crashes, only the last
+transaction is lost, and the last successful write will include primary
+and index data.
+
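+For illustration, a write transaction covering both the main store and
+an index might look like this with the py-lmdb API (a sketch with
+made-up database names and keys, not actual LAKEsuperior code)::
+
+    import lmdb
+
+    env = lmdb.open('/tmp/lsup_demo', max_dbs=2)
+    main = env.open_db(b'main')
+    index = env.open_db(b'index')
+
+    # Either both puts are committed, or (on any exception) neither is.
+    with env.begin(write=True) as txn:
+        txn.put(b'\x00\x00\x01', b'<urn:ns:s1>', db=main)
+        txn.put(b'term-hash', b'\x00\x00\x01', db=index)
+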
+Concurrency
+-----------
+
+LMDB employs
+`MVCC <https://en.wikipedia.org/wiki/Multiversion_concurrency_control>`__
+to achieve fully ACID transactions. This implies that during a write,
+the whole database is locked. Multiple writes can be initiated
+concurrently, but the performance gain of doing so may be little because
+only one write operation can be performed at a time. Reasonable efforts
+have been put to make write transactions as short as possible (and more
+can be done). Also, this excludes a priori the option to implement
+long-running atomic operations, unless one is willing to block writes on
+the application for an indefinite length of time. On the other hand,
+write operations never block and are never blocked, so an application
+with a high read-to-write ratio may still benefit from multi-threaded
+requests.
+
+Performance
+-----------
+
+The :doc:`Performance Benchmark Report <performance>` contains benchmark
+results.
+
+Write performance is lower than Modeshape/Fedora4; this may be mostly
+due to the fact that indices are written synchronously in a blocking
+transaction; also, the LMDB B+Tree structure is optimized for read
+performance rather than write performance. Some optimizations on the
+application layer could be made.
+
+Reads are faster than Modeshape/Fedora.
+
+All tests so far have been performed in a single thread.

+ 117 - 0
docs/usage.rst

@@ -0,0 +1,117 @@
+Sample Usage
+============
+
+LDP API
+-------
+
+The following are very basic examples of LDP interaction. For a more complete
+reference, please consult the `Fedora API guide
+<https://wiki.duraspace.org/display/FEDORA4x/RESTful+HTTP+API+-+Containers>`__.
+
+**Note**: At the moment the LDP API only supports the Turtle format for
+serializing and deserializing RDF.
+
+Create an empty LDP container (LDPC)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X POST http://localhost:8000/ldp
+
+
+Create a resource with RDF payload
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X POST -H'Content-Type:text/turtle' --data-binary '<> <urn:ns:p1> <urn:ns:o1> .' http://localhost:8000/ldp
+
+
+Create a resource at a specific location
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X PUT http://localhost:8000/ldp/res1
+
+
+Create a binary resource
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X PUT -H'Content-Type:image/png' --data-binary '@/home/me/image.png' http://localhost:8000/ldp/bin1
+
+
+Retrieve an RDF resource (LDP-RS)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl http://localhost:8000/ldp/res1
+
+Retrieve a non-RDF source (LDP-NR)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl http://localhost:8000/ldp/bin1
+
+Or::
+
+    curl http://localhost:8000/ldp/bin1/fcr:content
+
+Or::
+
+    curl -H'Accept:image/png' http://localhost:8000/ldp/bin1
+
+Retrieve RDF metadata of an LDP-NR
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl http://localhost:8000/ldp/bin1/fcr:metadata
+
+Or::
+
+    curl -H'Accept:text/turtle' http://localhost:8000/ldp/bin1
+
+
+Soft-delete a resource
+~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X DELETE http://localhost:8000/ldp/bin1
+
+
+Restore ("resurrect") a resource
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X POST http://localhost:8000/ldp/bin1/fcr:tombstone
+
+
+Permanently delete ("forget") a soft-deleted resource
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Note**: The following command cannot be issued after the previous one.
+It has to be issued on a soft-deleted, non-resurrected resource.
+
+::
+
+    curl -X DELETE http://localhost:8000/ldp/bin1/fcr:tombstone
+
+Immediately forget a resource
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+    curl -X DELETE -H'Prefer:no-tombstone' http://localhost:8000/ldp/res1
+
+
+Python API
+----------
+
+**TODO**
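+
+In the meantime, here is a minimal, illustrative sketch based on the
+``lakesuperior.api.resource`` module documented in this changeset; the
+calls exist as shown, but the return values below are indicative only::
+
+    >>> from lakesuperior import env_setup  # Load the default configuration.
+    >>> from lakesuperior.api import resource as rsrc_api
+    >>> rsrc_api.create_or_replace('/res1')  # Create or replace a container.
+    >>> rsrc_api.exists('/res1')
+    True
+    >>> rsrc = rsrc_api.get('/res1')  # Resource preloaded with its graph.
+    >>> rsrc_api.delete('/res1', soft=True)  # Soft-delete, leave a tombstone.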

+ 0 - 32
etc.defaults/gunicorn.py

@@ -1,32 +0,0 @@
-# See: http://docs.gunicorn.org/en/stable/settings.html
-
-# Directory where to store logs, PIDfile, etc.
-_data_dir = 'data/'
-
-# Set app_mode to either 'prod', 'test' or 'dev'.
-# 'prod' is normal running mode. 'test' is used for running test suites.
-# 'dev' is similar to normal mode but with reload and debug enabled.
-_app_mode = 'dev'
-
-
-bind = "0.0.0.0:8000"
-workers = 4
-worker_class = 'gevent'
-max_requests = 512
-
-#user = "user"
-#group = "group"
-
-raw_env = 'APP_MODE={}'.format(_app_mode)
-
-# Set this to the directory containing logs, etc.
-# The path must end with a slash.
-#chdir = "/usr/local/lakesuperior/"
-
-daemon = _app_mode=='prod'
-pidfile = _data_dir + "run/fcrepo.pid"
-reload = _app_mode=='dev'
-
-accesslog = _data_dir + "log/gunicorn-access.log"
-errorlog = _data_dir + "log/gunicorn-error.log"
-

+ 24 - 0
etc.defaults/gunicorn.yml

@@ -0,0 +1,24 @@
+# Set up main Gunicorn options.
+# See: http://docs.gunicorn.org/en/stable/settings.html
+
+# Commented values are the application defaults.
+
+# Directory where the WSGI server data are stored.
+data_dir: 'data'
+
+# Set app_mode to either 'prod', 'test' or 'dev'.
+# 'prod' is normal running mode. 'test' is used for running test suites.
+# 'dev' is similar to normal mode but with reload and debug enabled.
+app_mode: 'dev'
+
+#listen_addr: '0.0.0.0'
+#listen_port: 8000
+#workers: 4
+#worker_class: 'gevent'
+#max_requests: 512
+
+#user: ''
+#group: ''
+
+#preload_app: True
+

+ 0 - 5
fcrepo

@@ -1,5 +0,0 @@
-#!/bin/bash
-default_conf_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/etc.defaults"
-conf_dir=${FCREPO_CONFIG_DIR:-$default_conf_dir}
-
-gunicorn -c "${conf_dir}/gunicorn.py" server:fcrepo --preload

+ 0 - 0
lakesuperior/api/__init__.py


+ 9 - 8
lakesuperior/api/query.py

@@ -14,17 +14,18 @@ rdf_store = env.app_globals.rdf_store
 
 
 
 
 def sparql_query(qry_str, fmt):
-    '''
+    """
     Send a SPARQL query to the triplestore.
 
-    @param qry_str (str) SPARQL query string. SPARQL 1.1 Query Language
-    (https://www.w3.org/TR/sparql11-query/) is supported.
-    @param fmt(string) Serialization format. This varies depending on the
-    query type (SELECT, ASK, CONSTRUCT, etc.). [@TODO Add reference to RDFLib
-    serialization formats]
+    :param str qry_str: SPARQL query string. SPARQL 1.1 Query Language
+        (https://www.w3.org/TR/sparql11-query/) is supported.
+    :param str fmt: Serialization format. This varies depending on the
+        query type (SELECT, ASK, CONSTRUCT, etc.). [TODO Add reference to
+        RDFLib serialization formats]
 
-    @return BytesIO
-    '''
+    :rtype: BytesIO
+    :return: Serialized SPARQL results.
+    """
     with TxnManager(rdf_store) as txn:
         qres = rdfly.raw_query(qry_str)
         out_stream = BytesIO(qres.serialize(format=fmt))

+ 65 - 62
lakesuperior/api/resource.py

@@ -21,7 +21,7 @@ from lakesuperior.store.ldp_rs.lmdb_store import TxnManager
 
 
 logger = logging.getLogger(__name__)
 
-__doc__ = '''
+__doc__ = """
 Primary API for resource manipulation.
 
 Quickstart:
@@ -54,10 +54,10 @@ Quickstart:
 (rdflib.term.URIRef('info:fcres/'),
  rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
  rdflib.term.URIRef('http://www.w3.org/ns/ldp#RDFSource'))}
-'''
+"""
 
 def transaction(write=False):
-    '''
+    """
     Handle atomic operations in a store.
 
     This wrapper ensures that a write operation is performed atomically. It
@@ -66,7 +66,7 @@ def transaction(write=False):
 
 
     ALL write operations on the LDP-RS and LDP-NR stores go through this
     wrapper.
-    '''
+    """
     def _transaction_deco(fn):
         @wraps(fn)
         def _wrapper(*args, **kwargs):
@@ -87,9 +87,9 @@ def transaction(write=False):
 
 
 
 
 def process_queue():
-    '''
+    """
     Process the message queue on a separate thread.
-    '''
+    """
     lock = Lock()
     lock.acquire()
     while len(env.app_globals.changelog):
@@ -98,14 +98,14 @@ def process_queue():
 
 
 
 
 def send_event_msg(remove_trp, add_trp, metadata):
-    '''
+    """
     Send messages about a changed LDPR.
 
     A single LDPR message packet can contain multiple resource subjects, e.g.
     if the resource graph contains hash URIs or even other subjects. This
     method groups triples by subject and sends a message for each of the
     subjects found.
-    '''
+    """
     # Group delta triples by subject.
     remove_grp = groupby(remove_trp, lambda x : x[0])
     remove_dict = {k[0]: k[1] for k in remove_grp}
@@ -123,11 +123,11 @@ def send_event_msg(remove_trp, add_trp, metadata):
 
 
 @transaction()
 def exists(uid):
-    '''
+    """
     Return whether a resource exists (is stored) in the repository.
 
-    @param uid (string) Resource UID.
-    '''
+    :param string uid: Resource UID.
+    """
     try:
         exists = LdpFactory.from_stored(uid).is_stored
     except ResourceNotExistsError:
@@ -137,31 +137,32 @@ def exists(uid):
 
 
 @transaction()
 def get_metadata(uid):
-    '''
+    """
     Get metadata (admin triples) of an LDPR resource.
 
-    @param uid (string) Resource UID.
-    '''
+    :param string uid: Resource UID.
+    """
     return LdpFactory.from_stored(uid).metadata
 
 
 @transaction()
 def get(uid, repr_options={}):
-    '''
+    """
     Get an LDPR resource.
 
     The resource comes preloaded with user data and metadata as indicated by
     the `repr_options` argument. Any further handling of this resource is done
     outside of a transaction.
 
-    @param uid (string) Resource UID.
-    @param repr_options (dict(bool)) Representation options. This is a dict
-    that is unpacked downstream in the process. The default empty dict results
-    in default values. The accepted dict keys are:
+    :param string uid: Resource UID.
+    :param dict repr_options: Representation options. This is a dict
+        that is unpacked downstream in the process. The default empty dict
+        results in default values. The accepted dict keys are:
+
     - incl_inbound: include inbound references. Default: False.
     - incl_children: include children URIs. Default: True.
     - embed_children: Embed full graph of all child resources. Default: False
-    '''
+    """
     rsrc = LdpFactory.from_stored(uid, repr_options)
     # Load graph before leaving the transaction.
     rsrc.imr
@@ -171,23 +172,23 @@ def get(uid, repr_options={}):
 
 
 @transaction()
 def get_version_info(uid):
-    '''
+    """
     Get version metadata (fcr:versions).
-    '''
+    """
     return LdpFactory.from_stored(uid).version_info
 
 
 @transaction()
 def get_version(uid, ver_uid):
-    '''
+    """
     Get version metadata (fcr:versions).
-    '''
+    """
     return LdpFactory.from_stored(uid).get_version(ver_uid)
 
 
 @transaction(True)
 def create(parent, slug, **kwargs):
-    '''
+    r"""
     Mint a new UID and create a resource.
 
     The UID is computed from a given parent UID and a "slug", a proposed path
@@ -195,14 +196,14 @@ def create(parent, slug, **kwargs):
     path but it may use a different one if a conflict with an existing resource
     arises.
 
-    @param parent (string) UID of the parent resource.
-    @param slug (string) Tentative path relative to the parent UID.
-    @param **kwargs Other parameters are passed to the
-    LdpFactory.from_provided method. Please see the documentation for that
-    method for explanation of individual parameters.
+    :param str parent: UID of the parent resource.
+    :param str slug: Tentative path relative to the parent UID.
+    :param \*\*kwargs: Other parameters are passed to the
+      :meth:`LdpFactory.from_provided` method.
 
-    @return string UID of the new resource.
-    '''
+    :rtype: str
+    :return: UID of the new resource.
+    """
     uid = LdpFactory.mint_uid(parent, slug)
     logger.debug('Minted UID for new resource: {}'.format(uid))
     rsrc = LdpFactory.from_provided(uid, **kwargs)
@@ -214,7 +215,7 @@ def create(parent, slug, **kwargs):
 
 
 @transaction(True)
 def create_or_replace(uid, stream=None, **kwargs):
-    '''
+    r"""
     Create or replace a resource with a specified UID.
 
     If the resource already exists, all user-provided properties of the
@@ -222,15 +223,15 @@ def create_or_replace(uid, stream=None, **kwargs):
     content is empty, an exception is raised (not sure why, but that's how
     FCREPO4 handles it).
 
-    @param uid (string) UID of the resource to be created or updated.
-    @param stream (BytesIO) Content stream. If empty, an empty container is
-    created.
-    @param **kwargs Other parameters are passed to the
-    LdpFactory.from_provided method. Please see the documentation for that
-    method for explanation of individual parameters.
+    :param string uid: UID of the resource to be created or updated.
+    :param BytesIO stream: Content stream. If empty, an empty container is
+        created.
+    :param \*\*kwargs: Other parameters are passed to the
+        :meth:`LdpFactory.from_provided` method.
 
-    @return string Event type: whether the resource was created or updated.
-    '''
+    :rtype: str
+    :return: Event type: whether the resource was created or updated.
+    """
     rsrc = LdpFactory.from_provided(uid, stream=stream, **kwargs)
 
     if not stream and rsrc.is_stored:
@@ -242,14 +243,15 @@ def create_or_replace(uid, stream=None, **kwargs):
 
 
 @transaction(True)
 def update(uid, update_str, is_metadata=False):
-    '''
+    """
     Update a resource with a SPARQL-Update string.
 
-    @param uid (string) Resource UID.
-    @param update_str (string) SPARQL-Update statements.
-    @param is_metadata (bool) Whether the resource metadata is being updated.
-    If False, and the resource being updated is a LDP-NR, an error is raised.
-    '''
+    :param string uid: Resource UID.
+    :param string update_str: SPARQL-Update statements.
+    :param bool is_metadata: Whether the resource metadata is being updated.
+        If False, and the resource being updated is an LDP-NR, an error is
+        raised.
+    """
     rsrc = LdpFactory.from_stored(uid)
     if LDP_NR_TYPE in rsrc.ldp_types and not is_metadata:
         raise InvalidResourceError(uid)
@@ -261,28 +263,29 @@ def update(uid, update_str, is_metadata=False):
 
 
 @transaction(True)
 def create_version(uid, ver_uid):
-    '''
+    """
     Create a resource version.
 
-    @param uid (string) Resource UID.
-    @param ver_uid (string) Version UID to be appended to the resource URI.
-    NOTE: this is a "slug", i.e. the version URI is not guaranteed to be the
-    one indicated.
+    :param string uid: Resource UID.
+    :param string ver_uid: Version UID to be appended to the resource URI.
+      NOTE: this is a "slug", i.e. the version URI is not guaranteed to be the
+      one indicated.
 
-    @return string Version UID.
-    '''
+    :rtype: str
+    :return: Version UID.
+    """
     return LdpFactory.from_stored(uid).create_version(ver_uid)
 
 
 @transaction(True)
 def delete(uid, soft=True):
-    '''
+    """
     Delete a resource.
 
-    @param uid (string) Resource UID.
-    @param soft (bool) Whether to perform a soft-delete and leave a
-    tombstone resource, or wipe any memory of the resource.
-    '''
+    :param string uid: Resource UID.
+    :param bool soft: Whether to perform a soft-delete and leave a
+      tombstone resource, or wipe any memory of the resource.
+    """
     # If referential integrity is enforced, grab all inbound relationships
     # to break them.
     refint = env.app_globals.rdfly.config['referential_integrity']
@@ -314,9 +317,9 @@ def delete(uid, soft=True):
 
 
 @transaction(True)
 def resurrect(uid):
-    '''
+    """
     Reinstate a buried (soft-deleted) resource.
 
-    @param uid (string) Resource UID.
-    '''
+    :param str uid: Resource UID.
+    """
     return LdpFactory.from_stored(uid).resurrect_rsrc()

+ 2 - 2
lakesuperior/config_parser.py

@@ -19,8 +19,8 @@ def parse_config(config_dir=None):
     ``etc.defaults``.
 
     :param config_dir: Location on the filesystem of the configuration
-    directory. The default is set by the ``FCREPO_CONFIG_DIR`` environment
-    variable or, if this is not set, the ``etc.defaults`` stock directory.
+        directory. The default is set by the ``FCREPO_CONFIG_DIR`` environment
+        variable or, if this is not set, the ``etc.defaults`` stock directory.
     """
     configs = (
         'application',

+ 0 - 0
lakesuperior/dictionaries/__init__.py


+ 0 - 0
lakesuperior/endpoints/__init__.py


+ 8 - 8
lakesuperior/endpoints/admin.py

@@ -13,21 +13,21 @@ admin = Blueprint('admin', __name__)
 
 
 @admin.route('/stats', methods=['GET'])
 def stats():
-    '''
+    """
     Get repository statistics.
-    '''
+    """
     def fsize_fmt(num, suffix='b'):
-        '''
+        """
         Format an integer into 1024-block file size format.
 
         Adapted from Python 2 code on
         https://stackoverflow.com/a/1094933/3758232
 
-        @param num (int) Size value in bytes.
-        @param suffix (string) Suffix label (defaults to `B`).
+        :param int num: Size value in bytes.
+        :param string suffix: Suffix label (defaults to `B`).
 
         @return string Formatted size to largest fitting unit.
-        '''
+        """
         for unit in ['','K','M','G','T','P','E','Z']:
             if abs(num) < 1024.0:
                 return "{:3.1f} {}{}".format(num, unit, suffix)
@@ -42,9 +42,9 @@ def stats():
 
 
 @admin.route('/tools', methods=['GET'])
 def admin_tools():
-    '''
+    """
     Admin tools.
 
     @TODO stub.
-    '''
+    """
     return render_template('admin_tools.html')

+ 50 - 47
lakesuperior/endpoints/ldp.py

@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
 
 
 ldp = Blueprint(
         'ldp', __name__, template_folder='templates',
-        static_url_path='/static', static_folder='../../static')
+        static_url_path='/static', static_folder='templates/static')
 
 accept_patch = (
     'application/sparql-update',
@@ -62,7 +62,7 @@ std_headers = {
     #'Allow' : ','.join(allow),
 }
 
-'''Predicates excluded by view.'''
+"""Predicates excluded by view."""
 vw_blacklist = {
 }
 
@@ -112,17 +112,18 @@ def log_request_end(rsp):
 @ldp.route('/<path:uid>/fcr:content', defaults={'out_fmt' : 'non_rdf'},
         methods=['GET'])
 def get_resource(uid, out_fmt=None):
-    '''
+    r"""
     https://www.w3.org/TR/ldp/#ldpr-HTTP_GET
 
     Retrieve RDF or binary content.
 
-    @param uid (string) UID of resource to retrieve. The repository root has
-    an empty string for UID.
-    @param out_fmt (string) Force output to RDF or non-RDF if the resource is
-    a LDP-NR. This is not available in the API but is used e.g. by the
-    `*/fcr:metadata` and `*/fcr:content` endpoints. The default is False.
-    '''
+    :param str uid: UID of resource to retrieve. The repository root has
+        an empty string for UID.
+    :param str out_fmt: Force output to RDF or non-RDF if the resource is
+        an LDP-NR. This is not available in the API but is used e.g. by the
+        ``\*/fcr:metadata`` and ``\*/fcr:content`` endpoints. The default is
+        False.
+    """
     logger.info('UID: {}'.format(uid))
     out_headers = std_headers
     repr_options = defaultdict(dict)
@@ -169,9 +170,11 @@ def get_resource(uid, out_fmt=None):
 
 
 @ldp.route('/<path:uid>/fcr:versions', methods=['GET'])
 def get_version_info(uid):
-    '''
+    """
     Get version info (`fcr:versions`).
-    '''
+
+    :param str uid: UID of resource to retrieve versions for.
+    """
     try:
         gr = rsrc_api.get_version_info(uid)
     except ResourceNotExistsError as e:
@@ -186,12 +189,12 @@ def get_version_info(uid):
 
 
 @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['GET'])
 def get_version(uid, ver_uid):
-    '''
+    """
     Get an individual resource version.
 
-    @param uid (string) Resource UID.
-    @param ver_uid (string) Version UID.
-    '''
+    :param str uid: Resource UID.
+    :param str ver_uid: Version UID.
+    """
     try:
         gr = rsrc_api.get_version(uid, ver_uid)
     except ResourceNotExistsError as e:
@@ -208,11 +211,11 @@ def get_version(uid, ver_uid):
 @ldp.route('/', defaults={'parent_uid': '/'}, methods=['POST'],
         strict_slashes=False)
 def post_resource(parent_uid):
-    '''
+    """
     https://www.w3.org/TR/ldp/#ldpr-HTTP_POST
 
     Add a new resource in a new URI.
-    '''
+    """
     out_headers = std_headers
     try:
         slug = request.headers['Slug']
@@ -261,11 +264,11 @@ def post_resource(parent_uid):
 @ldp.route('/<path:uid>/fcr:metadata', defaults={'force_rdf' : True},
         methods=['PUT'])
 def put_resource(uid):
-    '''
+    """
     https://www.w3.org/TR/ldp/#ldpr-HTTP_PUT
 
     Add or replace a new resource at a specified URI.
-    '''
+    """
     # Parse headers.
     logger.debug('Request headers: {}'.format(request.headers))
 
@@ -310,11 +313,11 @@ def put_resource(uid):
 
 
 @ldp.route('/<path:uid>', methods=['PATCH'], strict_slashes=False)
 def patch_resource(uid, is_metadata=False):
-    '''
+    """
     https://www.w3.org/TR/ldp/#ldpr-HTTP_PATCH
 
     Update an existing resource with a SPARQL-UPDATE payload.
-    '''
+    """
     rsp_headers = {'Content-Type' : 'text/plain; charset=utf-8'}
     if request.mimetype != 'application/sparql-update':
         return 'Provided content type is not a valid parsable format: {}'\
@@ -344,7 +347,7 @@ def patch_resource_metadata(uid):
 
 
 @ldp.route('/<path:uid>', methods=['DELETE'])
 def delete_resource(uid):
-    '''
+    """
     Delete a resource and optionally leave a tombstone.
 
     This behaves differently from FCREPO. A tombstone indicated that the
@@ -353,9 +356,9 @@ def delete_resource(uid):
     one more version snapshot of the resource prior to being deleted.
 
     In order to completely wipe out all traces of a resource, the tombstone
-    must be deleted as well, or the `Prefer:no-tombstone` header can be used.
+    must be deleted as well, or the ``Prefer:no-tombstone`` header can be used.
     The latter will forget (completely delete) the resource immediately.
-    '''
+    """
     headers = std_headers
 
     if 'prefer' in request.headers:
@@ -377,12 +380,12 @@ def delete_resource(uid):
 @ldp.route('/<path:uid>/fcr:tombstone', methods=['GET', 'POST', 'PUT',
         'PATCH', 'DELETE'])
 def tombstone(uid):
-    '''
+    """
     Handle all tombstone operations.
 
     The only allowed methods are POST and DELETE; any other verb will return a
     405.
-    '''
+    """
     try:
         rsrc = rsrc_api.get(uid)
     except TombstoneError as e:
@@ -409,9 +412,9 @@ def tombstone(uid):
 
 
 @ldp.route('/<path:uid>/fcr:versions', methods=['POST', 'PUT'])
 def post_version(uid):
-    '''
+    """
     Create a new resource version.
-    '''
+    """
     if request.method == 'PUT':
         return 'Method not allowed.', 405
     ver_uid = request.headers.get('slug', None)
@@ -430,14 +433,14 @@ def post_version(uid):
 
 
 @ldp.route('/<path:uid>/fcr:versions/<ver_uid>', methods=['PATCH'])
 def patch_version(uid, ver_uid):
-    '''
+    """
     Revert to a previous version.
 
     NOTE: This creates a new version snapshot.
 
-    @param uid (string) Resource UID.
-    @param ver_uid (string) Version UID.
-    '''
+    :param str uid: Resource UID.
+    :param str ver_uid: Version UID.
+    """
     try:
         LdpFactory.from_stored(uid).revert_to_version(ver_uid)
     except ResourceNotExistsError as e:
@@ -453,9 +456,9 @@ def patch_version(uid, ver_uid):
 ## PRIVATE METHODS ##
 
 def _negotiate_content(gr, headers=None, **vw_kwargs):
-    '''
+    """
     Return HTML or serialized RDF depending on accept headers.
-    '''
+    """
     if request.accept_mimetypes.best == 'text/html':
         return render_template(
                 'resource.html', gr=gr, nsc=nsc, nsm=nsm,
@@ -467,9 +470,9 @@ def _negotiate_content(gr, headers=None, **vw_kwargs):
 
 
 
 
 def _bistream_from_req():
-    '''
+    """
     Find how a binary file and its MIMEtype were uploaded in the request.
-    '''
+    """
     #logger.debug('Content type: {}'.format(request.mimetype))
     #logger.debug('files: {}'.format(request.files))
     #logger.debug('stream: {}'.format(request.stream))
@@ -508,9 +511,9 @@ def _tombstone_response(e, uid):
 
 
 
 
 def set_post_put_params():
-    '''
+    """
     Sets handling and content disposition for POST and PUT by parsing headers.
-    '''
+    """
     handling = 'strict'
     if 'prefer' in request.headers:
         prefer = g.tbox.parse_rfc7240(request.headers['prefer'])
@@ -528,10 +531,10 @@ def set_post_put_params():
 
 
 
 
 def is_accept_hdr_rdf_parsable():
-    '''
+    """
     Check if any of the 'Accept' header values provided is a RDF parsable
     format.
-    '''
+    """
     for mimetype in request.accept_mimetypes.values():
         if LdpFactory.is_rdf_parsable(mimetype):
             return True
@@ -539,14 +542,14 @@ def is_accept_hdr_rdf_parsable():
 
 
 
 
 def parse_repr_options(retr_opts):
-    '''
+    """
     Set options to retrieve IMR.
 
     Ideally, IMR retrieval is done once per request, so all the options
     are set once in the `imr()` property.
 
-    @param retr_opts (dict): Options parsed from `Prefer` header.
-    '''
+    :param dict retr_opts: Options parsed from `Prefer` header.
+    """
     logger.debug('Parsing retrieval options: {}'.format(retr_opts))
     imr_options = {}
 
@@ -591,12 +594,12 @@ def parse_repr_options(retr_opts):
 
 
 
 
 def _headers_from_metadata(rsrc):
-    '''
+    """
     Create a dict of headers from a metadata graph.
 
-    @param rsrc (lakesuperior.model.ldpr.Ldpr) Resource to extract metadata
-    from.
-    '''
+    :param lakesuperior.model.ldpr.Ldpr rsrc: Resource to extract metadata
+        from.
+    """
     out_headers = defaultdict(list)
 
     digest = rsrc.metadata.value(nsc['premis'].hasMessageDigest)

+ 3 - 7
lakesuperior/endpoints/main.py

@@ -7,23 +7,19 @@ logger = logging.getLogger(__name__)
 # Blueprint for main pages. Not much here.
 
 main = Blueprint('main', __name__, template_folder='templates',
-        static_folder='../../static')
+        static_folder='templates/static')
 
 ## GENERIC ROUTES ##
 
 @main.route('/', methods=['GET'])
 def index():
-    '''
-    Homepage.
-    '''
+    """Homepage."""
     return render_template('index.html')
 
 
 @main.route('/debug', methods=['GET'])
 def debug():
-    '''
-    Debug page.
-    '''
+    """Debug page."""
     raise RuntimeError()
 
 

+ 5 - 5
lakesuperior/endpoints/query.py

@@ -20,9 +20,9 @@ query = Blueprint('query', __name__)
 
 
 @query.route('/term_search', methods=['GET'])
 def term_search():
-    '''
+    """
     Search by entering a search term and optional property and comparison term.
-    '''
+    """
     valid_operands = (
         ('=', 'Equals'),
         ('>', 'Greater Than'),
@@ -40,11 +40,11 @@ def term_search():
 
 
 @query.route('/sparql', methods=['GET', 'POST'])
 def sparql():
-    '''
+    """
     Perform a direct SPARQL query on the underlying triplestore.
 
-    @param qry SPARQL query string.
-    '''
+    :param str qry: SPARQL query string.
+    """
     accept_mimetypes = {
         'text/csv': 'csv',
         'application/sparql-results+json': 'json',

+ 0 - 0
static/assets/css/bootstrap-theme.css → lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.css


+ 0 - 0
static/assets/css/bootstrap-theme.css.map → lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.css.map


+ 0 - 0
static/assets/css/bootstrap-theme.min.css → lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.min.css


+ 0 - 0
static/assets/css/bootstrap-theme.min.css.map → lakesuperior/endpoints/templates/static/assets/css/bootstrap-theme.min.css.map


+ 0 - 0
static/assets/css/bootstrap.css → lakesuperior/endpoints/templates/static/assets/css/bootstrap.css


+ 0 - 0
static/assets/css/bootstrap.css.map → lakesuperior/endpoints/templates/static/assets/css/bootstrap.css.map


+ 0 - 0
static/assets/css/bootstrap.min.css → lakesuperior/endpoints/templates/static/assets/css/bootstrap.min.css


+ 0 - 0
static/assets/css/bootstrap.min.css.map → lakesuperior/endpoints/templates/static/assets/css/bootstrap.min.css.map


+ 0 - 0
static/assets/css/yasgui.min.css → lakesuperior/endpoints/templates/static/assets/css/yasgui.min.css


+ 0 - 0
static/assets/fonts/glyphicons-halflings-regular.eot → lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.eot


+ 0 - 0
static/assets/fonts/glyphicons-halflings-regular.svg → lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.svg


+ 0 - 0
static/assets/fonts/glyphicons-halflings-regular.ttf → lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.ttf


+ 0 - 0
static/assets/fonts/glyphicons-halflings-regular.woff → lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.woff


+ 0 - 0
static/assets/fonts/glyphicons-halflings-regular.woff2 → lakesuperior/endpoints/templates/static/assets/fonts/glyphicons-halflings-regular.woff2


+ 0 - 0
static/assets/js/bootstrap.js → lakesuperior/endpoints/templates/static/assets/js/bootstrap.js


+ 0 - 0
static/assets/js/bootstrap.min.js → lakesuperior/endpoints/templates/static/assets/js/bootstrap.min.js


+ 0 - 0
static/assets/js/jquery-3.2.1.min.js → lakesuperior/endpoints/templates/static/assets/js/jquery-3.2.1.min.js


+ 0 - 0
static/assets/js/npm.js → lakesuperior/endpoints/templates/static/assets/js/npm.js


+ 0 - 0
static/assets/js/yasgui.min.js → lakesuperior/endpoints/templates/static/assets/js/yasgui.min.js


+ 10 - 3
lakesuperior/env_setup.py

@@ -2,8 +2,15 @@ from lakesuperior.config_parser import config
 from lakesuperior.globals import AppGlobals
 from lakesuperior.env import env
 
-'''
-Import this module to initialize the configuration for a production setup.
-'''
+__doc__ = """
+Default configuration.
+
+Import this module to initialize the configuration for a production setup::
+
+    >>> from lakesuperior import env_setup
+
+This will load the default configuration.
+"""
+
 env.config = config
 env.app_globals = AppGlobals(config)

+ 59 - 9
lakesuperior/globals.py

@@ -5,25 +5,48 @@ from importlib import import_module
 
 
 from lakesuperior.dictionaries.namespaces import ns_collection as nsc
 
-'''
-Constants used in messaging to identify an event type.
-'''
 RES_CREATED = '_create_'
+"""A resource was created."""
 RES_DELETED = '_delete_'
+"""A resource was deleted."""
 RES_UPDATED = '_update_'
+"""A resource was updated."""
 
 ROOT_UID = '/'
+"""Root node UID."""
 ROOT_RSRC_URI = nsc['fcres'][ROOT_UID]
+"""Internal URI of root resource."""
 
 
 class AppGlobals:
-    '''
+    """
     Application Globals.
 
-    This class sets up all connections and exposes them across the application
-    outside of the Flask app context.
-    '''
+    This class is instantiated and used as a carrier for all connections and
+    various global variables outside of the Flask app context.
+
+    The variables are set on initialization by passing a configuration dict.
+    Usually this is done when starting an application. The instance with the
+    loaded variables is then assigned to the :data:`lakesuperior.env.env`
+    global variable.
+
+    You can either load the default configuration::
+
+        >>> from lakesuperior import env_setup
+
+    Or set up an environment with a custom configuration::
+
+        >>> from lakesuperior.env import env
+        >>> from lakesuperior.app_globals import AppGlobals
+        >>> my_config = {'name': 'value', '...': '...'}
+        >>> env.config = my_config
+        >>> env.app_globals = AppGlobals(my_config)
+
+    """
     def __init__(self, conf):
+        """
+        Generate global variables from configuration.
+        """
         from lakesuperior.messaging.messenger import Messenger
 
         app_conf = conf['application']
@@ -53,18 +76,45 @@ class AppGlobals:
 
 
     @property
     def rdfly(self):
+        """
+        Current RDF layout.
+
+        This is an instance of
+        :class:`~lakesuperior.store.ldp_rs.rsrc_centric_layout.RsrcCentricLayout`.
+
+        *TODO:* Update class reference when the interface is separated from
+        the implementation.
+        """
         return self._rdfly
 
     @property
     def rdf_store(self):
+        """
+        Current RDF low-level store.
+
+        This is an instance of
+        :class:`~lakesuperior.store.ldp_rs.lmdb_store.LmdbStore`.
+        """
         return self._rdfly.store
 
     @property
     def nonrdfly(self):
+        """
+        Current non-RDF (binary contents) layout.
+
+        This is an instance of
+        :class:`~lakesuperior.store.ldp_nr.base_non_rdf_layout.BaseNonRdfLayout`.
+        """
         return self._nonrdfly
 
     @property
     def messenger(self):
+        """
+        Current message handler.
+
+        This is an instance of
+        :class:`~lakesuperior.messaging.messenger.Messenger`.
+        """
         return self._messenger
 
     @property
@@ -73,9 +123,9 @@ class AppGlobals:
 
 
 
 
     def camelcase(self, word):
-        '''
+        """
         Convert a string with underscores to a camel-cased one.
 
         Ripped from https://stackoverflow.com/a/6425628
-        '''
+        """
         return ''.join(x.capitalize() or '_' for x in word.split('_'))

+ 0 - 1
lsup-admin → lakesuperior/lsup_admin.py

@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 import click
 import click_log
 import json

+ 0 - 0
lakesuperior/messaging/__init__.py


+ 14 - 18
lakesuperior/messaging/formatters.py

@@ -8,12 +8,12 @@ from lakesuperior.globals import RES_CREATED, RES_DELETED, RES_UPDATED
 
 
 
 
 class BaseASFormatter(metaclass=ABCMeta):
-    '''
+    """
     Format message as ActivityStreams.
 
     This is not really a `logging.Formatter` subclass, but a plain string
     builder.
-    '''
+    """
     ev_types = {
         RES_CREATED : 'Create',
         RES_DELETED : 'Delete',
@@ -28,7 +28,7 @@ class BaseASFormatter(metaclass=ABCMeta):
 
 
     def __init__(
             self, rsrc_uri, ev_type, timestamp, rsrc_type, actor, data=None):
-        '''
+        """
         Format output according to granularity level.
 
         NOTE: Granularity level does not refer to the logging levels, i.e.
@@ -36,14 +36,14 @@ class BaseASFormatter(metaclass=ABCMeta):
         are logged under the same level. This it is rather about *what* gets
         logged in a message.
 
-        @param rsrc_uri (rdflib.URIRef) URI of the resource.
-        @param ev_type (string) one of `create`, `delete` or `update`
-        @param timestamp (string) Timestamp of the event.
-        @param data (tuple(set)) if messaging is configured with `provenance`
+        :param rdflib.URIRef rsrc_uri: URI of the resource.
+        :param str ev_type: one of `create`, `delete` or `update`
+        :param str timestamp: Timestamp of the event.
+        :param tuple(set) data: if messaging is configured with `provenance`
         level, this is a 2-tuple with one set (as 3-tuples of
         RDFlib.Identifier instances) for removed triples, and one set for
         added triples.
-        '''
+        """
         self.rsrc_uri = rsrc_uri
         self.ev_type = ev_type
         self.timestamp = timestamp
@@ -59,15 +59,13 @@ class BaseASFormatter(metaclass=ABCMeta):
 
 
 
 
 class ASResourceFormatter(BaseASFormatter):
-    '''
+    """
     Sends information about a resource being created, updated or deleted, by
     who and when, with no further information about what changed.
-    '''
+    """
 
     def __str__(self):
-        '''
-        Output structured data as string.
-        '''
+        """Output structured data as string."""
         ret = {
             '@context': 'https://www.w3.org/ns/activitystreams',
             'id' : 'urn:uuid:{}'.format(uuid.uuid4()),
@@ -86,15 +84,13 @@ class ASResourceFormatter(BaseASFormatter):
 
 
 
 
 class ASDeltaFormatter(BaseASFormatter):
-    '''
+    """
     Sends the same information as `ASResourceFormatter` with the addition of
     the triples that were added and the ones that were removed in the request.
     This may be used to send rich provenance data to a preservation system.
-    '''
+    """
     def __str__(self):
-        '''
-        Output structured data as string.
-        '''
+        """Output structured data as string."""
         ret = {
             '@context': 'https://www.w3.org/ns/activitystreams',
             'id' : 'urn:uuid:{}'.format(uuid.uuid4()),

+ 24 - 16
lakesuperior/messaging/handlers.py

@@ -3,14 +3,17 @@ import logging
 import stomp
 
 
+logger = logging.getLogger(__name__)
+
+
 class StompHandler(logging.Handler):
-    '''
+    """
     Send messages to a remote queue broker using the STOMP protocol.
 
     This module is named and configured separately from
     standard logging for clarity about its scope: while logging has an
     informational purpose, this module has a functional one.
-    '''
+    """
     def __init__(self, conf):
         self.conf = conf
         if self.conf['protocol'] == '11':
@@ -22,25 +25,30 @@ class StompHandler(logging.Handler):
 
 
         self.conn = conn_cls([(self.conf['host'], self.conf['port'])])
         self.conn.start()
-        self.conn.connect(
-            username=self.conf['username'],
-            passcode=self.conf['password'],
-            wait=True
-        )
+        try:
+            self.conn.connect(
+                username=self.conf['username'],
+                passcode=self.conf['password'],
+                wait=True
+            )
+        except stomp.exception.ConnectFailedException:
+            logger.warning(
+                    'Could not connect to the STOMP server. Your messages '
+                    'will be ditched.')
 
         return super().__init__()
 
 
     def __del_(self):
-        '''
-        Disconnect the client.
-        '''
-        self.conn.disconnect()
+        """Disconnect the client."""
+        if self.conn.is_connected():
+            self.conn.disconnect()
 
     def emit(self, record):
-        '''
-        Send the message to the destination endpoint.
-        '''
-        self.conn.send(destination=self.conf['destination'],
-                body=bytes(self.format(record), 'utf-8'))
+        """Send the message to the destination endpoint."""
+        if self.conn.is_connected():
+            self.conn.send(destination=self.conf['destination'],
+                    body=bytes(self.format(record), 'utf-8'))
+        else:
+            logger.warning('STOMP server not connected. Message dropped.')
 

+ 6 - 8
lakesuperior/messaging/messenger.py

@@ -7,15 +7,15 @@ messenger = logging.getLogger('_messenger')
 
 
 
 
 class Messenger:
-    '''
+    """
     Very simple message sender using the standard Python logging facility.
-    '''
+    """
     def __init__(self, config):
-        '''
+        """
         Set up the messenger.
 
-        @param config (dict) Messenger configuration.
-        '''
+        :param dict config: Messenger configuration.
+        """
         def msg_routes():
             for route in config['routes']:
                 handler_cls = getattr(handlers, route['handler'])
@@ -31,8 +31,6 @@ class Messenger:
 
 
 
 
     def send(self, *args, **kwargs):
-        '''
-        Send one or more external messages.
-        '''
+        """Send one or more external messages."""
         for msg, fn in self.msg_routes:
             msg.info(fn(*args, **kwargs))

+ 27 - 25
lakesuperior/migrator.py

@@ -48,24 +48,24 @@ class Migrator:
     regular intervals.
     """
 
-    """
-    LMDB database parameters.
-
-    See :meth:`lmdb.Environment.__init__`
-    """
     db_params = {
         'map_size': 1024 ** 4,
         'metasync': False,
         'readahead': False,
         'meminit': False,
     }
+    """
+    LMDB database parameters.
+
+    See :meth:`lmdb.Environment.__init__`
+    """
 
-    """List of predicates to ignore when looking for links."""
     ignored_preds = (
         nsc['fcrepo'].hasParent,
         nsc['fcrepo'].hasTransactionProvider,
         nsc['fcrepo'].hasFixityService,
     )
+    """List of predicates to ignore when looking for links."""
 
 
     def __init__(
@@ -75,22 +75,23 @@ class Migrator:
         Set up base paths and clean up existing directories.
 
         :param rdflib.URIRef src: Webroot of source repository. This must
-        correspond to the LDP root node (for Fedora it can be e.g.
-        ``http://localhost:8080fcrepo/rest/``) and is used to determine if URIs
-        retrieved are managed by this repository.
+            correspond to the LDP root node (for Fedora it can be e.g.
+            ``http://localhost:8080fcrepo/rest/``) and is used to determine if
+            URIs retrieved are managed by this repository.
         :param str dest: Destination repository path. If the location exists
-        it must be a writable directory. It will be deleted and recreated. If
-        it does not exist, it will be created along with its parents if
-        missing.
+            it must be a writable directory. It will be deleted and recreated.
+            If it does not exist, it will be created along with its parents if
+            missing.
         :param str binary_handling: One of ``include``, ``truncate`` or
-        ``split``.
+            ``split``.
         :param bool compact_uris: NOT IMPLEMENTED. Whether the process should
-        attempt to compact URIs generated with broken up path segments. If the
-        UID matches a pattern such as `/12/34/56/123456...` it is converted to
-        `/123456...`. This would remove a lot of cruft caused by the pairtree
-        segments. Note that this will change the publicly exposed URIs. If
-        durability is a concern, a rewrite directive can be added to the HTTP
-        server that proxies the WSGI endpoint.
+            attempt to compact URIs generated with broken up path segments. If
+            the UID matches a pattern such as ``/12/34/56/123456...`` it is
+            converted to ``/123456...``. This would remove a lot of cruft
+            caused by the pairtree segments. Note that this will change the
+            publicly exposed URIs. If durability is a concern, a rewrite
+            directive can be added to the HTTP server that proxies the WSGI
+            endpoint.
         """
         # Set up repo folder structure and copy default configuration to
         # destination file.
@@ -137,11 +138,12 @@ class Migrator:
         data set contained in a folder from an LDP repository.

         :param start_pts: List of starting points to retrieve
-        :type start_pts: tuple or list 
-        resources from. It would typically be the repository root in case of a
-        full dump or one or more resources in the repository for a partial one.
-        :param str listf_ile: path to a local file containing a list of URIs,
-        one per line.
+            resources from. It would typically be the repository root in case
+            of a full dump or one or more resources in the repository for a
+            partial one.
+        :type start_pts: tuple or list
+        :param str list_file: path to a local file containing a list of URIs,
+            one per line.
         """
         from lakesuperior.api import resource as rsrc_api
         self._ct = 0
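
With the corrected parameter names (`list_file` rather than the old `listf_ile`), the two invocation styles the docstring describes would look roughly as follows; values are hypothetical:

    # Full dump, starting from the repository root:
    migrator.migrate(start_pts=('/',))

    # Partial dump, driven by a local file listing one URI per line:
    migrator.migrate(list_file='/tmp/uris.txt')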
@@ -179,7 +181,7 @@ class Migrator:
         managed by the repository is encountered.

         :param str uid: The path relative to the source server webroot
-        pointing to the resource to crawl, effectively the resource UID.
+            pointing to the resource to crawl, effectively the resource UID.
         """
         ibase = str(nsc['fcres'])
         # Public URI of source repo.
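
Since the UID is the path relative to the source webroot, deriving it is a plain prefix strip; illustrative values only:

    src = 'http://localhost:8080/fcrepo/rest'
    uri = src + '/path/to/rsrc'
    uid = uri[len(src):]  # '/path/to/rsrc': the resource UID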

+ 28 - 26
lakesuperior/model/ldp_factory.py

@@ -26,10 +26,10 @@ logger = logging.getLogger(__name__)


 class LdpFactory:
-    '''
+    """
     Generate LDP instances.
     The instance classes are based on provided client data or on stored data.
-    '''
+    """
     @staticmethod
     def new_container(uid):
         if not uid.startswith('/') or uid == '/':
@@ -43,7 +43,7 @@ class LdpFactory:

     @staticmethod
     def from_stored(uid, repr_opts={}, **kwargs):
-        '''
+        """
         Create an instance for retrieval purposes.

         This factory method creates and returns an instance of an LDPR subclass
@@ -52,8 +52,8 @@ class LdpFactory:

         N.B. The resource must exist.

-        @param uid UID of the instance.
-        '''
+        :param uid: UID of the instance.
+        """
         #logger.info('Retrieving stored resource: {}'.format(uid))
         imr_urn = nsc['fcres'][uid]
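
A minimal retrieval sketch based on the signature above; the UID is hypothetical, and the resource must already exist:

    from lakesuperior.model.ldp_factory import LdpFactory

    # Returns an instance of the appropriate LDPR subclass for the
    # stored resource identified by this UID.
    rsrc = LdpFactory.from_stored('/my_rsrc')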
 
@@ -80,16 +80,17 @@ class LdpFactory:
     @staticmethod
     def from_provided(
             uid, mimetype=None, stream=None, provided_imr=None, **kwargs):
-        '''
+        r"""
         Determine LDP type from request content.

-        @param uid (string) UID of the resource to be created or updated.
-        @param mimetype (string) The provided content MIME type.
-        @param stream (IOStream | None) The provided data stream. This can be
-        RDF or non-RDF content, or None. In the latter case, an empty container
-        is created.
-        @param **kwargs Arguments passed to the LDP class constructor.
-        '''
+        :param str uid: UID of the resource to be created or updated.
+        :param str mimetype: The provided content MIME type.
+        :param stream: The provided data stream. This can be
+            RDF or non-RDF content, or None. In the latter case, an empty
+            container is created.
+        :type stream: IOStream or None
+        :param \*\*kwargs: Arguments passed to the LDP class constructor.
+        """
         uri = nsc['fcres'][uid]

         if not stream and not mimetype:
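
A usage sketch for the converted parameters; payload and UID are illustrative. Per the docstring, omitting both the stream and the MIME type would create an empty container instead:

    import io
    from lakesuperior.model.ldp_factory import LdpFactory

    data = io.BytesIO(b'<http://ex.org/s> a <http://ex.org/Thing> .')
    # An RDF-parsable MIME type yields an LDP-RS instance.
    rsrc = LdpFactory.from_provided(
            '/my_rsrc', mimetype='text/turtle', stream=data)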
@@ -149,11 +150,11 @@ class LdpFactory:

     @staticmethod
     def is_rdf_parsable(mimetype):
-        '''
+        """
         Checks whether a MIME type supports RDF parsing by an RDFLib plugin.

-        @param mimetype (string) MIME type to check.
-        '''
+        :param str mimetype: MIME type to check.
+        """
         try:
             plugin.get(mimetype, parser.Parser)
         except plugin.PluginException:
@@ -164,11 +165,11 @@ class LdpFactory:

     @staticmethod
     def is_rdf_serializable(mimetype):
-        '''
+        """
         Checks whether a MIME type supports RDF serialization by an RDFLib plugin.

-        @param mimetype (string) MIME type to check.
-        '''
+        :param str mimetype: MIME type to check.
+        """
         try:
             plugin.get(mimetype, serializer.Serializer)
         except plugin.PluginException:
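
Both checks delegate to RDFLib's plugin registry, which registers parsers and serializers under MIME type names among others; the expected behavior, as a sketch:

    LdpFactory.is_rdf_parsable('text/turtle')              # True
    LdpFactory.is_rdf_parsable('image/png')                # False (PluginException caught)
    LdpFactory.is_rdf_serializable('application/rdf+xml')  # True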
@@ -179,7 +180,7 @@ class LdpFactory:

     @staticmethod
     def mint_uid(parent_uid, path=None):
-        '''
+        """
         Mint a new resource UID based on client directives.

         This method takes a parent ID and a tentative path and returns an LDP
@@ -188,13 +189,14 @@ class LdpFactory:
         This may raise an exception resulting in a 404 if the parent is not
         found or a 409 if the parent is not a valid container.

-        @param parent_uid (string) UID of the parent resource. It must be an
-        existing LDPC.
-        @param path (string) path to the resource, relative to the parent.
+        :param str parent_uid: UID of the parent resource. It must be an
+            existing LDPC.
+        :param str path: path to the resource, relative to the parent.

-        @return string The confirmed resource UID. This may be different from
-        what has been indicated.
-        '''
+        :rtype: str
+        :return: The confirmed resource UID. This may be different from
+            what has been indicated.
+        """
         def split_if_legacy(uid):
             if config['application']['store']['ldp_rs']['legacy_ptree_split']:
                 uid = tbox.split_uuid(uid)
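
A usage sketch for the documented parameters (names are hypothetical; per the docstring, the returned UID is authoritative and may differ from what was requested):

    # Mint a UID for a child of an existing container, suggesting a path:
    uid = LdpFactory.mint_uid('/my_container', path='child_1')

    # Without a path, a generated (presumably UUID-based) UID is returned.
    uid = LdpFactory.mint_uid('/my_container')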

+ 14 - 13
lakesuperior/model/ldp_nr.py

@@ -17,10 +17,10 @@ logger = logging.getLogger(__name__)


 class LdpNr(Ldpr):
-    '''LDP-NR (Non-RDF Source).
+    """LDP-NR (Non-RDF Source).

     Definition: https://www.w3.org/TR/ldp/#ldpnr
-    '''
+    """

     base_types = {
         nsc['fcrepo'].Binary,
@@ -31,9 +31,9 @@ class LdpNr(Ldpr):

     def __init__(self, uuid, stream=None, mimetype=None,
             disposition=None, **kwargs):
-        '''
+        """
         Extends Ldpr.__init__ by adding LDP-NR specific parameters.
-        '''
+        """
         super().__init__(uuid, **kwargs)

         self._imr_options = {}
@@ -68,11 +68,12 @@ class LdpNr(Ldpr):


     def create_or_replace(self, create_only=False):
-        '''
+        """
         Create a new binary resource with a corresponding RDF representation.

-        @param file (Stream) A Stream resource representing the uploaded file.
-        '''
+        :param bool create_only: Whether the resource is being created or
+            updated.
+        """
         # Persist the stream.
         self.digest, self.size = nonrdfly.persist(self.stream)
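
A creation sketch matching the constructor and the corrected parameter above; the stream content and UID are illustrative:

    import io
    from lakesuperior.model.ldp_nr import LdpNr

    nr = LdpNr(
            '/my_binary', stream=io.BytesIO(b'binary payload'),
            mimetype='application/octet-stream')
    # create_only=True signals creation rather than update, per the docstring.
    nr.create_or_replace(create_only=True)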
 
@@ -91,14 +92,14 @@ class LdpNr(Ldpr):
     ## PROTECTED METHODS ##

     def _add_srv_mgd_triples(self, create=False):
-        '''
+        """
         Add all metadata for the RDF representation of the LDP-NR.

-        @param stream (BufferedIO) The uploaded data stream.
-        @param mimetype (string) MIME type of the uploaded file.
-        @param disposition (defaultdict) The `Content-Disposition` header
-        content, parsed through `parse_rfc7240`.
-        '''
+        :param BufferedIO stream: The uploaded data stream.
+        :param str mimetype: MIME type of the uploaded file.
+        :param defaultdict disposition: The ``Content-Disposition`` header
+            content, parsed through ``parse_rfc7240``.
+        """
         super()._add_srv_mgd_triples(create)

         # File size.

+ 15 - 17
lakesuperior/model/ldp_rs.py

@@ -12,20 +12,21 @@ logger = logging.getLogger(__name__)


 class LdpRs(Ldpr):
-    '''
+    """
     LDP-RS (LDP RDF source).

     https://www.w3.org/TR/ldp/#ldprs
-    '''
+    """
     def __init__(self, uuid, repr_opts={}, handling='lenient', **kwargs):
-        '''
-        Extends Ldpr.__init__ by adding LDP-RS specific parameters.
-
-        @param handling (string) One of `strict`, `lenient` (the default) or
-        `none`. `strict` raises an error if a server-managed term is in the
-        graph. `lenient` removes all sever-managed triples encountered. `none`
-        skips all server-managed checks. It is used for internal modifications.
-        '''
+        """
+        Extends :meth:`Ldpr.__init__` by adding LDP-RS specific parameters.
+
+        :param str handling: One of ``strict``, ``lenient`` (the default) or
+        ``none``. ``strict`` raises an error if a server-managed term is in the
+        graph. ``lenient`` removes all server-managed triples encountered.
+        ``none`` skips all server-managed checks. It is used for internal
+        modifications.
+        """
         super().__init__(uuid, **kwargs)
         self.base_types = super().base_types | {
             nsc['fcrepo'].Container,
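
The three handling modes can be summed up in one call; a sketch with a hypothetical UID:

    # 'strict' raises on server-managed triples in the payload;
    # 'lenient' (the default) silently drops them;
    # 'none' skips the checks and is meant for internal modifications.
    rsrc = LdpRs('/my_rsrc', handling='strict')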
@@ -44,8 +45,7 @@ class LdpRs(Ldpr):


 class Ldpc(LdpRs):
-    '''LDPC (LDP Container).'''
-
+    """LDPC (LDP Container)."""
     def __init__(self, uuid, *args, **kwargs):
         super().__init__(uuid, *args, **kwargs)
         self.base_types |= {
@@ -56,7 +56,7 @@ class Ldpc(LdpRs):


 class LdpBc(Ldpc):
-    '''LDP-BC (LDP Basic Container).'''
+    """LDP-BC (LDP Basic Container)."""
     def __init__(self, uuid, *args, **kwargs):
         super().__init__(uuid, *args, **kwargs)
         self.base_types |= {
@@ -66,8 +66,7 @@ class LdpBc(Ldpc):


 class LdpDc(Ldpc):
-    '''LDP-DC (LDP Direct Container).'''
-
+    """LDP-DC (LDP Direct Container)."""
     def __init__(self, uuid, *args, **kwargs):
         super().__init__(uuid, *args, **kwargs)
         self.base_types |= {
@@ -77,8 +76,7 @@ class LdpDc(Ldpc):


 class LdpIc(Ldpc):
-    '''LDP-IC (LDP Indirect Container).'''
-
+    """LDP-IC (LDP Indirect Container)."""
    def __init__(self, uuid, *args, **kwargs):
         super().__init__(uuid, *args, **kwargs)
         self.base_types |= {
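
Each subclass only unions its own RDF types into the inherited set, so an indirect container accumulates the types of Ldpr, LdpRs, Ldpc and LdpIc combined; a sketch of the effect, with the exact type URIs assumed:

    ic = LdpIc('/my_ic')
    assert nsc['ldp'].Container in ic.base_types          # added by Ldpc
    assert nsc['ldp'].IndirectContainer in ic.base_types  # added by LdpIc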

Some files were not shown because too many files changed in this diff