
Add Dockerfile and support files (#13)

* Add Dockerfile and support files

* Move to /usr/local and provide instructions for running under Docker

* Prioritize Docker install instructions.
Michael B. Klein · 6 years ago · commit 561784bf08
6 changed files with 191 additions and 2 deletions
  1. Dockerfile (+12 -0)
  2. README.md (+22 -2)
  3. docker-compose.yml (+12 -0)
  4. docker/docker_entrypoint (+9 -0)
  5. docker/etc/application.yml (+105 -0)
  6. docker/etc/gunicorn.py (+31 -0)

+ 12 - 0
Dockerfile

@@ -0,0 +1,12 @@
+FROM        python:3.5
+MAINTAINER  Michael B. Klein <michael.klein@northwestern.edu>
+RUN         mkdir -p /usr/local /data
+WORKDIR     /usr/local
+ADD         . lakesuperior
+WORKDIR     /usr/local/lakesuperior
+RUN         cp ./docker/etc/* ./etc.defaults/
+RUN         pip install -r requirements.txt
+CMD         ./docker/docker_entrypoint
+EXPOSE      8000
+HEALTHCHECK --interval=30s --timeout=5s \
+  CMD curl -X OPTIONS -f http://localhost:8000/ || exit 1

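For quick experimentation without Compose, the image can also be built and run directly with plain Docker. A minimal sketch, assuming the image tag `lakesuperior` and the named volume `lakesuperior_data` (both illustrative names, not anything defined by the project):

    # Build the image from the repo root and run it with a named volume
    # mounted at /data, mirroring what docker-compose.yml does.
    docker build -t lakesuperior .
    docker run -d -p 8000:8000 -v lakesuperior_data:/data lakesuperior

    # The HEALTHCHECK above probes port 8000 with an OPTIONS request, so
    # `docker ps` will report the container as healthy once the app is up.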
+ 22 - 2
README.md

@@ -57,10 +57,30 @@ a full-fledged triplestore at the moment.
 In its current status, LAKEsuperior is aimed at developers and
 hands-on managers who are interested in evaluating this project.
 
-## Installation
+## Quick Install: Running in Docker
+
+You can run LAKEsuperior in Docker for a hands-off quickstart.
+
+[Docker](http://docker.com/) is a containerization platform that allows you to run
+services in lightweight, isolated environments without having to install all of
+the prerequisites on your host machine.
+
+1. Install the correct [Docker Community Edition](https://www.docker.com/community-edition)
+   for your operating system.
+1. Clone this repo: `git clone https://github.com/scossu/lakesuperior.git`
+1. `cd` into the cloned repo folder
+1. Run `docker-compose up`
+
+LAKEsuperior should now be available at `http://localhost:8000/`.
+
+The provided Docker configuration includes persistent storage as a self-contained Docker
+volume, meaning your data will persist between runs. If you want to clear the decks,
+simply run `docker-compose down -v`.
+
+## Manual Install (a bit less quick, a bit more power)
 
 **Note:** These instructions have been tested on Linux. They may work on Darwin
-with little or no modification, and possibly on Windows with some
+with little modification, and possibly on Windows with some
 modifications. Feedback is welcome.
 
 ### Dependencies

+ 12 - 0
docker-compose.yml

@@ -0,0 +1,12 @@
+version: '2'
+
+volumes:
+  lakesuperior:
+
+services:
+  lakesuperior:
+    build: .
+    volumes:
+      - lakesuperior:/data
+    ports:
+      - "8000:8000"

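The named `lakesuperior` volume above is what keeps `/data` around between runs. A short sketch of the volume lifecycle with Compose; note that the on-disk volume name is prefixed with the Compose project name, assumed here to be the `lakesuperior` directory name:

    # Start the stack; the volume is created on first run.
    docker-compose up -d

    # Inspect the persistent volume backing /data.
    docker volume ls
    docker volume inspect lakesuperior_lakesuperior

    # Stop and remove the container but keep the data...
    docker-compose down
    # ...or remove the data as well, as the README notes.
    docker-compose down -v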
+ 9 - 0
docker/docker_entrypoint

@@ -0,0 +1,9 @@
+#!/bin/sh
+
+mkdir -p /data/log /data/run /data/bootstrap
+cp ./data/bootstrap/* /data/bootstrap
+coilmq &
+if [ ! -d /data/ldpnr_store ] && [ ! -d /data/ldprs_store ]; then
+  echo yes | ./lsup-admin bootstrap
+fi
+exec ./fcrepo

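Since `/data` persists in the volume, the store-directory check above means the repository is bootstrapped only on the very first start. If you ever need to re-run the bootstrap against a running Compose stack, a hedged example (the service name `lakesuperior` comes from docker-compose.yml above, and `lsup-admin` is the same tool the entrypoint calls):

    # Run the bootstrap by hand inside the running container.
    # WARNING: like the entrypoint, this resets the repository contents.
    docker-compose exec lakesuperior sh -c 'echo yes | ./lsup-admin bootstrap'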
+ 105 - 0
docker/etc/application.yml

@@ -0,0 +1,105 @@
+# Default repository configuration.
+#
+# Copy this file or parts of it in a separate location to override these
+# settings. Individual items can be selectively overridden as long as the YAML
+# hierarchical structure is kept.
+
+# Configuration for binary path and fixity check generation. The hash is a
+# checksum of the contents of the file.
+uuid:
+    # Algorithm used to calculate the hash that generates the content path.
+    # One of: sha1, sha224, sha256, sha384, or sha512, corresponding to the
+    # hashlib function of the same name:
+    # https://docs.python.org/3/library/hashlib.html
+    algo: sha1
+
+# Data store configuration.
+store:
+    # The semantic store used for persisting LDP-RS (RDF Source) resources.
+    # MUST support SPARQL 1.1 query and update.
+    ldp_rs:
+        # Directory where the RDF data files are stored.
+        location: /data/ldprs_store
+
+        # Store layout. At the moment, only `rsrc_centric_layout` is supported.
+        layout: rsrc_centric_layout
+
+        # Whether to check if the object of a client-provided triple is the URI
+        # of a repository-managed resource and verify that it exists.
+        # If set to false, properties are allowed to point to resources in the
+        # repository that do not exist. Also, if a resource is deleted, inbound
+        # relationships may not be cleaned up.
+        # This can be one of `False` (boolean), `lenient` or `strict`. `False`
+        # does not check for referential integrity. `lenient` quietly drops a
+        # user-provided triple if its object violates referential integrity.
+        # `strict` raises an exception.
+        #
+        # Changes to this parameter require a full migration.
+        referential_integrity: lenient
+
+        # This mimics Fedora 4 behavior, which segments an identifier on POST.
+        legacy_ptree_split: False
+
+    # The path used to persist LDP-NR (Non-RDF Source) resources, i.e. bitstreams.
+    # This is for now a POSIX filesystem. Other solutions such as HDFS may be
+    # possible in the future.
+    ldp_nr:
+        # See store.ldp_rs.layout.
+        layout: default_layout
+
+        # The filesystem path to the root of the binary store.
+        path: /data/ldpnr_store
+
+        # How to split the balanced pairtree to generate a path. The hash
+        # string is defined by the uuid.algo parameter value.
+        # This parameter defines how many characters are in each branch. 2-4 is
+        # the recommended setting. NOTE: a value of 2 will generate up to 256
+        # sub-folders in a folder; 3 will generate max. 4096 and 4 will
+        # generate max. 65536. Check your filesystem capabilities before
+        # setting this to a non-default value.
+        #
+        # Changes to this parameter require a full migration.
+        pairtree_branch_length: 2
+
+        # Max. number of branches to generate. 0 will split the string until
+        # it reaches the end.
+        # E.g. if the hash value is 0123456789abcdef0123456789abcdef and the
+        # branch length value is 2, and the branch number is 4, the path will
+        # be 01/23/45/67/89abcdef0123456789abcdef. For a value of 0 it will be
+        # 01/23/45/67/89/ab/cd/ef/01/23/45/67/89/ab/cd/ef. Be aware that deeply
+        # nested directory structures may tax some of the operating system's
+        # services that scan for files, such as `updatedb`. Check your system
+        # capabilities for maximum nested directories before changing the
+        # default.
+        #
+        # Changes to this parameter require a full migration.
+        pairtree_branches: 4
+
+# Configuration for messaging.
+messaging:
+    # List of channels to send messages to.
+    # Each channel must define the `endpoint` and the `level` parameters.
+    routes:
+          # Output handler. Currently only `StompHandler` is supported.
+        - handler: StompHandler
+
+          # Whether this route is used. If False, no messages will be emitted
+          # for this route.
+          active: True
+
+          host: 127.0.0.1
+          port: 61613
+          username:
+          password:
+          protocol: '12'
+          destination: '/topic/fcrepo'
+
+          # Message format: at the moment the following are supported:
+          # - `ASResourceFormatter`: Sends information about a resource being
+          #   created, updated or deleted, by whom and when, with no further
+          #   information about what changed.
+          # - `ASDeltaFormatter`: Sends the same information as
+          #   `ASResourceFormatter` with the addition of the triples that were
+          #   added and the ones that were removed in the request. This may be
+          #   used to send rich provenance data to a preservation system.
+          formatter: ASResourceFormatter
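As a worked illustration of the `uuid.algo`, `pairtree_branch_length` and `pairtree_branches` settings above, this is roughly how a content checksum maps to a storage path. It is a sketch only, not the project's actual code; the function name is made up:

    import hashlib

    def pairtree_path(checksum, branch_length=2, branches=4):
        """Split a hex checksum into pairtree branches plus a remainder."""
        if branches == 0:
            # Split the whole string into branch_length-sized segments.
            return '/'.join(checksum[i:i + branch_length]
                            for i in range(0, len(checksum), branch_length))
        cut = branch_length * branches
        segments = [checksum[i:i + branch_length]
                    for i in range(0, cut, branch_length)]
        return '/'.join(segments + [checksum[cut:]])

    # With the defaults above (sha1 digest, branch length 2, 4 branches):
    digest = hashlib.sha1(b'file contents').hexdigest()
    path = pairtree_path(digest)

    # Reproducing the documented example:
    pairtree_path('0123456789abcdef0123456789abcdef')
    # -> '01/23/45/67/89abcdef0123456789abcdef'
    pairtree_path('0123456789abcdef0123456789abcdef', branches=0)
    # -> '01/23/45/67/89/ab/cd/ef/01/23/45/67/89/ab/cd/ef'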

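To see what the messaging route above emits, a consumer can subscribe to the configured STOMP destination. A rough sketch assuming the third-party `stomp.py` 4.x client (not a stated dependency of this image; the listener callback signature changed in later versions) and network access to the broker; docker-compose.yml does not publish the coilmq port, so this would run inside the container or against a broker of your own:

    import time

    import stomp

    class FcrepoListener(stomp.ConnectionListener):
        def on_message(self, headers, body):
            # Each body is produced by the configured formatter
            # (ASResourceFormatter in the route above).
            print(body)

    conn = stomp.Connection([('127.0.0.1', 61613)])
    conn.set_listener('', FcrepoListener())
    conn.start()
    conn.connect(wait=True)
    conn.subscribe(destination='/topic/fcrepo', id='1', ack='auto')

    # Keep the process alive long enough to receive messages.
    time.sleep(60)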
+ 31 - 0
docker/etc/gunicorn.py

@@ -0,0 +1,31 @@
+# See: http://docs.gunicorn.org/en/stable/settings.html
+
+# Directory in which to store logs, the PID file, etc.
+_data_dir = '/data/'
+
+# Set app_mode to either 'prod', 'test' or 'dev'.
+# 'prod' is normal running mode. 'test' is used for running test suites.
+# 'dev' is similar to normal mode but with reload and debug enabled.
+_app_mode = 'dev'
+
+
+bind = "0.0.0.0:8000"
+workers = 4
+worker_class = 'gevent'
+max_requests = 512
+
+#user = "user"
+#group = "group"
+
+raw_env = 'APP_MODE={}'.format(_app_mode)
+
+# Set this to the directory containing logs, etc.
+# The path must end with a slash.
+#chdir = "/usr/local/lakesuperior/"
+
+daemon = _app_mode == 'prod'
+pidfile = _data_dir + "run/fcrepo.pid"
+reload = _app_mode == 'dev'
+
+accesslog = _data_dir + "log/gunicorn-access.log"
+errorlog = _data_dir + "log/gunicorn-error.log"
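The `raw_env` line above is how the chosen mode reaches the application: gunicorn injects `APP_MODE` into each worker's environment, and the app can branch on it. An illustrative sketch, not the project's actual startup code:

    import os

    # Read the mode injected by gunicorn's raw_env setting, defaulting to
    # production behavior if the variable is absent.
    app_mode = os.environ.get('APP_MODE', 'prod')

    if app_mode == 'dev':
        # Corresponds to the config above: reload enabled, not daemonized.
        print('Running in development mode.')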