3 Commits da1a38820d ... 85b53cd98b

Author SHA1 Message Date
  scossu 85b53cd98b Add Quickstart in README. 5 days ago
  scossu ebc5e31dde Watcher: use logger to write to file. 5 days ago
  scossu 6db9b4a36f Dump archive RDF; fix volksdata binding errors. 5 days ago
9 changed files with 186 additions and 32 deletions
  1. 89 0
      README.md
  2. 8 6
      doc/roadmap.md
  3. 4 4
      src/core.lua
  4. 15 8
      src/generator.lua
  5. 22 5
      src/repo.lua
  6. 1 1
      src/submission.lua
  7. 1 1
      src/transformers.lua
  8. 22 2
      src/util/pkar.lua
  9. 24 5
      src/util/watcher.lua

+ 89 - 0
README.md

@@ -52,6 +52,95 @@ the place one day, they could be periodically harvested, linked together, and
 presented in one large, central archive (it's Linked Data, after all), without
 presented in one large, central archive (it's Linked Data, after all), without
 any detriment to the independence of the individual archives.
 any detriment to the independence of the individual archives.
 
 
+## Quickstart
+
+This has been tested on Linux only. It's not guaranteed to work on other
+systems at the moment.
+
+### System prerequisites
+
+- A build environment (at least Git, libc, a C compiler, and Make)
+- UUID library (`uuid/uuid.h` - util-linux or linux-headers in most distros)
+- xxhash development package
+- lmdb development package
+- libvips development package
+- Lua 5.4 development package (lua-dev in some distros)
+- Luarocks (configured for Lua 5.4)
+
+If using Arch Linux:
+
+```
+pacman -Syu
+pacman -S base-devel util-linux-libs git xxhash lmdb libvips lua luarocks
+```
+
+### Install Volksdata & Pocket Archive
+
+Pocket Archive and Volksdata are still alpha and not in the Luarocks artifact
+repo yet, so the rocks must be installed manually for the time being.
+
+Installing locally or in a dedicated container is strongly recommended at this
+stage.
+
+```
+# Note: tested on Archlinux. Other distros (especially Alpine) may need tweaks.
+eval $(luarocks path)  # for local install
+luarocks install --local debugger  # Not in dependencies file but temporarily required
+git clone --recurse-submodules https://git.knowledgetx.com/scossu/volksdata_lua.git
+cd volksdata_lua
+luarocks build --local
+lua test.lua  # optional
+cd ../
+git clone --recurse-submodules https://git.knowledgetx.com/scossu/pocket_archive.git
+cd pocket_archive
+luarocks build --local
+luarocks path >> ~/.bashrc  # or whatever shell init script you use at login
+```
+
+### Run demo submission
+
+Initialize the archive first:
+
+```
+pkar init
+```
+
+This will create the required folders and database file in the archive root
+(a temporary folder by default). Then:
+
+```
+pkar submission test/sample_submission/demo01/pkar_submission.csv
+```
+
+### Generate static site from archive
+
+```
+pkar gen-site
+```
+
+This will generate the static site in `out/html/`. Note that this is static
+HTML but it needs a web server to resolve links (a completely server-less
+version is in the works).
+
+If you don't have a configured web server yet, the provided `darkhttpd` will
+work in a pinch:
+
+```
+cd ext/darkhttpd/
+make
+cd -
+```
+
+Serve the site:
+
+```
+./ext/darkhttpd/darkhttpd out/html
+```
+
+(see more options with `darkhttpd --help`)
+
+Point your browser to `localhost:8080` and enjoy.
+
 ## Basic concepts
 ## Basic concepts
 
 
 Until some proper reference is written, this should serve as a high-level
 Until some proper reference is written, this should serve as a high-level

+ 8 - 6
doc/roadmap.md

@@ -54,17 +54,21 @@ usage and opportunities for expanding adoption in relevant areas.
   - ✓ Watch local folder and trigger deposit
   - ✓ Watch local folder and trigger deposit
   - ✓ Option to regenerate site after deposit
   - ✓ Option to regenerate site after deposit
   - ✓ Option to clean up sources & LL on success
   - ✓ Option to clean up sources & LL on success
--  Proper collection handling
+-  Proper collection handling
   - ✓ Dedicated template
   - ✓ Dedicated template
   - ✓ Link to markdown doc for presentation page
   - ✓ Link to markdown doc for presentation page
-  - ❏ Handle artifacts as members
+  - ✓ Handle artifacts as members
+- ⚒ Preservation
+  - ✓ Dump archive RDF
+  - ❏ Backup full environment (including config)
+  - ❏ Restore whole archive from RDF & data folder
 - ❏ Content model
 - ❏ Content model
   - ❏ Multilingual support
   - ❏ Multilingual support
   - ❏ Local overrides
   - ❏ Local overrides
   - ❏ Relationships inference
   - ❏ Relationships inference
   - ❏ Markdown support for individual fields
   - ❏ Markdown support for individual fields
--  Generator
-  -  Generate site for one collection only
+-  Generator
+  -  Generate site for one collection only
   - ❏ htmlgen option for local file or webserver URL generation
   - ❏ htmlgen option for local file or webserver URL generation
   - ❏ Generate LL (multi)
   - ❏ Generate LL (multi)
   - ❏ Generate RDF (multi)
   - ❏ Generate RDF (multi)
@@ -72,8 +76,6 @@ usage and opportunities for expanding adoption in relevant areas.
   - ❏ Category browsing
   - ❏ Category browsing
   - ❏ Improve search indexing
   - ❏ Improve search indexing
   - ❏ Enhanced styling and access
   - ❏ Enhanced styling and access
-- ❏ CLI
-  - ❏ Dump & restore (whole archive & individual resources)
 - ❏ Testing
 - ❏ Testing
     - ❏ Unit tests
     - ❏ Unit tests
     - ❏ >100 resource data set
     - ❏ >100 resource data set

+ 4 - 4
src/core.lua

@@ -39,10 +39,10 @@ local M = {
       {"warn",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"warn",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"info",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"info",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"debug", "%T PKAR %-5L%f (%S) ",   "%n", io.stderr},
       {"debug", "%T PKAR %-5L%f (%S) ",   "%n", io.stderr},
-      timefn=(socket or {}).gettime,  -- use socket.gettime if available
-      report="debug",   -- to which level should internal log events be passed?
-      hookrequire=true, -- also report calls to require()
-      level="debug",    -- output levels up to and including "dbg"
+      timefn=(require "socket" or {}).gettime,
+      report="debug",
+      hookrequire=true,
+      level="debug",
     },
     },
 
 
     -- Commonly used terms.
     -- Commonly used terms.

+ 15 - 8
src/generator.lua

@@ -25,9 +25,6 @@ local dbg = require "debugger"
 -- "nil" table - for missing key fallback in chaining.
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
 local NT = {}
 
 
--- All resource subjects.
-local subjects
-
 -- Extension for type-based icon files.
 -- Extension for type-based icon files.
 local ICON_EXT = ".svg"
 local ICON_EXT = ".svg"
 
 
@@ -584,7 +581,7 @@ M.generate_resource = function(s)
     local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
     local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
     dir.makepath(path.dirname(res_path))
     dir.makepath(path.dirname(res_path))
     local ofh = assert(io.open(res_path, "w"))
     local ofh = assert(io.open(res_path, "w"))
-    ofh:write(repo.serialize_rsrc(s, "ttl"))
+    for chunk in repo.serialize_rsrc(s, "ttl") do ofh:write(chunk) end
     ofh:close()
     ofh:close()
 
 
     -- Generate HTML doc.
     -- Generate HTML doc.
@@ -604,9 +601,19 @@ M.generate_resource = function(s)
 end
 end
 
 
 
 
-M.generate_resources = function()
-    -- Look up if subjects are already populated.
-    subjects = subjects or repo.gr:unique_terms(triple.POS_S)
+M.generate_resources = function(coll_id)
+    -- TODO It's more complicated than this. Each member in the collection
+    -- must be scanned recursively for outbound links and visited links must
+    -- be noted down to avoid loops.
+    --[[
+    if coll_id then
+        subject_ts = repo.gr:term_set(
+            term.new_iriref_ns("par:" .. coll_id), triple.POS_S,
+            model.id_to_uri.has_member, triple.POS_P
+        )
+    else subjects_ts = repo.gr:unique_terms(triple.POS_S) end
+    --]]
+    subjects_ts = repo.gr:unique_terms(triple.POS_S)
 
 
     -- Initialize the JSON template with an opening brace.
     -- Initialize the JSON template with an opening brace.
     local ofh = assert(io.open(index_path, "w"))
     local ofh = assert(io.open(index_path, "w"))
@@ -614,7 +621,7 @@ M.generate_resources = function()
     ofh:close()
     ofh:close()
 
 
     -- TODO parallelize
     -- TODO parallelize
-    for s in subjects:iter() do assert(M.generate_resource(s)) end
+    for s in subjects_ts:iter() do assert(M.generate_resource(s)) end
 
 
     -- Close the open list brace in the JSON template after all the resources
     -- Close the open list brace in the JSON template after all the resources
     -- have been added.
     -- have been added.

+ 22 - 5
src/repo.lua

@@ -48,15 +48,32 @@ end
 
 
 M.store_updates = function(tmp_gr, s)
 M.store_updates = function(tmp_gr, s)
     -- TODO use a transaction when volksdata_lua supports it.
     -- TODO use a transaction when volksdata_lua supports it.
-    logger:debug("Graph: ", tmp_gr:encode("ttl"))
-
-    local stored_gr = graph.new(pkar.store, term.DEFAULT_CTX)
+    logger:debug("Graph: ")
+    for chunk in tmp_gr:encode("ttl") do logger:debug(chunk) end
 
 
     logger:debug("Removing stored triples.")
     logger:debug("Removing stored triples.")
-    stored_gr:remove(s)
+    M.gr:remove(s)
 
 
     logger:info("Storing triples.")
     logger:info("Storing triples.")
-    return tmp_gr:copy(stored_gr)
+    return tmp_gr:copy(M.gr)
+end
+
+
+--[[ Dump the whole archive RDF to a file at the given path.
+
+    The result is a software-agnostic RDF representation of the metadata
+    (Turtle by default, or the serialization named by `codec`). NOTE: GZip
+    compression is the intended output format but is not applied by this
+    function yet; the serialization is written as-is. The ores data folder can
+    be backed up via OS-level file operations.
+
+    The restore() function, combined with a copy of
+    the ores folder, shall create a fully functional repo.
+
+    TODO configuration backup is not yet implemented and should be included for
+    a completely self-sufficient backup.
+]]
+M.dump = function(fpath, codec)
+    local fh = assert(io.open(fpath, "wb"))
+    for chunk in M.gr:encode(codec or "ttl") do fh:write(chunk) end
 end
 end
 
 
 
 

+ 1 - 1
src/submission.lua

@@ -190,7 +190,7 @@ end
 --]]
 --]]
 local function rsrc_to_graph(rsrc)
 local function rsrc_to_graph(rsrc)
     local rmod = model.types[rsrc.content_type]
     local rmod = model.types[rsrc.content_type]
-    logger:info("Updating resource md: ", pp.write(rsrc))
+    logger:debug("Updating resource md: ", pp.write(rsrc))
 
 
     local s = term.new_iriref_ns(rsrc.id)
     local s = term.new_iriref_ns(rsrc.id)
     local gr = graph.new(nil)
     local gr = graph.new(nil)

+ 1 - 1
src/transformers.lua

@@ -11,7 +11,7 @@ M = {}
 
 
 -- Resize an image to a maximum size on either dimension.
 -- Resize an image to a maximum size on either dimension.
 M.img_resize = function(src, dest, size)
 M.img_resize = function(src, dest, size)
-    print(("Resizing image %s with size %d to %s"):format(src, size, dest))
+    logger:debug(("Resizing image %s with size %d to %s"):format(src, size, dest))
     -- TODO Make streaming if possible.
     -- TODO Make streaming if possible.
     dir.makepath(path.dirname(dest))
     dir.makepath(path.dirname(dest))
     local img = vips.Image.thumbnail(src, size)
     local img = vips.Image.thumbnail(src, size)

+ 22 - 2
src/util/pkar.lua

@@ -63,7 +63,7 @@ gen_site = cli.command {
     function(args) gen.generate_site() end
     function(args) gen.generate_site() end
 }
 }
 
 
-gen_rdf = cli.command {
+dump_res = cli.command {
     "Generate an RDF representation of a resource.",
     "Generate an RDF representation of a resource.",
 
 
     cli.positional "id" {
     cli.positional "id" {
@@ -93,7 +93,7 @@ gen_rdf = cli.command {
     end,
     end,
 }
 }
 
 
-gen_ll = cli.command {
+dump_ll = cli.command {
     "Generate a laundry list for a stored resource.",
     "Generate a laundry list for a stored resource.",
 
 
     cli.positional "id" {
     cli.positional "id" {
@@ -118,6 +118,26 @@ gen_ll = cli.command {
     end
     end
 }
 }
 
 
+dump_archive = cli.command {
+    "Generate a RDF representation of the full archive.",
+
+    cli.positional "path" {
+        "Destination file path.",
+        type = cli.string,
+    },
+    cli.flag "f,format" {
+        "RDF serialization format. One of `ttl` [default], `nt`.",
+        type = cli.string,
+        default = "ttl",
+    },
+
+    function(args)
+        repo.dump(args.path, args.format)
+        print ("File written to ", args.path)
+    end,
+}
+
+
 cli.program {
 cli.program {
     "Pocket Archive command line interface.",
     "Pocket Archive command line interface.",
 }
 }

+ 24 - 5
src/util/watcher.lua

@@ -3,6 +3,7 @@
 local cli = require "cli"
 local cli = require "cli"
 local plpath = require "pl.path"
 local plpath = require "pl.path"
 local signal = require "posix.signal"
 local signal = require "posix.signal"
+local sllog = require "sllog"
 local watchdog = require "watchdog"
 local watchdog = require "watchdog"
 
 
 local sub = require "pocket_archive.submission"
 local sub = require "pocket_archive.submission"
@@ -30,12 +31,18 @@ cli.program {
 
 
     cli.positional "path" {"Local path of folder to watch."},
     cli.positional "path" {"Local path of folder to watch."},
 
 
-    cli.flag "l,logfile" {
+    cli.flag "f,logfile" {
         "Log file.",
         "Log file.",
         type = cli.string,
         type = cli.string,
         default="/dev/stderr",
         default="/dev/stderr",
     },
     },
 
 
+    cli.flag "l,loglevel" {
+        "Log level: 1 = error, 2 = warning, 3 = info [default], 4 = debug.",
+        type = cli.number,
+        default = 3,
+    },
+
     cli.flag "g,gen-site" {
     cli.flag "g,gen-site" {
         "(Re-)generate the website after each submission.",
         "(Re-)generate the website after each submission.",
         type = cli.boolean,
         type = cli.boolean,
@@ -49,12 +56,24 @@ cli.program {
     function(args)
     function(args)
         local wd = watchdog.init()
         local wd = watchdog.init()
         local logfile = assert(io.open(args.logfile, "a"))
         local logfile = assert(io.open(args.logfile, "a"))
-        logfile:write("Starting Pocket Archive watchdog on ")
-        logfile:write(args.path, "\n")
+
+        local logger = sllog:init{
+            {"err",   "%T %-5L ",          "%n", logfile},
+            {"warn",  "%T %-5L ",          "%n", logfile},
+            {"info",  "%T %-5L ",          "%n", logfile},
+            {"debug", "%T %-5L%f (%S) ",   "%n", logfile},
+            timefn=(require "socket" or {}).gettime,
+            report="debug",
+            hookrequire=false,
+            level=args.loglevel,
+        }
+
+        logger:info("Starting Pocket Archive watchdog on ")
+        logger:info(args.path, "\n")
 
 
         wd:add(args.path, watchdog.IN_CLOSE_WRITE, function(ev)
         wd:add(args.path, watchdog.IN_CLOSE_WRITE, function(ev)
-            logfile:write("Created new file: ", ev.name, " mask: ", ev.mask)
-            logfile:write("\n")
+            logger:info("Created new file: ", ev.name, " mask: ", ev.mask)
+            logger:info("\n")
             if ev.name:find("pkar_submission.*%.csv") then
             if ev.name:find("pkar_submission.*%.csv") then
                 -- TODO spawn subprocess
                 -- TODO spawn subprocess
                 sub.deposit(plpath.join(args.path, ev.name), args.cleanup)
                 sub.deposit(plpath.join(args.path, ev.name), args.cleanup)