3 commits da1a38820d ... 85b53cd98b

Author SHA1 Message Date
  scossu 85b53cd98b Add Quickstart in README. 5 days ago
  scossu ebc5e31dde Watcher: use logger to write to file. 5 days ago
  scossu 6db9b4a36f Dump archive RDF; fix volksdata binding errors. 5 days ago
9 changed files with 186 additions and 32 deletions
  1. README.md (+89 -0)
  2. doc/roadmap.md (+8 -6)
  3. src/core.lua (+4 -4)
  4. src/generator.lua (+15 -8)
  5. src/repo.lua (+22 -5)
  6. src/submission.lua (+1 -1)
  7. src/transformers.lua (+1 -1)
  8. src/util/pkar.lua (+22 -2)
  9. src/util/watcher.lua (+24 -5)

+ 89 - 0
README.md

@@ -52,6 +52,95 @@ the place one day, they could be periodically harvested, linked together, and
 presented in one large, central archive (it's Linked Data, after all), without
 any detriment to the independence of the individual archives.
 
+## Quickstart
+
+This has been tested on Linux only. It's not guaranteed to work on other
+systems at the moment.
+
+### System prerequisites
+
+- A build environment (at least Git, libc, a C compiler, and Make)
+- UUID library (`uuid/uuid.h` - util-linux or linux-headers in most distros)
+- xxhash development package
+- lmdb development package
+- libvips development package
+- Lua 5.4 development package (lua-dev in some distros)
+- Luarocks (configured for Lua 5.4)
+
+If using Arch Linux:
+
+```
+pacman -Syu
+pacman -S base-devel util-linux-libs git xxhash lmdb libvips lua luarocks
+```
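+
+If using Debian or Ubuntu, these packages should be roughly equivalent (names
+are a best guess and may vary between releases):
+
+```
+apt update
+apt install build-essential git uuid-dev libxxhash-dev liblmdb-dev \
+    libvips-dev liblua5.4-dev luarocks
+```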
+
+### Install Volksdata & Pocket Archive
+
+Pocket Archive and Volksdata are still in alpha and not yet published to the
+Luarocks repository, so the rocks must be installed manually for the time
+being.
+
+Installing locally or in a dedicated container is strongly recommended at this
+stage.
+
+```
+# Note: tested on Arch Linux. Other distros (especially Alpine) may need tweaks.
+eval $(luarocks path)  # for local install
+luarocks install --local debugger  # Not in dependencies file but temporarily required
+git clone --recurse-submodules https://git.knowledgetx.com/scossu/volksdata_lua.git
+cd volksdata_lua
+luarocks build --local
+lua test.lua  # optional
+cd ../
+git clone --recurse-submodules https://git.knowledgetx.com/scossu/pocket_archive.git
+cd pocket_archive
+luarocks build --local
+luarocks path >> ~/.bashrc  # or whatever shell init script you use at login
+```
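+
+Optionally, confirm that both rocks were registered in the local tree:
+
+```
+luarocks list
+```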
+
+### Run demo submission
+
+Initialize the archive first:
+
+```
+pkar init
+```
+
+This will create the required folders and database file in the archive root
+(a temporary folder by default). Then:
+
+```
+pkar submission test/sample_submission/demo01/pkar_submission.csv
+```
+
+### Generate static site from archive
+
+```
+pkar gen-site
+```
+
+This will generate the static site in `out/http/`. Note that this is static
+HTML, but it needs a web server to resolve links (a completely server-less
+version is in the works).
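+
+Any static file server pointed at the output folder will do; for example, if
+Python 3 happens to be installed (adjust the path if your output folder
+differs):
+
+```
+python3 -m http.server 8080 -d out/html
+```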
+
+If you don't have a configured web server yet, the provided `darkhttpd` will
+work in a pinch:
+
+```
+cd ext/darkhttpd/
+make
+cd -
+```
+
+Serve the site:
+
+```
+./ext/darkhttpd/darkhttpd out/html
+```
+
+(see more options with `darkhttpd --help`)
+
+Point your browser to `localhost:8080` and enjoy.
+
 ## Basic concepts
 
 Until some proper reference is written, this should serve as a high-level

+ 8 - 6
doc/roadmap.md

@@ -54,17 +54,21 @@ usage and opportunities for expanding adoption in relevant areas.
   - ✓ Watch local folder and trigger deposit
   - ✓ Option to regenerate site after deposit
   - ✓ Option to clean up sources & LL on success
--  Proper collection handling
+-  Proper collection handling
   - ✓ Dedicated template
   - ✓ Link to markdown doc for presentation page
-  - ❏ Handle artifacts as members
+  - ✓ Handle artifacts as members
+- ⚒ Preservation
+  - ✓ Dump archive RDF
+  - ❏ Backup full environment (including config)
+  - ❏ Restore whole archive from RDF & data folder
 - ❏ Content model
   - ❏ Multilingual support
   - ❏ Local overrides
   - ❏ Relationships inference
   - ❏ Markdown support for individual fields
--  Generator
-  -  Generate site for one collection only
+-  Generator
+  -  Generate site for one collection only
   - ❏ htmlgen option for local file or webserver URL generation
   - ❏ Generate LL (multi)
   - ❏ Generate RDF (multi)
@@ -72,8 +76,6 @@ usage and opportunities for expanding adoption in relevant areas.
   - ❏ Category browsing
   - ❏ Improve search indexing
   - ❏ Enhanced styling and access
-- ❏ CLI
-  - ❏ Dump & restore (whole archive & individual resources)
 - ❏ Testing
     - ❏ Unit tests
     - ❏ >100 resource data set

+ 4 - 4
src/core.lua

@@ -39,10 +39,10 @@ local M = {
       {"warn",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"info",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"debug", "%T PKAR %-5L%f (%S) ",   "%n", io.stderr},
-      timefn=(socket or {}).gettime,  -- use socket.gettime if available
-      report="debug",   -- to which level should internal log events be passed?
-      hookrequire=true, -- also report calls to require()
-      level="debug",    -- output levels up to and including "dbg"
+      timefn=(require "socket" or {}).gettime,
+      report="debug",
+      hookrequire=true,
+      level="debug",
     },
 
     -- Commonly used terms.

+ 15 - 8
src/generator.lua

@@ -25,9 +25,6 @@ local dbg = require "debugger"
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
 
--- All resource subjects.
-local subjects
-
 -- Extension for type-based icon files.
 local ICON_EXT = ".svg"
 
@@ -584,7 +581,7 @@ M.generate_resource = function(s)
     local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
     dir.makepath(path.dirname(res_path))
     local ofh = assert(io.open(res_path, "w"))
-    ofh:write(repo.serialize_rsrc(s, "ttl"))
+    for chunk in repo.serialize_rsrc(s, "ttl") do ofh:write(chunk) end
     ofh:close()
 
     -- Generate HTML doc.
@@ -604,9 +601,19 @@ M.generate_resource = function(s)
 end
 
 
-M.generate_resources = function()
-    -- Look up if subjects are already populated.
-    subjects = subjects or repo.gr:unique_terms(triple.POS_S)
+M.generate_resources = function(coll_id)
+    -- TODO It's more complicated than this. Each member in the collection
+    -- must be scanned recursively for outbound links and visited links must
+    -- be noted down to avoid loops.
+    --[[
+    if coll_id then
+        subject_ts = repo.gr:term_set(
+            term.new_iriref_ns("par:" .. coll_id), triple.POS_S,
+            model.id_to_uri.has_member, triple.POS_P
+        )
+    else subjects_ts = repo.gr:unique_terms(triple.POS_S) end
+    --]]
+    subjects_ts = repo.gr:unique_terms(triple.POS_S)
 
     -- Initialize the JSON template with an opening brace.
     local ofh = assert(io.open(index_path, "w"))
@@ -614,7 +621,7 @@ M.generate_resources = function()
     ofh:close()
 
     -- TODO parallelize
-    for s in subjects:iter() do assert(M.generate_resource(s)) end
+    for s in subjects_ts:iter() do assert(M.generate_resource(s)) end
 
     -- Close the open list brace in the JSON template after all the resources
     -- have been added.
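
A minimal sketch of the recursive scan described in the TODO above, independent
of the volksdata API: `get_links` is a hypothetical caller-supplied lookup that
returns the outbound subjects of a resource (the real code would query
`repo.gr`), and subject string forms could serve as table keys if term objects
are not interned.

```
-- Walk a collection's members depth-first, recording each visited subject
-- once so that cyclic links between resources do not cause infinite recursion.
local function collect_subjects(s, get_links, visited)
    visited = visited or {}
    if visited[s] then return visited end   -- already scanned: stop here
    visited[s] = true                       -- mark before recursing
    for _, linked in ipairs(get_links(s)) do
        collect_subjects(linked, get_links, visited)
    end
    return visited
end
```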

+ 22 - 5
src/repo.lua

@@ -48,15 +48,32 @@ end
 
 M.store_updates = function(tmp_gr, s)
     -- TODO use a transaction when volksdata_lua supports it.
-    logger:debug("Graph: ", tmp_gr:encode("ttl"))
-
-    local stored_gr = graph.new(pkar.store, term.DEFAULT_CTX)
+    logger:debug("Graph: ")
+    for chunk in tmp_gr:encode("ttl") do logger:debug(chunk) end
 
     logger:debug("Removing stored triples.")
-    stored_gr:remove(s)
+    M.gr:remove(s)
 
     logger:info("Storing triples.")
-    return tmp_gr:copy(stored_gr)
+    return tmp_gr:copy(M.gr)
+end
+
+
+--[[ Dump the whole archive RDF to a file stream.
+
+    The result is a software-agnostic RDF representation of the metadata
+    (Turtle) compressed with GZip. The ores data folder can be backed up via
+    OS-level file operations.
+
+    The restore() function, combined with a copy of
+    the ores folder, shall create a fully functional repo.
+
+    TODO configuration backup is not yet implemented and should be included for
+    a completely self-sufficient backup.
+]]
+M.dump = function(fpath, codec)
+    local fh = assert(io.open(fpath, "wb"))
+    for chunk in M.gr:encode(codec or "ttl") do fh:write(chunk) end
 end
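
For reference, a hypothetical way to call the new dump from Lua: the
`pocket_archive.repo` module path is assumed from the source layout, and gzip
(mentioned in the comment above) is applied as a separate step, since the
function writes the serialization as-is.

```
local repo = require "pocket_archive.repo"  -- assumed module path
repo.dump("/tmp/pkar_dump.nt", "nt")        -- dump the whole graph as N-Triples
os.execute("gzip -f /tmp/pkar_dump.nt")     -- compress the result afterwards
```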
 
 

+ 1 - 1
src/submission.lua

@@ -190,7 +190,7 @@ end
 --]]
 local function rsrc_to_graph(rsrc)
     local rmod = model.types[rsrc.content_type]
-    logger:info("Updating resource md: ", pp.write(rsrc))
+    logger:debug("Updating resource md: ", pp.write(rsrc))
 
     local s = term.new_iriref_ns(rsrc.id)
     local gr = graph.new(nil)

+ 1 - 1
src/transformers.lua

@@ -11,7 +11,7 @@ M = {}
 
 -- Resize an image to a maximum size on either dimension.
 M.img_resize = function(src, dest, size)
-    print(("Resizing image %s with size %d to %s"):format(src, size, dest))
+    logger:debug(("Resizing image %s with size %d to %s"):format(src, size, dest))
     -- TODO Make streaming if possible.
     dir.makepath(path.dirname(dest))
     local img = vips.Image.thumbnail(src, size)

+ 22 - 2
src/util/pkar.lua

@@ -63,7 +63,7 @@ gen_site = cli.command {
     function(args) gen.generate_site() end
 }
 
-gen_rdf = cli.command {
+dump_res = cli.command {
     "Generate an RDF representation of a resource.",
 
     cli.positional "id" {
@@ -93,7 +93,7 @@ gen_rdf = cli.command {
     end,
 }
 
-gen_ll = cli.command {
+dump_ll = cli.command {
     "Generate a laundry list for a stored resource.",
 
     cli.positional "id" {
@@ -118,6 +118,26 @@ gen_ll = cli.command {
     end
 }
 
+dump_archive = cli.command {
+    "Generate a RDF representation of the full archive.",
+
+    cli.positional "path" {
+        "Destination file path.",
+        type = cli.string,
+    },
+    cli.flag "f,format" {
+        "RDF serialization format. One of `ttl` [default], `nt`.",
+        type = cli.string,
+        default = "ttl",
+    },
+
+    function(args)
+        repo.dump(args.path, args.format)
+        print ("File written to ", args.path)
+    end,
+}
+
+
 cli.program {
     "Pocket Archive command line interface.",
 }
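
Assuming the cli library derives command names the same way it does for
`gen_site` (invoked as `pkar gen-site` in the README), the new dump command
would be called roughly like this; the flag spelling is a guess based on the
`f,format` declaration above:

```
pkar dump-archive out/archive_dump.ttl
pkar dump-archive out/archive_dump.nt -f nt
```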

+ 24 - 5
src/util/watcher.lua

@@ -3,6 +3,7 @@
 local cli = require "cli"
 local plpath = require "pl.path"
 local signal = require "posix.signal"
+local sllog = require "sllog"
 local watchdog = require "watchdog"
 
 local sub = require "pocket_archive.submission"
@@ -30,12 +31,18 @@ cli.program {
 
     cli.positional "path" {"Local path of folder to watch."},
 
-    cli.flag "l,logfile" {
+    cli.flag "f,logfile" {
         "Log file.",
         type = cli.string,
         default="/dev/stderr",
     },
 
+    cli.flag "l,loglevel" {
+        "Log level: 1 = error, 2 = warning, 3 = info [default], 4 = debug.",
+        type = cli.number,
+        default = 3,
+    },
+
     cli.flag "g,gen-site" {
         "(Re-)generate the website after each submission.",
         type = cli.boolean,
@@ -49,12 +56,24 @@ cli.program {
     function(args)
         local wd = watchdog.init()
         local logfile = assert(io.open(args.logfile, "a"))
-        logfile:write("Starting Pocket Archive watchdog on ")
-        logfile:write(args.path, "\n")
+
+        local logger = sllog:init{
+            {"err",   "%T %-5L ",          "%n", logfile},
+            {"warn",  "%T %-5L ",          "%n", logfile},
+            {"info",  "%T %-5L ",          "%n", logfile},
+            {"debug", "%T %-5L%f (%S) ",   "%n", logfile},
+            timefn=(require "socket" or {}).gettime,
+            report="debug",
+            hookrequire=false,
+            level=args.loglevel,
+        }
+
+        logger:info("Starting Pocket Archive watchdog on ")
+        logger:info(args.path, "\n")
 
         wd:add(args.path, watchdog.IN_CLOSE_WRITE, function(ev)
-            logfile:write("Created new file: ", ev.name, " mask: ", ev.mask)
-            logfile:write("\n")
+            logger:info("Created new file: ", ev.name, " mask: ", ev.mask)
+            logger:info("\n")
             if ev.name:find("pkar_submission.*%.csv") then
                 -- TODO spawn subprocess
                 sub.deposit(plpath.join(args.path, ev.name), args.cleanup)