瀏覽代碼

Deposit updates:

* Add gen-site to watchdog.
* Add cleanup to pkar deposit and watchdog.
* Use IN_CLOSE_WRITE instead of IN_CREATE to watch new files (IN_CREATE fired
  too early for remotely uploaded files).
scossu 6 天之前
父節點
當前提交
da1a38820d
共有 4 個文件被更改,包括 67 次插入30 次删除
  1. 3 3
      doc/roadmap.md
  2. 39 17
      src/submission.lua
  3. 8 6
      src/util/pkar.lua
  4. 17 4
      src/util/watcher.lua

+ 3 - 3
doc/roadmap.md

@@ -50,10 +50,10 @@ usage and opportunities for expanding adoption in relevant areas.
 
 - ✖︎ Management UI & API
   - ✖︎ Deposit via single tar or zip file submission
--  Deposit via local hot folder
+-  Deposit via local hot folder
   - ✓ Watch local folder and trigger deposit
-  -  Option to regenerate site after deposit
-  -  Option to clean up sources & LL on success
+  -  Option to regenerate site after deposit
+  -  Option to clean up sources & LL on success
 - ⚒ Proper collection handling
   - ✓ Dedicated template
   - ✓ Link to markdown doc for presentation page

+ 39 - 17
src/submission.lua

@@ -12,17 +12,20 @@ The deposit process is carried out in several steps:
 - File staging (`deposit()`): scan through the generated SIP, identify the
   files, calculate their checksums, and move them to temporary storage; add
   checksums to the metadata. TODO allow user-provided metadata and validation
-- graph generation: generate an RDF graph for each resource in the SIP.
-- permanent storage: push the RDF graph to permanent store (via functions in
+- Graph generation: generate an RDF graph for each resource in the SIP.
+- Permanent storage: push the RDF graph to permanent store (via functions in
   the `repo` module), which includes content model validation; if this
   succeeds, related files are also moved from the staging area to the archival
   store.
+- Cleanup (optional): if requested, the laundry list and resource folder are
+  deleted from their original location.
 
 --]]
 local io = io
 
 local csv = require "ftcsv"
 local dir = require "pl.dir"
+local file = require "pl.file"
 local libmagic = require "libmagic"
 local path = require "pl.path"
 local pp = require "pl.pretty"
@@ -46,8 +49,6 @@ local NT = {}
 -- Local path to URI mapping. For linking between newly created resources.
 local path_to_uri
 
-local M = {}  -- Submission module
-
 
 -- Initialize libmagic database.
 local magic = libmagic.open(libmagic.MIME_TYPE, libmagic.NO_CHECK_COMPRESS )
@@ -67,7 +68,7 @@ Generate a random, reader-friendly ID.
 A 16-character ID with the above defined #chpool of 60 symbols has an entropy
 of 94.5 bits, which should be plenty for a medium-sized repository.
 ]]
-M.idgen = function(len)
+local function idgen(len)
     local charlist = {}
     for i = 1, (len or pkar.config.id.len) do
         table.insert(charlist, string.char(chpool[math.random(1, #chpool)]))
@@ -77,9 +78,8 @@ M.idgen = function(len)
 end
 
 
-M.generate_sip = function(src_path)
-    local sip = {root_path = src_path:match("(.*/)")}
-    local src_dir = path.dirname(src_path)
+local function generate_sip(ll_path)
+    local sip = {root_path = path.dirname(ll_path)}
     path_to_uri = {}
 
     local tn_dir = path.join(sip.root_path, "proc", "tn")
@@ -88,7 +88,7 @@ M.generate_sip = function(src_path)
     local prev_path
 
     local i = 0
-    for row_n, row in csv.parseLine(src_path) do
+    for row_n, row in csv.parseLine(ll_path) do
         local has_content
         for k, v in pairs(row) do
             -- Change "" to nil.
@@ -107,7 +107,7 @@ M.generate_sip = function(src_path)
                     :format(i, row_n))
             prev_path = row.source_path
             -- New row.
-            sip[i] = {id = "par:" .. M.idgen()}
+            sip[i] = {id = "par:" .. idgen()}
             -- Add to path to URI map for later referencing.
             path_to_uri[row.source_path] = sip[i].id
             for k, v in pairs(row) do
@@ -148,12 +148,12 @@ M.generate_sip = function(src_path)
     for i, v in ipairs(sip) do
         local rmod = model.types[v.content_type]
         dbg.assert(v.source_path)
-        local fpath = path.join(src_dir, v.source_path)
+        local fpath = path.join(sip.root_path, v.source_path)
         --dbg.assert(rmod)
         v.has_member = v.has_member or {}
         -- Create implicit members from single-file artifact.
         if rmod.types.artifact and path.isfile(fpath) then
-            local file_id = "par:" .. M.idgen()
+            local file_id = "par:" .. idgen()
             -- Insert file resource and move it into a new sub-folder.
             table.insert(sip, {
                 content_type = rmod.default_fmodel or "file",
@@ -188,7 +188,7 @@ end
 --[[  Convert a SIP resource table to an in-memory Volksdata graph.
 
 --]]
-M.rsrc_to_graph = function(rsrc)
+local function rsrc_to_graph(rsrc)
     local rmod = model.types[rsrc.content_type]
     logger:info("Updating resource md: ", pp.write(rsrc))
 
@@ -237,7 +237,7 @@ M.rsrc_to_graph = function(rsrc)
             local proxy_s
             for i, vv in ipairs(v) do
                 -- Add linked list proxies.
-                local brick_uri = term.new_iriref_ns("par:" .. M.idgen())
+                local brick_uri = term.new_iriref_ns("par:" .. idgen())
                 if i == 1 then
                     proxy_s = s
                     it:add_iter(triple.new(
@@ -287,7 +287,13 @@ M.rsrc_to_graph = function(rsrc)
 end
 
 
-M.deposit = function(sip)
+-- Submission module.
+local M = {}
+
+
+M.deposit = function(ll_path, cleanup)
+    local sip = generate_sip(ll_path)
+
     for i, rsrc in ipairs(sip) do
         -- TODO Wrap this chunk into a txn. Each row is atomic.
         logger:debug(("Processing resource #%d of %d: %s"):format(
@@ -296,7 +302,7 @@ M.deposit = function(sip)
         local in_path, fext
         if not rsrc.source_path then goto continue end
 
-        in_path = sip.root_path .. rsrc.source_path
+        in_path = path.join(sip.root_path, rsrc.source_path)
         fext = path.extension(in_path)
         -- If it's a directory, skip file processing.
         if not path.isfile(in_path) then goto continue end
@@ -359,7 +365,7 @@ M.deposit = function(sip)
         rsrc.last_modified = tstamp
 
         local tmp_gr, s
-        tmp_gr, s = M.rsrc_to_graph(rsrc)
+        tmp_gr, s = rsrc_to_graph(rsrc)
 
         local val_report = validator.validate(tmp_gr, s)
         if val_report.max_level == "ERROR" then error(
@@ -374,6 +380,22 @@ M.deposit = function(sip)
 
     -- Remove processing directory.
     dir.rmtree(path.join(sip.root_path, "proc"))
+
+    if cleanup then
+        -- Gather all top-level files and directories and delete them.
+        rsrc_paths = {}
+        for i, rsrc in ipairs(sip) do
+            rsrc_paths[rsrc.source_path:match("[^/]+")] = true
+        end
+        for tlf in pairs(rsrc_paths) do
+            local target = path.join(sip.root_path, tlf)
+            logger:info("Cleaning up: " .. target)
+            if path.isdir(target) then dir.rmtree(target)
+            elseif path.isfile(target) then file.delete(target) end
+        end
+        logger:info("Cleaning up: " .. ll_path)
+        file.delete(ll_path)
+    end
 end
 
 

+ 8 - 6
src/util/pkar.lua

@@ -13,6 +13,8 @@ local sub = require "pocket_archive.submission"
 local repo = require "pocket_archive.repo"
 local gen = require "pocket_archive.generator"
 
+local dbg = require "debugger"
+
 
 cli.locale "en_US"  -- TODO set with multilingual support.
 
@@ -43,15 +45,15 @@ list = cli.command {
 deposit = cli.command {
     "Deposit a package.",
 
-    cli.positional "path" {
-        [[Path of the package root. It must be a directory containing
-        all the files and folders to be submitted and a `pkar_submission.csv`
-        file at the top of the folder.]]
+    cli.positional "path" { "Path of the laundry list file." },
+
+    cli.flag "c,cleanup" {
+        "Remove laundry list and SIP after successful submission.",
+        type = cli.boolean,
     },
 
     function(args)
-        sip = sub.generate_sip(plpath.join(args.path, "pkar_submission.csv"))
-        sub.deposit(sip)
+        sub.deposit(args.path, args.cleanup)
     end
 }
 

+ 17 - 4
src/util/watcher.lua

@@ -6,6 +6,7 @@ local signal = require "posix.signal"
 local watchdog = require "watchdog"
 
 local sub = require "pocket_archive.submission"
+local gen = require "pocket_archive.generator"
 
 
 local running = true
@@ -35,17 +36,29 @@ cli.program {
         default="/dev/stderr",
     },
 
+    cli.flag "g,gen-site" {
+        "(Re-)generate the website after each submission.",
+        type = cli.boolean,
+    },
+
+    cli.flag "c,cleanup" {
+        "Remove laundry list and SIP after successful submission.",
+        type = cli.boolean,
+    },
+
     function(args)
         local wd = watchdog.init()
         local logfile = assert(io.open(args.logfile, "a"))
+        logfile:write("Starting Pocket Archive watchdog on ")
+        logfile:write(args.path, "\n")
 
-        wd:add(args.path, watchdog.IN_CREATE, function(ev)
-            logfile:write("Created new file:", ev.name, ev.mask)
+        wd:add(args.path, watchdog.IN_CLOSE_WRITE, function(ev)
+            logfile:write("Created new file: ", ev.name, " mask: ", ev.mask)
             logfile:write("\n")
             if ev.name:find("pkar_submission.*%.csv") then
                 -- TODO spawn subprocess
-                sip = sub.generate_sip(plpath.join(args.path, ev.name))
-                sub.deposit(sip)
+                sub.deposit(plpath.join(args.path, ev.name), args.cleanup)
+                if args.gen_site then gen.generate_site() end
             end
         end)