
Generate submission report.

scossu, 3 days ago
parent
commit
02eb0b5011
2 changed files with 206 additions and 96 deletions
  1. src/submission.lua (+199, -94)
  2. src/util/pkar.lua (+7, -2)

src/submission.lua (+199, -94)

@@ -26,6 +26,7 @@ local io = io
 local csv = require "ftcsv"
 local dir = require "pl.dir"
 local file = require "pl.file"
+local json = require "cjson"
 local libmagic = require "libmagic"
 local path = require "pl.path"
 local pp = require "pl.pretty"
@@ -344,95 +345,109 @@ local M = {
 }
 
 
-M.deposit = function(ll_path, cleanup)
-    local sip = generate_sip(ll_path)
-    local tstamp
+--[[--
+Process a single SIP resource and its metadata.
+During this step, the source file is copied to a staging area, its checksum is
+calculated, and additional technical metadata are extracted and added to the
+D-Res.
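+
+For resources with a `source_path`, this sets `format`, `checksum`, `size`, and
+`archive_path` (and rewrites `thumbnail` to its archived copy); `submitted` and
+`last_modified` are set on every resource.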
 
-    for i, rsrc in ipairs(sip) do
-        -- TODO Wrap this chunk into a txn. Each row is atomic.
-        logger:debug(("Processing resource #%d of %d: %s"):format(
-                i, #sip, rsrc.id))
-
-        local in_path, fext
-        if not rsrc.source_path then goto continue end
-
-        in_path = path.join(sip.root_path, rsrc.source_path)
-        fext = path.extension(in_path)
-        -- If it's a directory, skip file processing.
-        if not path.isfile(in_path) then goto continue end
-
-        do
-            local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
-            local file_ext
-            _, file_ext = path.splitext(in_path)
-            local tmp_path = tmp_dir .. rsrc.id .. file_ext
-            dir.makepath(tmp_dir)
-
-            local ifh = assert(io.open(in_path, "r"))
-
-            rsrc.format = {magic:filehandle(ifh)}
-            local hash_it = mc.new_blake2b()
-            local fsize = 0
-            logger:debug("Hashing ", in_path)
-            local ofh = assert(io.open(tmp_path, "w"))
-            while true do
-                chunk = ifh:read(pkar.config.fs.stream_chunk_size)
-                if not chunk then break end
-                hash_it:update(chunk)
-                ofh:write(chunk)
-                fsize = fsize + #chunk
-            end
-            local checksum = hash_it:final(true)
-            rsrc.checksum = {"blake2:" .. checksum}
-            rsrc.size = fsize
-
-            ofh:close()
-            ifh:close()
-
-            -- Copy file and calculate checksum.
-            local out_dir, out_path
-            out_dir = path.join(
-                    pkar.config.fs.ores_path,
-                    checksum:sub(1, 2),
-                    checksum:sub(3, 4))
-            out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
-            dir.makepath(out_dir)
-            logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
-            dir.movefile(tmp_path, out_path)
-            rsrc.archive_path = out_path
-
-            -- Copy thumbnail if existing.
-            if rsrc.thumbnail then
-                src_path = rsrc.thumbnail
-                out_path = path.join(
-                        out_dir, path.basename(src_path))
-                logger:debug(("Moving file %s to %s"):format(src_path, out_path))
-                dir.movefile(src_path, out_path)
-                rsrc.thumbnail = out_path
-            end
+@tparam table rsrc Resource from the SIP parsed by #{generate_sip}.
+@tparam string root_path Root path of the SIP; `rsrc.source_path` is resolved
+    relative to it.
+--]]
+local function process_rsrc(rsrc, root_path)
+    local in_path, fext
+    if not rsrc.source_path then goto continue end
+
+    in_path = path.join(root_path, rsrc.source_path)
+    fext = path.extension(in_path)
+    -- If it's a directory, skip file processing.
+    if not path.isfile(in_path) then goto continue end
+
+    do
+        local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
+        local file_ext
+        _, file_ext = path.splitext(in_path)
+        local tmp_path = tmp_dir .. rsrc.id .. file_ext
+        dir.makepath(tmp_dir)
+
+        local ifh = assert(io.open(in_path, "r"))
+
+        rsrc.format = {magic:filehandle(ifh)}
+        local hash_it = mc.new_blake2b()
+        local fsize = 0
+        logger:debug("Hashing ", in_path)
+        local ofh = assert(io.open(tmp_path, "w"))
+        while true do
+            local chunk = ifh:read(pkar.config.fs.stream_chunk_size)
+            if not chunk then break end
+            hash_it:update(chunk)
+            ofh:write(chunk)
+            fsize = fsize + #chunk
+        end
+        local checksum = hash_it:final(true)
+        rsrc.checksum = {"blake2:" .. checksum}
+        rsrc.size = fsize
+
+        ofh:close()
+        ifh:close()
+
+        -- Move the file from staging into the object store, sharded by checksum.
+        local out_dir, out_path
+        out_dir = path.join(
+                pkar.config.fs.ores_path,
+                checksum:sub(1, 2),
+                checksum:sub(3, 4))
+        out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
+        dir.makepath(out_dir)
+        logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
+        dir.movefile(tmp_path, out_path)
+        rsrc.archive_path = out_path
+
+        -- Move the thumbnail, if present, next to the archived file.
+        if rsrc.thumbnail then
+            local src_path = rsrc.thumbnail
+            out_path = path.join(
+                    out_dir, path.basename(src_path))
+            logger:debug(("Moving file %s to %s"):format(src_path, out_path))
+            dir.movefile(src_path, out_path)
+            rsrc.thumbnail = out_path
         end
+    end
 
-        ::continue::
+    ::continue::
 
-        tstamp = os.date("!%Y-%m-%dT%TZ")
-        rsrc.submitted = tstamp
-        rsrc.last_modified = tstamp
+    local tstamp = os.date("!%Y-%m-%dT%TZ")
+    rsrc.submitted = tstamp
+    rsrc.last_modified = tstamp
 
-        local tmp_gr, s
-        tmp_gr, s = rsrc_to_graph(rsrc)
+    local tmp_gr, s
+    tmp_gr, s = rsrc_to_graph(rsrc)
 
-        local val_report = validator.validate(tmp_gr, s)
-        if val_report.max_level == "ERROR" then error(
-            "Validation raised errors: " .. pp.write(val_report))
-        elseif val_report.max_level == "WARN" then logger:warn(
-            "Validation raised warnings: " .. pp.write(val_report))
-        elseif val_report.max_level == "NOTICE" then logger:warn(
-            "Validation raised notices: " .. pp.write(val_report)) end
+    local val_report = validator.validate(tmp_gr, s)
+    if val_report.max_level == "ERROR" then error(
+        "Validation raised errors: " .. pp.write(val_report))
+    elseif val_report.max_level == "WARN" then logger:warn(
+        "Validation raised warnings: " .. pp.write(val_report))
+    elseif val_report.max_level == "NOTICE" then logger:warn(
+        "Validation raised notices: " .. pp.write(val_report)) end
 
-        repo.store_updates(tmp_gr, s)
-        logger:info("Stored: ", s.data)
-    end
+    repo.store_updates(tmp_gr, s)
+    logger:info("Stored: ", s.data)
+end
 
+
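+--[[--
+Finalize and emit the submission report.
+Adds a timestamp to the report, then either writes it to `rpath` as JSON or
+returns it as a table.
+@tparam string rpath Optional output path; if nil, the report table is returned.
+@tparam table report Report data.
+@treturn bool|table `true` if the report was written to `rpath`, otherwise the
+    report table.
+--]]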
+local function generate_report(rpath, report)
+    report.timestamp = os.date("!%Y-%m-%dT%TZ")
+
+    if rpath then
+        local fh = io.open(rpath, "w")
+        fh:write(json.encode(report))
+        fh:close()
+        return true
+    else return report end
+end
+
+
+local function add_sub_meta()
     -- Add triples for submission metadata directly to the stored graph.
     local it = repo.gr:add_init()
     local sub_uri = term.new_iriref_ns(sub_id)
@@ -448,7 +463,7 @@ M.deposit = function(ll_path, cleanup)
             term.new_lit(sub_name)
         ))
     end
-    tstamp = os.date("!%Y-%m-%dT%TZ")
+    local tstamp = os.date("!%Y-%m-%dT%TZ")
     it:add_iter(triple.new(
         sub_uri,
         model.id_to_uri.submitted,
@@ -460,26 +475,116 @@ M.deposit = function(ll_path, cleanup)
         term.new_lit(tstamp, "xsd:dateTime", nil, true)
     ))
     it:add_done()
+end
+
+
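+--[[--
+Delete the SIP source files and the laundry list after a successful deposit.
+@tparam table sip Parsed SIP, as returned by `generate_sip`.
+@tparam string ll_path Path of the laundry list file.
+--]]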
+local function cleanup_src(sip, ll_path)
+    -- Gather all top-level directories and delete them.
+    local rsrc_paths = {}
+    for i, rsrc in ipairs(sip) do
+        rsrc_paths[rsrc.source_path:match("[^/]+")] = true
+    end
+    for tlf in pairs(rsrc_paths) do
+        local target = path.join(sip.root_path, tlf)
+        logger:info("Cleaning up: " .. target)
+        if path.isdir(target) then dir.rmtree(target)
+        elseif path.isfile(target) then file.delete(target) end
+    end
+    logger:info("Cleaning up: " .. ll_path)
+    file.delete(ll_path)
+end
+
+
+--[[--
+Deposit resources from a SIP.
+@tparam string ll_path Path of the laundry list. All SIP source references are
+    relative to this path.
+@tparam boolean cleanup If `true`, the SIP source files are removed after a
+    successful submission.
+@tparam[opt] string report_path Path of the report file. If given, the report
+    is written there as JSON; otherwise it is returned as a table.
+
+@treturn table|bool The submission report, or `true` if the report was written
+    to `report_path`.
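+
+@usage
+-- Illustrative call; `sub` is this module as loaded by the caller, and the
+-- path is an example.
+local report = sub.deposit("/data/drop/laundry_list.csv", true)
+if report.result ~= "success" then print(report.message) end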
+--]]
+M.deposit = function(ll_path, cleanup, report_path)
+    local sip, rc, ret
+
+    rc, ret = xpcall(generate_sip, debug.traceback, ll_path)
+    if rc then sip = ret
+    else
+        return generate_report(report_path, {
+            result = "failure",
+            message = "An error occurred while parsing the SIP.",
+            traceback = ret,
+            metadata = {
+                ll = ll_path,
+            },
+        })
+    end
+
+    for i, rsrc in ipairs(sip) do
+        -- TODO Wrap this chunk into a txn. Each row is atomic.
+        logger:debug(("Processing resource #%d of %d: %s"):format(
+                i, #sip, rsrc.id))
+
+        local rc, ret = xpcall(
+            process_rsrc,
+            debug.traceback,
+            rsrc, sip.root_path)
+        if not rc then
+            return generate_report(report_path, {
+                result = "failure",
+                message = "An error occurred while processing a resource \z
+                    in the SIP.",
+                traceback = ret,
+                metadata = {
+                    rsrc_id = rsrc.id,
+                    source_path = rsrc.source_path,
+                },
+            })
+        end
+    end
+
+    rc, ret = xpcall(add_sub_meta, debug.traceback)
+    if not rc then
+        return generate_report(report_path, {
+            result = "failure",
+            message = "An error occurred while updating submission metadata.",
+            traceback = ret,
+            metadata = {
+                sub_id = sub_id,
+            },
+        })
+    end
+
+    -- After this point, the outcome is either `success` or `warnings`.
 
     -- Remove processing directory.
     local proc_dir = path.join(sip.root_path, "proc")
     if path.isdir(proc_dir) then dir.rmtree(proc_dir) end
 
+    local report = {
+        metadata = {
+            sub_id = sub_id,
+            resources = {},
+        },
+    }
+
     if cleanup then
-        -- Gather all top-level directories and delete them.
-        rsrc_paths = {}
-        for i, rsrc in ipairs(sip) do
-            rsrc_paths[rsrc.source_path:match("[^/]+")] = true
-        end
-        for tlf in pairs(rsrc_paths) do
-            local target = path.join(sip.root_path, tlf)
-            logger:info("Cleaning up: " .. target)
-            if path.isdir(target) then dir.rmtree(target)
-            elseif path.isfile(target) then file.delete(target) end
+        rc, ret = xpcall(cleanup_src, debug.traceback, sip, ll_path)
+        if not rc then
+            report.result = "warnings"
+            report.message = "An error occurred while deleting the SIP source."
         end
-        logger:info("Cleaning up: " .. ll_path)
-        file.delete(ll_path)
     end
+
+    if not report.result then
+        report.result = "success"
+        report.message = "The SIP has been successfully submitted."
+    end
+
+    for _, rsrc in ipairs(sip) do
+        report.metadata.resources[rsrc.id] = rsrc.source_path
+    end
+
+    return generate_report(report_path, report)
 end
 
 

src/util/pkar.lua (+7, -2)

@@ -49,7 +49,10 @@ list = cli.command {
 }
 
 deposit = cli.command {
-    "Deposit a package.",
+    [[Deposit a Submission Information Package (SIP).
+
+    The command outputs the submission report as JSON to standard output,
+    which can be piped to `jq` or another parsing tool.]]
 
     cli.positional "path" { "Path of the laundry list file." },
 
@@ -59,7 +62,9 @@ deposit = cli.command {
     },
 
     function(args)
-        sub.deposit(args.path, args.cleanup)
+        local report = sub.deposit(args.path, args.cleanup)
+        io.write(json.encode(report))
     end
 }
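
For reference, a minimal sketch of how the report returned by `sub.deposit` could be consumed from Lua. The field names (`result`, `message`, `traceback`, `metadata.resources`) come from the diff above; the `require` path and the laundry-list path are illustrative assumptions, not part of the commit.

local sub = require "submission"  -- assumed module path

-- No report_path given, so the report table is returned instead of written to disk.
local report = sub.deposit("/data/drop/laundry_list.csv", true)

if report.result == "failure" then
    io.stderr:write(report.message, "\n", report.traceback, "\n")
    os.exit(1)
elseif report.result == "warnings" then
    io.stderr:write(report.message, "\n")
end

-- On success (or warnings), metadata.resources maps each resource ID to its source path.
for id, src_path in pairs(report.metadata.resources) do
    print(id, src_path)
end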