|
@@ -26,6 +26,7 @@ local io = io
|
|
local csv = require "ftcsv"
|
|
local csv = require "ftcsv"
|
|
local dir = require "pl.dir"
|
|
local dir = require "pl.dir"
|
|
local file = require "pl.file"
|
|
local file = require "pl.file"
|
|
|
|
+local json = require "cjson"
|
|
local libmagic = require "libmagic"
|
|
local libmagic = require "libmagic"
|
|
local path = require "pl.path"
|
|
local path = require "pl.path"
|
|
local pp = require "pl.pretty"
|
|
local pp = require "pl.pretty"
|
|
@@ -344,95 +345,109 @@ local M = {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
-M.deposit = function(ll_path, cleanup)
|
|
|
|
- local sip = generate_sip(ll_path)
|
|
|
|
- local tstamp
|
|
|
|
|
|
+--[[--
|
|
|
|
+Process SIP files and metadata.
|
|
|
|
+During this step, files are moved to a staging area, their checksums are
|
|
|
|
+calculated, and some more technical metadata are extracted and added to the
|
|
|
|
+D-Res.
|
|
|
|
|
|
- for i, rsrc in ipairs(sip) do
|
|
|
|
- -- TODO Wrap this chunk into a txn. Each row is atomic.
|
|
|
|
- logger:debug(("Processing resource #%d of %d: %s"):format(
|
|
|
|
- i, #sip, rsrc.id))
|
|
|
|
-
|
|
|
|
- local in_path, fext
|
|
|
|
- if not rsrc.source_path then goto continue end
|
|
|
|
-
|
|
|
|
- in_path = path.join(sip.root_path, rsrc.source_path)
|
|
|
|
- fext = path.extension(in_path)
|
|
|
|
- -- If it's a directory, skip file processing.
|
|
|
|
- if not path.isfile(in_path) then goto continue end
|
|
|
|
-
|
|
|
|
- do
|
|
|
|
- local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
|
|
|
|
- local file_ext
|
|
|
|
- _, file_ext = path.splitext(in_path)
|
|
|
|
- local tmp_path = tmp_dir .. rsrc.id .. file_ext
|
|
|
|
- dir.makepath(tmp_dir)
|
|
|
|
-
|
|
|
|
- local ifh = assert(io.open(in_path, "r"))
|
|
|
|
-
|
|
|
|
- rsrc.format = {magic:filehandle(ifh)}
|
|
|
|
- local hash_it = mc.new_blake2b()
|
|
|
|
- local fsize = 0
|
|
|
|
- logger:debug("Hashing ", in_path)
|
|
|
|
- local ofh = assert(io.open(tmp_path, "w"))
|
|
|
|
- while true do
|
|
|
|
- chunk = ifh:read(pkar.config.fs.stream_chunk_size)
|
|
|
|
- if not chunk then break end
|
|
|
|
- hash_it:update(chunk)
|
|
|
|
- ofh:write(chunk)
|
|
|
|
- fsize = fsize + #chunk
|
|
|
|
- end
|
|
|
|
- local checksum = hash_it:final(true)
|
|
|
|
- rsrc.checksum = {"blake2:" .. checksum}
|
|
|
|
- rsrc.size = fsize
|
|
|
|
-
|
|
|
|
- ofh:close()
|
|
|
|
- ifh:close()
|
|
|
|
-
|
|
|
|
- -- Copy file and calculate checksum.
|
|
|
|
- local out_dir, out_path
|
|
|
|
- out_dir = path.join(
|
|
|
|
- pkar.config.fs.ores_path,
|
|
|
|
- checksum:sub(1, 2),
|
|
|
|
- checksum:sub(3, 4))
|
|
|
|
- out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
|
|
|
|
- dir.makepath(out_dir)
|
|
|
|
- logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
|
|
|
|
- dir.movefile(tmp_path, out_path)
|
|
|
|
- rsrc.archive_path = out_path
|
|
|
|
-
|
|
|
|
- -- Copy thumbnail if existing.
|
|
|
|
- if rsrc.thumbnail then
|
|
|
|
- src_path = rsrc.thumbnail
|
|
|
|
- out_path = path.join(
|
|
|
|
- out_dir, path.basename(src_path))
|
|
|
|
- logger:debug(("Moving file %s to %s"):format(src_path, out_path))
|
|
|
|
- dir.movefile(src_path, out_path)
|
|
|
|
- rsrc.thumbnail = out_path
|
|
|
|
- end
|
|
|
|
|
|
+@tparam table rsrc Resource from the SIP parsed by #{generate_sip}.
|
|
|
|
+--]]
|
|
|
|
+local function process_rsrc(rsrc, root_path)
|
|
|
|
+ local in_path, fext
|
|
|
|
+ if not rsrc.source_path then goto continue end
|
|
|
|
+
|
|
|
|
+ in_path = path.join(root_path, rsrc.source_path)
|
|
|
|
+ fext = path.extension(in_path)
|
|
|
|
+ -- If it's a directory, skip file processing.
|
|
|
|
+ if not path.isfile(in_path) then goto continue end
|
|
|
|
+
|
|
|
|
+ do
|
|
|
|
+ local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
|
|
|
|
+ local file_ext
|
|
|
|
+ _, file_ext = path.splitext(in_path)
|
|
|
|
+ local tmp_path = tmp_dir .. rsrc.id .. file_ext
|
|
|
|
+ dir.makepath(tmp_dir)
|
|
|
|
+
|
|
|
|
+ local ifh = assert(io.open(in_path, "r"))
|
|
|
|
+
|
|
|
|
+ rsrc.format = {magic:filehandle(ifh)}
|
|
|
|
+ local hash_it = mc.new_blake2b()
|
|
|
|
+ local fsize = 0
|
|
|
|
+ logger:debug("Hashing ", in_path)
|
|
|
|
+ local ofh = assert(io.open(tmp_path, "w"))
|
|
|
|
+ while true do
|
|
|
|
+ chunk = ifh:read(pkar.config.fs.stream_chunk_size)
|
|
|
|
+ if not chunk then break end
|
|
|
|
+ hash_it:update(chunk)
|
|
|
|
+ ofh:write(chunk)
|
|
|
|
+ fsize = fsize + #chunk
|
|
|
|
+ end
|
|
|
|
+ local checksum = hash_it:final(true)
|
|
|
|
+ rsrc.checksum = {"blake2:" .. checksum}
|
|
|
|
+ rsrc.size = fsize
|
|
|
|
+
|
|
|
|
+ ofh:close()
|
|
|
|
+ ifh:close()
|
|
|
|
+
|
|
|
|
+ -- Copy file and calculate checksum.
|
|
|
|
+ local out_dir, out_path
|
|
|
|
+ out_dir = path.join(
|
|
|
|
+ pkar.config.fs.ores_path,
|
|
|
|
+ checksum:sub(1, 2),
|
|
|
|
+ checksum:sub(3, 4))
|
|
|
|
+ out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
|
|
|
|
+ dir.makepath(out_dir)
|
|
|
|
+ logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
|
|
|
|
+ dir.movefile(tmp_path, out_path)
|
|
|
|
+ rsrc.archive_path = out_path
|
|
|
|
+
|
|
|
|
+ -- Copy thumbnail if existing.
|
|
|
|
+ if rsrc.thumbnail then
|
|
|
|
+ src_path = rsrc.thumbnail
|
|
|
|
+ out_path = path.join(
|
|
|
|
+ out_dir, path.basename(src_path))
|
|
|
|
+ logger:debug(("Moving file %s to %s"):format(src_path, out_path))
|
|
|
|
+ dir.movefile(src_path, out_path)
|
|
|
|
+ rsrc.thumbnail = out_path
|
|
end
|
|
end
|
|
|
|
+ end
|
|
|
|
|
|
- ::continue::
|
|
|
|
|
|
+ ::continue::
|
|
|
|
|
|
- tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
|
- rsrc.submitted = tstamp
|
|
|
|
- rsrc.last_modified = tstamp
|
|
|
|
|
|
+ local tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
|
+ rsrc.submitted = tstamp
|
|
|
|
+ rsrc.last_modified = tstamp
|
|
|
|
|
|
- local tmp_gr, s
|
|
|
|
- tmp_gr, s = rsrc_to_graph(rsrc)
|
|
|
|
|
|
+ local tmp_gr, s
|
|
|
|
+ tmp_gr, s = rsrc_to_graph(rsrc)
|
|
|
|
|
|
- local val_report = validator.validate(tmp_gr, s)
|
|
|
|
- if val_report.max_level == "ERROR" then error(
|
|
|
|
- "Validation raised errors: " .. pp.write(val_report))
|
|
|
|
- elseif val_report.max_level == "WARN" then logger:warn(
|
|
|
|
- "Validation raised warnings: " .. pp.write(val_report))
|
|
|
|
- elseif val_report.max_level == "NOTICE" then logger:warn(
|
|
|
|
- "Validation raised notices: " .. pp.write(val_report)) end
|
|
|
|
|
|
+ local val_report = validator.validate(tmp_gr, s)
|
|
|
|
+ if val_report.max_level == "ERROR" then error(
|
|
|
|
+ "Validation raised errors: " .. pp.write(val_report))
|
|
|
|
+ elseif val_report.max_level == "WARN" then logger:warn(
|
|
|
|
+ "Validation raised warnings: " .. pp.write(val_report))
|
|
|
|
+ elseif val_report.max_level == "NOTICE" then logger:warn(
|
|
|
|
+ "Validation raised notices: " .. pp.write(val_report)) end
|
|
|
|
|
|
- repo.store_updates(tmp_gr, s)
|
|
|
|
- logger:info("Stored: ", s.data)
|
|
|
|
- end
|
|
|
|
|
|
+ repo.store_updates(tmp_gr, s)
|
|
|
|
+ logger:info("Stored: ", s.data)
|
|
|
|
+end
|
|
|
|
|
|
|
|
+
|
|
|
|
+local function generate_report(rpath, report)
|
|
|
|
+ report.timestamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
|
+
|
|
|
|
+ if rpath then
|
|
|
|
+ local fh = io.open(rpath, "w")
|
|
|
|
+ fh:write(json.encode(report))
|
|
|
|
+ fh:close()
|
|
|
|
+ return true
|
|
|
|
+ else return report end
|
|
|
|
+end
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+local function add_sub_meta()
|
|
-- Add triples for submission metadata directly to the stored graph.
|
|
-- Add triples for submission metadata directly to the stored graph.
|
|
local it = repo.gr:add_init()
|
|
local it = repo.gr:add_init()
|
|
local sub_uri = term.new_iriref_ns(sub_id)
|
|
local sub_uri = term.new_iriref_ns(sub_id)
|
|
@@ -448,7 +463,7 @@ M.deposit = function(ll_path, cleanup)
|
|
term.new_lit(sub_name)
|
|
term.new_lit(sub_name)
|
|
))
|
|
))
|
|
end
|
|
end
|
|
- tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
|
|
|
+ local tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
it:add_iter(triple.new(
|
|
it:add_iter(triple.new(
|
|
sub_uri,
|
|
sub_uri,
|
|
model.id_to_uri.submitted,
|
|
model.id_to_uri.submitted,
|
|
@@ -460,26 +475,116 @@ M.deposit = function(ll_path, cleanup)
|
|
term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
))
|
|
))
|
|
it:add_done()
|
|
it:add_done()
|
|
|
|
+end
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+local function cleanup_src(sip)
|
|
|
|
+ -- Gather all top-level directories and delete them.
|
|
|
|
+ rsrc_paths = {}
|
|
|
|
+ for i, rsrc in ipairs(sip) do
|
|
|
|
+ rsrc_paths[rsrc.source_path:match("[^/]+")] = true
|
|
|
|
+ end
|
|
|
|
+ for tlf in pairs(rsrc_paths) do
|
|
|
|
+ local target = path.join(sip.root_path, tlf)
|
|
|
|
+ logger:info("Cleaning up: " .. target)
|
|
|
|
+ if path.isdir(target) then dir.rmtree(target)
|
|
|
|
+ elseif path.isfile(target) then file.delete(target) end
|
|
|
|
+ end
|
|
|
|
+ logger:info("Cleaning up: " .. ll_path)
|
|
|
|
+ file.delete(ll_path)
|
|
|
|
+end
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+--[[--
|
|
|
|
+Deposit resources from a SIP.
|
|
|
|
+@tparam string ll_path Path of the laundry list. All SIP source references are
|
|
|
|
+ relative to this path.
|
|
|
|
+@tparam boolean cleanup If set to `true`, it removes the SIP if the submission
|
|
|
|
+ is successful.
|
|
|
|
+@tparam string report_path Optional path for the report file.
|
|
|
|
+
|
|
|
|
+@treturn bool Whether the process completed successfully.
|
|
|
|
+--]]
|
|
|
|
+M.deposit = function(ll_path, cleanup, report_path)
|
|
|
|
+ local sip, rc, ret
|
|
|
|
+
|
|
|
|
+ rc, ret = xpcall(generate_sip, debug.traceback, ll_path)
|
|
|
|
+ if rc then sip = ret
|
|
|
|
+ else
|
|
|
|
+ return generate_report(report_path, {
|
|
|
|
+ result = "failure",
|
|
|
|
+ message = "An error occurred while parsing the SIP.",
|
|
|
|
+ traceback = ret,
|
|
|
|
+ metadata = {
|
|
|
|
+ ll = ll_path,
|
|
|
|
+ },
|
|
|
|
+ })
|
|
|
|
+ end
|
|
|
|
+
|
|
|
|
+ for i, rsrc in ipairs(sip) do
|
|
|
|
+ -- TODO Wrap this chunk into a txn. Each row is atomic.
|
|
|
|
+ logger:debug(("Processing resource #%d of %d: %s"):format(
|
|
|
|
+ i, #sip, rsrc.id))
|
|
|
|
+
|
|
|
|
+ local rc, ret = xpcall(
|
|
|
|
+ process_rsrc,
|
|
|
|
+ debug.traceback,
|
|
|
|
+ rsrc, sip.root_path)
|
|
|
|
+ if not rc then
|
|
|
|
+ return generate_report(report_path, {
|
|
|
|
+ result = "failure",
|
|
|
|
+ message = "An error occurred while processing a resource \z
|
|
|
|
+ in the SIP.",
|
|
|
|
+ traceback = ret,
|
|
|
|
+ metadata = {
|
|
|
|
+ rsrc_id = rsrc.id,
|
|
|
|
+ source_path = rsrc.source_path,
|
|
|
|
+ },
|
|
|
|
+ })
|
|
|
|
+ end
|
|
|
|
+ end
|
|
|
|
+
|
|
|
|
+ rc, ret = xpcall(add_sub_meta, debug.traceback)
|
|
|
|
+ if not rc then
|
|
|
|
+ return generate_report(report_path, {
|
|
|
|
+ result = "failure",
|
|
|
|
+ message = "An error occurred while updating submission metadata.",
|
|
|
|
+ traceback = ret,
|
|
|
|
+ metadata = {
|
|
|
|
+ sub_id = sub_id,
|
|
|
|
+ },
|
|
|
|
+ })
|
|
|
|
+ end
|
|
|
|
+
|
|
|
|
+ -- After this point, the outcome is either `success` or `warnings`.
|
|
|
|
|
|
-- Remove processing directory.
|
|
-- Remove processing directory.
|
|
local proc_dir = path.join(sip.root_path, "proc")
|
|
local proc_dir = path.join(sip.root_path, "proc")
|
|
if path.isdir(proc_dir) then dir.rmtree(proc_dir) end
|
|
if path.isdir(proc_dir) then dir.rmtree(proc_dir) end
|
|
|
|
|
|
|
|
+ local report = {
|
|
|
|
+ metadata = {
|
|
|
|
+ sub_id = sub_id,
|
|
|
|
+ resources = {},
|
|
|
|
+ },
|
|
|
|
+ }
|
|
|
|
+
|
|
if cleanup then
|
|
if cleanup then
|
|
- -- Gather all top-level directories and delete them.
|
|
|
|
- rsrc_paths = {}
|
|
|
|
- for i, rsrc in ipairs(sip) do
|
|
|
|
- rsrc_paths[rsrc.source_path:match("[^/]+")] = true
|
|
|
|
- end
|
|
|
|
- for tlf in pairs(rsrc_paths) do
|
|
|
|
- local target = path.join(sip.root_path, tlf)
|
|
|
|
- logger:info("Cleaning up: " .. target)
|
|
|
|
- if path.isdir(target) then dir.rmtree(target)
|
|
|
|
- elseif path.isfile(target) then file.delete(target) end
|
|
|
|
|
|
+ rc, ret = xpcall(cleanup_src, debug.traceback, sip)
|
|
|
|
+ if not rc then
|
|
|
|
+ report.result = "warnings"
|
|
|
|
+ report.message = "An error occurred while deleting the SIP source."
|
|
end
|
|
end
|
|
- logger:info("Cleaning up: " .. ll_path)
|
|
|
|
- file.delete(ll_path)
|
|
|
|
end
|
|
end
|
|
|
|
+
|
|
|
|
+ report.result = "success"
|
|
|
|
+ report.message = "The SIP has been successfully submitted."
|
|
|
|
+
|
|
|
|
+ for _, rsrc in ipairs(sip) do
|
|
|
|
+ report.metadata.resources[rsrc.id] = rsrc.source_path
|
|
|
|
+ end
|
|
|
|
+
|
|
|
|
+ return generate_report(report_path, report)
|
|
end
|
|
end
|
|
|
|
|
|
|
|
|