|
@@ -5,7 +5,7 @@ end storage, which is called here but defined in the repo module).
|
|
|
|
|
|
The deposit process is carried out in several steps:
|
|
|
|
|
|
-- SIP generation (`generate_sip()`): scans the laundry list CSV and builds a
|
|
|
+- SIP generation (`parse_ll()`): scans the laundry list CSV and builds a
|
|
|
temporary data structure with the found metadata; generates unique IDs for
|
|
|
resources; infers some implicit relationships from the position of the CSV
|
|
|
rows and folder layout; adds system-controlled metadata.
|
|
@@ -46,15 +46,6 @@ local logger = pkar.logger
|
|
|
-- "nil" table - for missing key fallback in chaining.
|
|
|
local NT = {}
|
|
|
|
|
|
--- Local path to URI mapping. For linking between newly created resources.
|
|
|
-local path_to_uri
|
|
|
-
|
|
|
--- Track IDs in SIP to validate links created in a submission.
|
|
|
-local sip_ids
|
|
|
-
|
|
|
--- Submission ID and name.
|
|
|
-local sub_id, sub_name
|
|
|
-
|
|
|
-- Initialize libmagic database.
|
|
|
local magic = libmagic.open(libmagic.MIME_TYPE, libmagic.NO_CHECK_COMPRESS )
|
|
|
assert(magic:load())
|
|
@@ -73,7 +64,7 @@ Generate a random, reader-friendly ID.
|
|
|
A 16-character ID with the above defined #chpool of 60 smybols has an entropy
|
|
|
of 94.5 bits, which should be plenty for a medium-sized repository.
|
|
|
]]
|
|
|
-local function idgen(len)
|
|
|
+local function idgen(len)
|
|
|
local charlist = {}
|
|
|
for i = 1, (len or pkar.config.id.len) do
|
|
|
table.insert(charlist, string.char(chpool[math.random(1, #chpool)]))
|
|
@@ -83,24 +74,21 @@ local function idgen(len)
|
|
|
end
|
|
|
|
|
|
|
|
|
-local function generate_sip(ll_path)
|
|
|
- if not path.isfile(ll_path) then error(ll_path .. " is not a file.", 2) end
|
|
|
-
|
|
|
- -- Submission ID sticks to all the resources.
|
|
|
- sub_id = "sub:" .. idgen()
|
|
|
- sub_name = ll_path:match("pkar_submission[%-_%.](.*)%.csv")
|
|
|
+--[[--
|
|
|
+Parse laundry list and generate the basic SIP.
|
|
|
|
|
|
- local sip = {root_path = path.dirname(ll_path)}
|
|
|
- path_to_uri = {}
|
|
|
- sip_ids = {}
|
|
|
+@tparam Submission sub Submission object to populate.
|
|
|
|
|
|
- local tn_dir = path.join(sip.root_path, "proc", "tn")
|
|
|
+@return true on success; false, error report on failure.
|
|
|
+]]
|
|
|
+local function parse_ll(sub)
|
|
|
+ local tn_dir = path.join(sub.root_path, "proc", "tn")
|
|
|
dir.makepath(tn_dir)
|
|
|
|
|
|
local prev_id
|
|
|
|
|
|
local i = 0
|
|
|
- for row_n, row in csv.parseLine(ll_path) do
|
|
|
+ for row_n, row in csv.parseLine(sub.ll_path) do
|
|
|
local has_content
|
|
|
for k, v in pairs(row) do
|
|
|
-- Change "" to nil.
|
|
@@ -119,22 +107,23 @@ local function generate_sip(ll_path)
|
|
|
("Processing LL resource #%d at row #%d.")
|
|
|
:format(i, row_n))
|
|
|
|
|
|
- sip[i] = {
|
|
|
+ sub.sip[i] = {
|
|
|
-- Normalize provided ID or generate random ID if not provided.
|
|
|
id = "par:" .. (row.id or idgen()),
|
|
|
- sub_id = sub_id,
|
|
|
+ sub_id = sub.id,
|
|
|
+ sub = sub,
|
|
|
}
|
|
|
prev_id = row.id
|
|
|
- sip_ids[sip[i].id] = true -- Add to common sip ID set.
|
|
|
+ sub.ids[sub.sip[i].id] = true -- Add to common SIP ID set.
|
|
|
for k, v in pairs(row) do
|
|
|
if not v or k == "id" then goto cont1 end -- skip empty strings.
|
|
|
- if pkar.config.md.single_values[k] then sip[i][k] = v
|
|
|
+ if pkar.config.md.single_values[k] then sub.sip[i][k] = v
|
|
|
-- Multi-values are ordered in the SIP for further processing.
|
|
|
- else sip[i][k] = {v} end
|
|
|
+ else sub.sip[i][k] = {v} end
|
|
|
::cont1::
|
|
|
end
|
|
|
-- Add to path to URI map for later referencing.
|
|
|
- path_to_uri[row.source_path] = sip[i].id
|
|
|
+ sub.path_to_uri[row.source_path] = sub.sip[i].id
|
|
|
else
|
|
|
-- Continuation of values from a previous row.
|
|
|
if i < 1 then
|
|
@@ -151,7 +140,7 @@ local function generate_sip(ll_path)
|
|
|
else
|
|
|
logger:debug("Value: ", v)
|
|
|
logger:debug("Inserting at row ", i - 1)
|
|
|
- table.insert(sip[i][k], v)
|
|
|
+ table.insert(sub.sip[i][k], v)
|
|
|
end
|
|
|
::cont2::
|
|
|
end
|
|
@@ -161,63 +150,90 @@ local function generate_sip(ll_path)
|
|
|
::skip::
|
|
|
row_n = row_n + 1
|
|
|
end
|
|
|
- -- Infer structure from paths and row ordering.
|
|
|
- for i, v in ipairs(sip) do
|
|
|
- local rmod = model.types[v.content_type]
|
|
|
- --require "debugger".assert(rmod)
|
|
|
- local fpath = path.join(sip.root_path, v.source_path)
|
|
|
- --dbg.assert(rmod)
|
|
|
- v.has_member = v.has_member or {}
|
|
|
- -- Create implicit members from single-file artifact.
|
|
|
- if rmod.types.artifact and path.isfile(fpath) then
|
|
|
- local file_id = "par:" .. idgen()
|
|
|
- sip_ids[file_id] = true
|
|
|
- -- Insert file resource and move it into a new sub-folder.
|
|
|
- table.insert(sip, {
|
|
|
- content_type = rmod.default_fmodel or "file",
|
|
|
- id = file_id,
|
|
|
- sub_id = sub_id,
|
|
|
- label = path.basename(v.source_path),
|
|
|
- source_path = v.source_path,
|
|
|
- })
|
|
|
- sip[i].has_file = file_id
|
|
|
- sip[i].pref_rep = file_id
|
|
|
- sip[i].source_path = nil
|
|
|
- goto skip
|
|
|
- end
|
|
|
- for j = i + 1, #sip do
|
|
|
- if sip[j].source_path:match("^" .. pkar.escape_ptn(v.source_path))
|
|
|
- then
|
|
|
- local rel_path = sip[j].source_path:sub(#v.source_path + 2)
|
|
|
- logger:debug("rel_path: " .. rel_path)
|
|
|
- if not rel_path:match("/") then
|
|
|
- logger:debug(("Adding member %s to %s"):format(
|
|
|
- rel_path, v.source_path))
|
|
|
- table.insert(v.has_member, sip[j].id)
|
|
|
- end
|
|
|
- end
|
|
|
- end
|
|
|
- ::skip::
|
|
|
- end
|
|
|
- logger:debug("Parsed SIP: ", pp.write(sip))
|
|
|
|
|
|
- return sip
|
|
|
+ return true
|
|
|
end
|
|
|
|
|
|
|
|
|
---[[ Convert a SIP resource table to an in-memory Volksdata graph.
|
|
|
+--[[--
|
|
|
+Process a file resource.
|
|
|
|
|
|
---]]
|
|
|
+During this step, the file is moved to a staging area, its checksum is
|
|
|
+calculated, and some more technical metadata are extracted and added to the
|
|
|
+file's D-Res.
|
|
|
+
|
|
|
+@tparam table rsrc Resource table to be updated.
|
|
|
+]]
|
|
|
+local function process_file(rsrc)
|
|
|
+ local src_path = path.join(rsrc.sub.root_path, rsrc.source_path)
|
|
|
+ local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
|
|
|
+ local fext
|
|
|
+ _, fext = path.splitext(src_path)
|
|
|
+ local tmp_path = tmp_dir .. rsrc.id .. fext
|
|
|
+ dir.makepath(tmp_dir)
|
|
|
+
|
|
|
+ local ifh = io.open(src_path, "r")
|
|
|
+    if not ifh then error("Cannot open source file: " .. src_path, 2)
|
|
|
+ end
|
|
|
+
|
|
|
+ rsrc.format = {magic:filehandle(ifh)}
|
|
|
+ local hash_it = mc.new_blake2b()
|
|
|
+ local fsize = 0
|
|
|
+ logger:debug("Hashing ", src_path)
|
|
|
+ local ofh = assert(io.open(tmp_path, "w"))
|
|
|
+ while true do
|
|
|
+        local chunk = ifh:read(pkar.config.fs.stream_chunk_size)
|
|
|
+ if not chunk then break end
|
|
|
+ hash_it:update(chunk)
|
|
|
+ ofh:write(chunk)
|
|
|
+ fsize = fsize + #chunk
|
|
|
+ end
|
|
|
+ local checksum = hash_it:final(true)
|
|
|
+ rsrc.checksum = {"blake2:" .. checksum}
|
|
|
+ rsrc.size = fsize
|
|
|
+
|
|
|
+ ofh:close()
|
|
|
+ ifh:close()
|
|
|
+
|
|
|
+ -- Copy file and calculate checksum.
|
|
|
+ local out_dir, out_path
|
|
|
+ out_dir = path.join(
|
|
|
+ pkar.config.fs.ores_path,
|
|
|
+ checksum:sub(1, 2),
|
|
|
+ checksum:sub(3, 4))
|
|
|
+ out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
|
|
|
+ dir.makepath(out_dir)
|
|
|
+ logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
|
|
|
+ dir.movefile(tmp_path, out_path)
|
|
|
+ rsrc.archive_path = out_path
|
|
|
+
|
|
|
+ -- Copy thumbnail if existing.
|
|
|
+ if rsrc.thumbnail then
|
|
|
+ src_path = rsrc.thumbnail
|
|
|
+ out_path = path.join(
|
|
|
+ out_dir, path.basename(src_path))
|
|
|
+ logger:debug(("Moving file %s to %s"):format(src_path, out_path))
|
|
|
+ dir.movefile(src_path, out_path)
|
|
|
+ rsrc.thumbnail = out_path
|
|
|
+ end
|
|
|
+
|
|
|
+ return true
|
|
|
+end
|
|
|
+
|
|
|
+
|
|
|
+--[[--
|
|
|
+Convert a SIP resource table to an in-memory Volksdata graph.
|
|
|
+]]
|
|
|
local function rsrc_to_graph(rsrc)
|
|
|
local rmod = model.types[rsrc.content_type]
|
|
|
- logger:debug("Updating resource md: ", pp.write(rsrc))
|
|
|
+ --logger:debug("Updating resource md: ", pp.write(rsrc))
|
|
|
|
|
|
local s = term.new_iriref_ns(rsrc.id)
|
|
|
- local gr = graph.new(nil)
|
|
|
+ local skip_props = {id = true, sub = true}
|
|
|
|
|
|
- it = gr:add_init()
|
|
|
+ it = rsrc.sub.gr:add_init()
|
|
|
for prop, v in pairs(rsrc) do
|
|
|
- if prop == "id" then goto skip end
|
|
|
+ if skip_props[prop] then goto skip end
|
|
|
logger:debug(("Adding attribute: %s = %s"):format(prop, pp.write(v)))
|
|
|
local p = model.id_to_uri[prop]
|
|
|
if not p then
|
|
@@ -247,7 +263,7 @@ local function rsrc_to_graph(rsrc)
|
|
|
-- "par:" could have been added previously.
|
|
|
local rel_id = "par:" .. vv:gsub("^par:", "")
|
|
|
if
|
|
|
- not sip_ids[rel_id]
|
|
|
+ not rsrc.sub.ids[rel_id]
|
|
|
and not repo.gr:contains(triple.new(
|
|
|
term.new_iriref_ns(rel_id),
|
|
|
pkar.RDF_TYPE,
|
|
@@ -255,7 +271,7 @@ local function rsrc_to_graph(rsrc)
|
|
|
))
|
|
|
then
|
|
|
-- Convert local path to URIs.
|
|
|
- local uri = path_to_uri[vv]
|
|
|
+ local uri = rsrc.sub.path_to_uri[vv]
|
|
|
if not uri then error(
|
|
|
("Not a valid path: %s for property: %s on res: %s")
|
|
|
:format(vv, prop, rsrc.id))
|
|
@@ -280,7 +296,7 @@ local function rsrc_to_graph(rsrc)
|
|
|
-- Add linked list proxies.
|
|
|
local brick_id = "par:" .. idgen()
|
|
|
local brick_uri = term.new_iriref_ns(brick_id)
|
|
|
- sip_ids[brick_id] = true
|
|
|
+ rsrc.sub.ids[brick_id] = true
|
|
|
if i == 1 then
|
|
|
proxy_s = s
|
|
|
it:add_iter(triple.new(
|
|
@@ -292,7 +308,8 @@ local function rsrc_to_graph(rsrc)
|
|
|
term.new_iriref_ns(pconf.uri),
|
|
|
term.new_iriref_ns(vv)))
|
|
|
else
|
|
|
- it:add_iter(triple.new(proxy_s, model.id_to_uri.next, brick_uri))
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ proxy_s, model.id_to_uri.next, brick_uri))
|
|
|
end
|
|
|
-- Add the reference.
|
|
|
-- Add basic triples.
|
|
@@ -307,6 +324,22 @@ local function rsrc_to_graph(rsrc)
|
|
|
brick_uri,
|
|
|
model.id_to_uri.content_type,
|
|
|
term.new_iriref_ns("pas:Brick")))
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ brick_uri,
|
|
|
+ model.id_to_uri.sub_id,
|
|
|
+ rsrc.sub.uri))
|
|
|
+
|
|
|
+ local tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ brick_uri,
|
|
|
+ model.id_to_uri.submitted,
|
|
|
+ term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
|
+ ))
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ brick_uri,
|
|
|
+ model.id_to_uri.last_modified,
|
|
|
+ term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
|
+ ))
|
|
|
-- Add reference.
|
|
|
it:add_iter(triple.new(
|
|
|
brick_uri,
|
|
@@ -320,6 +353,9 @@ local function rsrc_to_graph(rsrc)
|
|
|
::skip::
|
|
|
end
|
|
|
|
|
|
+ -- Add submission ID.
|
|
|
+ it:add_iter(triple.new(s, model.id_to_uri.sub_id, rsrc.sub.uri))
|
|
|
+
|
|
|
-- Add resource lineage triples.
|
|
|
for i, m in ipairs(rmod.lineage) do
|
|
|
it:add_iter(triple.new(
|
|
@@ -328,150 +364,123 @@ local function rsrc_to_graph(rsrc)
|
|
|
end
|
|
|
it:add_done()
|
|
|
|
|
|
- return gr, s
|
|
|
+ return s
|
|
|
end
|
|
|
|
|
|
|
|
|
--- Submission module.
|
|
|
-local M = {
|
|
|
- idgen = idgen,
|
|
|
- reset_ores = function()
|
|
|
- if path.isdir(pkar.config.fs.ores_path) then
|
|
|
- logger:warn("Removing existing opaque resource store.")
|
|
|
- dir.rmtree(pkar.config.fs.ores_path)
|
|
|
- end
|
|
|
- dir.makepath(pkar.config.fs.ores_path)
|
|
|
- end,
|
|
|
-}
|
|
|
-
|
|
|
-
|
|
|
--[[--
|
|
|
Process SIP files and metadata.
|
|
|
-During this step, files are moved to a staging area, their checksums are
|
|
|
-calculated, and some more technical metadata are extracted and added to the
|
|
|
-D-Res.
|
|
|
|
|
|
-@tparam table rsrc Resource from the SIP parsed by #{generate_sip}.
|
|
|
+@tparam table rsrc Resource from the SIP parsed by #{parse_ll}.
|
|
|
+@tparam integer i Position in the SIP. Used to look ahead for implicit members
|
|
|
+ by path.
|
|
|
--]]
|
|
|
-local function process_rsrc(rsrc, root_path)
|
|
|
- local in_path, fext
|
|
|
- if not rsrc.source_path then goto continue end
|
|
|
-
|
|
|
- in_path = path.join(root_path, rsrc.source_path)
|
|
|
- fext = path.extension(in_path)
|
|
|
- -- If it's a directory, skip file processing.
|
|
|
- if not path.isfile(in_path) then goto continue end
|
|
|
-
|
|
|
- do
|
|
|
- local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
|
|
|
- local file_ext
|
|
|
- _, file_ext = path.splitext(in_path)
|
|
|
- local tmp_path = tmp_dir .. rsrc.id .. file_ext
|
|
|
- dir.makepath(tmp_dir)
|
|
|
-
|
|
|
- local ifh = assert(io.open(in_path, "r"))
|
|
|
-
|
|
|
- rsrc.format = {magic:filehandle(ifh)}
|
|
|
- local hash_it = mc.new_blake2b()
|
|
|
- local fsize = 0
|
|
|
- logger:debug("Hashing ", in_path)
|
|
|
- local ofh = assert(io.open(tmp_path, "w"))
|
|
|
- while true do
|
|
|
- chunk = ifh:read(pkar.config.fs.stream_chunk_size)
|
|
|
- if not chunk then break end
|
|
|
- hash_it:update(chunk)
|
|
|
- ofh:write(chunk)
|
|
|
- fsize = fsize + #chunk
|
|
|
- end
|
|
|
- local checksum = hash_it:final(true)
|
|
|
- rsrc.checksum = {"blake2:" .. checksum}
|
|
|
- rsrc.size = fsize
|
|
|
-
|
|
|
- ofh:close()
|
|
|
- ifh:close()
|
|
|
-
|
|
|
- -- Copy file and calculate checksum.
|
|
|
- local out_dir, out_path
|
|
|
- out_dir = path.join(
|
|
|
- pkar.config.fs.ores_path,
|
|
|
- checksum:sub(1, 2),
|
|
|
- checksum:sub(3, 4))
|
|
|
- out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
|
|
|
- dir.makepath(out_dir)
|
|
|
- logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
|
|
|
- dir.movefile(tmp_path, out_path)
|
|
|
- rsrc.archive_path = out_path
|
|
|
-
|
|
|
- -- Copy thumbnail if existing.
|
|
|
- if rsrc.thumbnail then
|
|
|
- src_path = rsrc.thumbnail
|
|
|
- out_path = path.join(
|
|
|
- out_dir, path.basename(src_path))
|
|
|
- logger:debug(("Moving file %s to %s"):format(src_path, out_path))
|
|
|
- dir.movefile(src_path, out_path)
|
|
|
- rsrc.thumbnail = out_path
|
|
|
+local function process_rsrc(rsrc, i)
|
|
|
+ local rmod = model.types[rsrc.content_type]
|
|
|
+ --require "debugger".assert(rmod)
|
|
|
+
|
|
|
+ -- BEGIN file check and processing.
|
|
|
+ if rmod.types.file then
|
|
|
+ if rsrc.source_path then
|
|
|
+ local in_path = path.join(rsrc.sub.root_path, rsrc.source_path)
|
|
|
+ if path.isfile(in_path) then process_file(rsrc, in_path)
|
|
|
+ else error(
|
|
|
+ "File type provided for " .. rsrc.id ..
|
|
|
+                " but the source path " .. in_path .. " is not a file.")
|
|
|
+ end
|
|
|
+ else
|
|
|
+ -- If it's a file and no path is provided, look for it in the repo.
|
|
|
+ -- This will obviously fail if no ID was provided either, and
|
|
|
+ -- a random one was just generated.
|
|
|
+ if repo.gr:contains(triple.new(
|
|
|
+            term.new_iriref_ns(rsrc.id),
|
|
|
+ pkar.RDF_TYPE,
|
|
|
+            model.id_to_uri.file
|
|
|
+ )) then
|
|
|
+ -- File is in the repo. This is a metadata-only update.
|
|
|
+ logger:info("Metadata-only update on file: " .. rsrc.id)
|
|
|
+ else
|
|
|
+ -- No file found. That's an error.
|
|
|
+ error(
|
|
|
+ "No source path was provided and no file with this ID \z
|
|
|
+ was found in the archive: " .. rsrc.id)
|
|
|
+ end
|
|
|
end
|
|
|
end
|
|
|
+ -- END file processing.
|
|
|
|
|
|
- ::continue::
|
|
|
+ ::skip_file_proc::
|
|
|
|
|
|
local tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
rsrc.submitted = tstamp
|
|
|
rsrc.last_modified = tstamp
|
|
|
|
|
|
- local tmp_gr, s
|
|
|
- tmp_gr, s = rsrc_to_graph(rsrc)
|
|
|
-
|
|
|
- local val_report = validator.validate(tmp_gr, s)
|
|
|
- if val_report.max_level == "ERROR" then error(
|
|
|
- "Validation raised errors: " .. pp.write(val_report))
|
|
|
- elseif val_report.max_level == "WARN" then logger:warn(
|
|
|
- "Validation raised warnings: " .. pp.write(val_report))
|
|
|
- elseif val_report.max_level == "NOTICE" then logger:warn(
|
|
|
- "Validation raised notices: " .. pp.write(val_report)) end
|
|
|
-
|
|
|
- repo.store_updates(tmp_gr, s)
|
|
|
- logger:info("Stored: ", s.data)
|
|
|
-end
|
|
|
-
|
|
|
-
|
|
|
-local function generate_report(rpath, report)
|
|
|
- report.timestamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
+ -- Infer structure from paths and row ordering.
|
|
|
+ --require "debugger".assert(rmod)
|
|
|
+ local fpath = path.join(rsrc.sub.root_path, rsrc.source_path)
|
|
|
+ rsrc.has_member = rsrc.has_member or {}
|
|
|
+ -- Create implicit members from single-file artifact.
|
|
|
+ if rmod.types.artifact and path.isfile(fpath) then
|
|
|
+ local file_id = "par:" .. idgen()
|
|
|
+ rsrc.sub.ids[file_id] = true
|
|
|
+ -- Insert file resource. It will be processed as part of the sip table.
|
|
|
+ table.insert(rsrc.sub.sip, {
|
|
|
+ content_type = rmod.default_fmodel or "file",
|
|
|
+ id = file_id,
|
|
|
+ sub_id = rsrc.sub.id,
|
|
|
+ label = path.basename(rsrc.source_path),
|
|
|
+ source_path = rsrc.source_path,
|
|
|
+ })
|
|
|
+ rsrc.has_file = file_id
|
|
|
+ rsrc.pref_rep = file_id
|
|
|
+ rsrc.source_path = nil
|
|
|
+ goto skip
|
|
|
+ end
|
|
|
+ ::skip::
|
|
|
+
|
|
|
+ -- Look ahead for resources under this container and add membership.
|
|
|
+ for j = i + 1, #rsrc.sub.sip do
|
|
|
+ if rsrc.sub.sip[j].source_path:match(
|
|
|
+ "^" .. pkar.escape_ptn(rsrc.source_path))
|
|
|
+ then
|
|
|
+ local rel_path = rsrc.sub.sip[j].source_path:sub(
|
|
|
+ #rsrc.source_path + 2)
|
|
|
+ logger:debug("rel_path: " .. rel_path)
|
|
|
+ if not rel_path:match("/") then
|
|
|
+ logger:debug(("Adding member %s to %s"):format(
|
|
|
+ rel_path, rsrc.source_path))
|
|
|
+ table.insert(rsrc.has_member, rsrc.sub.sip[j].id)
|
|
|
+ end
|
|
|
+ end
|
|
|
+ end
|
|
|
|
|
|
- if rpath then
|
|
|
- local fh = io.open(rpath, "w")
|
|
|
- fh:write(json.encode(report))
|
|
|
- fh:close()
|
|
|
- logger:info("Report written to " .. rpath)
|
|
|
- return true
|
|
|
- else return report end
|
|
|
+ return rsrc_to_graph(rsrc)
|
|
|
end
|
|
|
|
|
|
|
|
|
-local function add_sub_meta()
|
|
|
+local function add_sub_meta(sub)
|
|
|
-- Add triples for submission metadata directly to the stored graph.
|
|
|
local it = repo.gr:add_init()
|
|
|
- local sub_uri = term.new_iriref_ns(sub_id)
|
|
|
it:add_iter(triple.new(
|
|
|
- sub_uri,
|
|
|
+ sub.uri,
|
|
|
pkar.RDF_TYPE,
|
|
|
term.new_iriref_ns("par:Submission")
|
|
|
))
|
|
|
- if sub_name then
|
|
|
+ if sub.name then
|
|
|
it:add_iter(triple.new(
|
|
|
- sub_uri,
|
|
|
+ sub.uri,
|
|
|
term.new_iriref_ns("rdfs:label"),
|
|
|
- term.new_lit(sub_name)
|
|
|
+ term.new_lit(sub.name)
|
|
|
))
|
|
|
end
|
|
|
local tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
it:add_iter(triple.new(
|
|
|
- sub_uri,
|
|
|
+ sub.uri,
|
|
|
model.id_to_uri.submitted,
|
|
|
term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
|
))
|
|
|
it:add_iter(triple.new(
|
|
|
- sub_uri,
|
|
|
+ sub.uri,
|
|
|
model.id_to_uri.last_modified,
|
|
|
term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
|
))
|
|
@@ -479,58 +488,105 @@ local function add_sub_meta()
|
|
|
end
|
|
|
|
|
|
|
|
|
-local function cleanup_src(sip)
|
|
|
+local function generate_report(rpath, report)
|
|
|
+ report.timestamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
+
|
|
|
+ if rpath then
|
|
|
+ local fh = io.open(rpath, "w")
|
|
|
+ fh:write(json.encode(report))
|
|
|
+ fh:close()
|
|
|
+ logger:info("Report written to " .. rpath)
|
|
|
+ return true
|
|
|
+ else return report end
|
|
|
+end
|
|
|
+
|
|
|
+
|
|
|
+local function cleanup_src(sub)
|
|
|
-- Gather all top-level directories and delete them.
|
|
|
rsrc_paths = {}
|
|
|
- for i, rsrc in ipairs(sip) do
|
|
|
+ for i, rsrc in ipairs(sub.sip) do
|
|
|
rsrc_paths[rsrc.source_path:match("[^/]+")] = true
|
|
|
end
|
|
|
for tlf in pairs(rsrc_paths) do
|
|
|
- local target = path.join(sip.root_path, tlf)
|
|
|
+ local target = path.join(sub.root_path, tlf)
|
|
|
logger:info("Cleaning up: " .. target)
|
|
|
if path.isdir(target) then dir.rmtree(target)
|
|
|
elseif path.isfile(target) then file.delete(target) end
|
|
|
end
|
|
|
- logger:info("Cleaning up: " .. ll_path)
|
|
|
+ logger:info("Cleaning up: " .. sub.ll_path)
|
|
|
file.delete(ll_path)
|
|
|
end
|
|
|
|
|
|
|
|
|
+--
|
|
|
+-- Public class & members
|
|
|
+--
|
|
|
+
|
|
|
+local Submission = {}
|
|
|
+
|
|
|
+
|
|
|
--[[--
|
|
|
-Deposit resources from a SIP.
|
|
|
-@tparam string ll_path Path of the laundry list. All SIP source references are
|
|
|
- relative to this path.
|
|
|
-@tparam boolean cleanup If set to `true`, it removes the SIP if the submission
|
|
|
- is successful.
|
|
|
-@param string report_path Optional path for the report file.
|
|
|
+Create a new submission.
|
|
|
|
|
|
-@treturn bool Whether the process completed successfully.
|
|
|
---]]
|
|
|
-M.deposit = function(ll_path, cleanup, report_path)
|
|
|
- local sip, rc, ret
|
|
|
+@tparam string ll_path Path of laundry list file.
|
|
|
|
|
|
- rc, ret = xpcall(generate_sip, debug.traceback, ll_path)
|
|
|
- if rc then sip = ret
|
|
|
- else
|
|
|
- return generate_report(report_path, {
|
|
|
+@treturn table New Submission object.
|
|
|
+]]
|
|
|
+function Submission:new (ll_path, report_path)
|
|
|
+ -- TODO generate a report.
|
|
|
+ if not path.isfile(ll_path) then error(ll_path .. " is not a file.", 2) end
|
|
|
+
|
|
|
+ local sub = {
|
|
|
+ root_path = path.dirname(ll_path),
|
|
|
+ ll_path = ll_path,
|
|
|
+ report_path = report_path,
|
|
|
+ id = "sub:" .. idgen(),
|
|
|
+ name = ll_path:match("pkar_submission[%-_%.](.*)%.csv"),
|
|
|
+ sip = {},
|
|
|
+ gr = graph.new(),
|
|
|
+ -- Local path to URI mapping.
|
|
|
+ path_to_uri = {},
|
|
|
+ -- Track IDs in SIP to validate links created in a submission.
|
|
|
+ ids = {},
|
|
|
+ }
|
|
|
+ sub.uri = term.new_iriref_ns(sub.id)
|
|
|
+ self.__index = self
|
|
|
+ setmetatable(sub, self)
|
|
|
+
|
|
|
+ local rc, ret = xpcall(parse_ll, debug.traceback, sub)
|
|
|
+ if not rc then
|
|
|
+ return nil, generate_report(report_path, {
|
|
|
result = "failure",
|
|
|
message = "An error occurred while parsing the SIP.",
|
|
|
traceback = ret,
|
|
|
metadata = {
|
|
|
- ll = ll_path,
|
|
|
+ ll = sub.ll_path,
|
|
|
},
|
|
|
})
|
|
|
end
|
|
|
|
|
|
- for i, rsrc in ipairs(sip) do
|
|
|
- -- TODO Wrap this chunk into a txn. Each row is atomic.
|
|
|
+ return sub
|
|
|
+end
|
|
|
+
|
|
|
+
|
|
|
+--[[--
|
|
|
+Deposit resources from a SIP.
|
|
|
+@tparam string ll_path Path of the laundry list. All SIP source references are
|
|
|
+ relative to this path.
|
|
|
+@tparam boolean cleanup If set to `true`, it removes the SIP if the submission
|
|
|
+ is successful.
|
|
|
+@param string report_path Optional path for the report file.
|
|
|
+
|
|
|
+@treturn bool Whether the process completed successfully.
|
|
|
+--]]
|
|
|
+function Submission:deposit(ll_path, cleanup)
|
|
|
+ local rc, ret
|
|
|
+
|
|
|
+ for i, rsrc in ipairs(self.sip) do
|
|
|
logger:debug(("Processing resource #%d of %d: %s"):format(
|
|
|
- i, #sip, rsrc.id))
|
|
|
+ i, #self.sip, rsrc.id))
|
|
|
|
|
|
- local rc, ret = xpcall(
|
|
|
- process_rsrc,
|
|
|
- debug.traceback,
|
|
|
- rsrc, sip.root_path)
|
|
|
+ local rc, ret = xpcall(process_rsrc, debug.traceback, rsrc, i)
|
|
|
if not rc then
|
|
|
return generate_report(report_path, {
|
|
|
result = "failure",
|
|
@@ -545,33 +601,59 @@ M.deposit = function(ll_path, cleanup, report_path)
|
|
|
end
|
|
|
end
|
|
|
|
|
|
- rc, ret = xpcall(add_sub_meta, debug.traceback)
|
|
|
+ rc, ret = xpcall(add_sub_meta, debug.traceback, self)
|
|
|
if not rc then
|
|
|
return generate_report(report_path, {
|
|
|
result = "failure",
|
|
|
message = "An error occurred while updating submission metadata.",
|
|
|
traceback = ret,
|
|
|
metadata = {
|
|
|
- sub_id = sub_id,
|
|
|
+ sub_id = self.id,
|
|
|
},
|
|
|
})
|
|
|
end
|
|
|
|
|
|
+ for id in pairs(self.ids) do
|
|
|
+ local val_report = validator.validate(self.gr, term.new_iriref_ns(id))
|
|
|
+ if val_report.max_level == "ERROR" then
|
|
|
+            return generate_report(self.report_path, {
|
|
|
+ result = "failure",
|
|
|
+ message = "A resource did not pass validation.",
|
|
|
+ traceback = "",
|
|
|
+ metadata = {
|
|
|
+ sub_id = self.id,
|
|
|
+ rsrc_id = id,
|
|
|
+ validation = val_report,
|
|
|
+ }
|
|
|
+ })
|
|
|
+ elseif val_report.max_level == "WARN" then logger:warn(
|
|
|
+ "Validation raised warnings: " .. pp.write(val_report))
|
|
|
+ elseif val_report.max_level == "NOTICE" then logger:info(
|
|
|
+ "Validation raised notices: " .. pp.write(val_report)) end
|
|
|
+ -- TODO send report for warnings and notices.
|
|
|
+ end
|
|
|
+
|
|
|
+ -- TODO wrap this in an MDB transaction.
|
|
|
+ for id in pairs(self.ids) do
|
|
|
+ repo.store_updates(self.gr, term.new_iriref_ns(id))
|
|
|
+ logger:info("Stored: ", id)
|
|
|
+ end
|
|
|
+
|
|
|
-- After this point, the outcome is either `success` or `warnings`.
|
|
|
|
|
|
-- Remove processing directory.
|
|
|
- local proc_dir = path.join(sip.root_path, "proc")
|
|
|
+ local proc_dir = path.join(self.root_path, "proc")
|
|
|
if path.isdir(proc_dir) then dir.rmtree(proc_dir) end
|
|
|
|
|
|
local report = {
|
|
|
metadata = {
|
|
|
- sub_id = sub_id,
|
|
|
+ sub_id = self.id,
|
|
|
resources = {},
|
|
|
},
|
|
|
}
|
|
|
|
|
|
if cleanup then
|
|
|
- rc, ret = xpcall(cleanup_src, debug.traceback, sip)
|
|
|
+ rc, ret = xpcall(cleanup_src, debug.traceback, self.sip)
|
|
|
if not rc then
|
|
|
report.result = "warnings"
|
|
|
report.message = "An error occurred while deleting the SIP source."
|
|
@@ -581,7 +663,7 @@ M.deposit = function(ll_path, cleanup, report_path)
|
|
|
report.result = "success"
|
|
|
report.message = "The SIP has been successfully submitted."
|
|
|
|
|
|
- for _, rsrc in ipairs(sip) do
|
|
|
+ for _, rsrc in ipairs(self.sip) do
|
|
|
report.metadata.resources[rsrc.id] = rsrc.source_path
|
|
|
end
|
|
|
|
|
@@ -589,4 +671,14 @@ M.deposit = function(ll_path, cleanup, report_path)
|
|
|
end
|
|
|
|
|
|
|
|
|
-return M
|
|
|
+return {
|
|
|
+ Submission = Submission,
|
|
|
+ idgen = idgen,
|
|
|
+ reset_ores = function()
|
|
|
+ if path.isdir(pkar.config.fs.ores_path) then
|
|
|
+ logger:warn("Removing existing opaque resource store.")
|
|
|
+ dir.rmtree(pkar.config.fs.ores_path)
|
|
|
+ end
|
|
|
+ dir.makepath(pkar.config.fs.ores_path)
|
|
|
+ end,
|
|
|
+}
|