|
@@ -52,6 +52,8 @@ local path_to_uri
|
|
|
-- Set of resource IDs in the SIP, used to validate links created in a submission.
|
|
|
local sip_ids
|
|
|
|
|
|
+-- Submission ID and name; set by generate_sip(), read by M.deposit().
|
|
|
+local sub_id, sub_name
|
|
|
|
|
|
-- Initialize libmagic database.
|
|
|
local magic = libmagic.open(libmagic.MIME_TYPE, libmagic.NO_CHECK_COMPRESS )
|
|
@@ -85,7 +87,8 @@ local function generate_sip(ll_path)
|
|
|
if not path.isfile(ll_path) then error(ll_path .. " is not a file.", 2) end
|
|
|
|
|
|
-- Submission ID sticks to all the resources.
|
|
|
- local sub_id = idgen()
|
|
|
+ sub_id = "sub:" .. idgen()
|
|
|
+ sub_name = ll_path:match("pkar_submission[%-_%.](.*)%.csv")
|
|
|
|
|
|
local sip = {root_path = path.dirname(ll_path)}
|
|
|
path_to_uri = {}
|
|
@@ -94,7 +97,7 @@ local function generate_sip(ll_path)
|
|
|
local tn_dir = path.join(sip.root_path, "proc", "tn")
|
|
|
dir.makepath(tn_dir)
|
|
|
|
|
|
- local prev_path
|
|
|
+ local prev_id
|
|
|
|
|
|
local i = 0
|
|
|
for row_n, row in csv.parseLine(ll_path) do
|
|
@@ -107,20 +110,21 @@ local function generate_sip(ll_path)
|
|
|
-- Skip empty lines.
|
|
|
if not has_content then goto skip end
|
|
|
|
|
|
- logger:debug("Row path: ", row.source_path or "")
|
|
|
logger:debug("Parsing row:", pp.write(row))
|
|
|
- if row.source_path then
|
|
|
+ -- content_type is the only real mandatory entry.
|
|
|
+ if row.content_type then
|
|
|
i = i + 1
|
|
|
+ -- New row.
|
|
|
logger:info(
|
|
|
("Processing LL resource #%d at row #%d.")
|
|
|
:format(i, row_n))
|
|
|
- prev_path = row.source_path
|
|
|
- -- New row.
|
|
|
+
|
|
|
sip[i] = {
|
|
|
-- Normalize provided ID or generate random ID if not provided.
|
|
|
id = "par:" .. (row.id or idgen()),
|
|
|
sub_id = sub_id,
|
|
|
}
|
|
|
+ prev_id = row.id
|
|
|
sip_ids[sip[i].id] = true -- Add to common sip ID set.
|
|
|
for k, v in pairs(row) do
|
|
|
if not v or k == "id" then goto cont1 end -- skip empty strings.
|
|
@@ -135,7 +139,7 @@ local function generate_sip(ll_path)
|
|
|
-- Continuation of values from a previous row.
|
|
|
if i < 1 then
|
|
|
error("First row MUST have a path value.", 2)
|
|
|
- elseif not prev_path then
|
|
|
+ elseif not prev_id then
|
|
|
error(("No path information at row %d"):format(i), 2)
|
|
|
else
|
|
|
for k, v in pairs(row) do
|
|
@@ -151,7 +155,7 @@ local function generate_sip(ll_path)
|
|
|
end
|
|
|
::cont2::
|
|
|
end
|
|
|
- row.source_path = prev_path
|
|
|
+ row.id = prev_id
|
|
|
end
|
|
|
end
|
|
|
::skip::
|
|
@@ -237,6 +241,8 @@ local function rsrc_to_graph(rsrc)
|
|
|
for i, vv in ipairs(v) do
|
|
|
if prop == "content_type" then
|
|
|
o = term.new_iriref_ns(rmod.uri)
|
|
|
+ elseif prop == "sub_id" then
|
|
|
+ o = term.new_iriref_ns(vv)
|
|
|
elseif pconf.type == "resource" then
|
|
|
-- "par:" could have been added previously.
|
|
|
local rel_id = "par:" .. vv:gsub("^par:", "")
|
|
@@ -249,19 +255,21 @@ local function rsrc_to_graph(rsrc)
|
|
|
))
|
|
|
then
|
|
|
-- Convert local path to URIs.
|
|
|
- v[i] = path_to_uri[vv]
|
|
|
- if not v[i] then error(
|
|
|
+ local uri = path_to_uri[vv]
|
|
|
+ if not uri then error(
|
|
|
("Not a valid path: %s for property: %s on res: %s")
|
|
|
:format(vv, prop, rsrc.id))
|
|
|
end
|
|
|
- logger:debug("Converted path ".. vv .. " to URI: " .. v[i])
|
|
|
+ v[i] = uri
|
|
|
+ logger:debug("Converted path ".. vv .. " to URI: " .. uri)
|
|
|
else v[i] = rel_id
|
|
|
end
|
|
|
--if not v[i]:find("^par:") then dbg() end
|
|
|
o = term.new_iriref_ns(v[i])
|
|
|
elseif pconf.type == "ext_resource" then
|
|
|
o = term.new_iriref(vv)
|
|
|
- else o = term.new_lit(vv, rdf_type) end
|
|
|
+ else o = term.new_lit(vv, rdf_type)
|
|
|
+ end
|
|
|
it:add_iter(triple.new(s, p, o))
|
|
|
end
|
|
|
|
|
@@ -311,6 +319,8 @@ local function rsrc_to_graph(rsrc)
|
|
|
end
|
|
|
::skip::
|
|
|
end
|
|
|
+
|
|
|
+ -- Add resource lineage triples.
|
|
|
for i, m in ipairs(rmod.lineage) do
|
|
|
it:add_iter(triple.new(
|
|
|
s, pkar.RDF_TYPE,
|
|
@@ -337,6 +347,7 @@ local M = {
|
|
|
|
|
|
M.deposit = function(ll_path, cleanup)
|
|
|
local sip = generate_sip(ll_path)
|
|
|
+ local tstamp
|
|
|
|
|
|
for i, rsrc in ipairs(sip) do
|
|
|
-- TODO Wrap this chunk into a txn. Each row is atomic.
|
|
@@ -373,7 +384,7 @@ M.deposit = function(ll_path, cleanup)
|
|
|
fsize = fsize + #chunk
|
|
|
end
|
|
|
local checksum = hash_it:final(true)
|
|
|
- rsrc.checksum = {"urn:blake2:" .. checksum}
|
|
|
+ rsrc.checksum = {"blake2:" .. checksum}
|
|
|
rsrc.size = fsize
|
|
|
|
|
|
ofh:close()
|
|
@@ -423,6 +434,34 @@ M.deposit = function(ll_path, cleanup)
|
|
|
logger:info("Stored: ", s.data)
|
|
|
end
|
|
|
|
|
|
+ -- Add triples for submission metadata directly to the stored graph.
|
|
|
+ local it = repo.gr:add_init()
|
|
|
+ local sub_uri = term.new_iriref_ns(sub_id)
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ sub_uri,
|
|
|
+ pkar.RDF_TYPE,
|
|
|
+ term.new_iriref_ns("par:Submission")
|
|
|
+ ))
|
|
|
+ if sub_name then
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ sub_uri,
|
|
|
+ term.new_iriref_ns("rdfs:label"),
|
|
|
+ term.new_lit(sub_name)
|
|
|
+ ))
|
|
|
+ end
|
|
|
+ tstamp = os.date("!%Y-%m-%dT%TZ")
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ sub_uri,
|
|
|
+ model.id_to_uri.submitted,
|
|
|
+ term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
|
+ ))
|
|
|
+ it:add_iter(triple.new(
|
|
|
+ sub_uri,
|
|
|
+ model.id_to_uri.last_modified,
|
|
|
+ term.new_lit(tstamp, "xsd:dateTime", nil, true)
|
|
|
+ ))
|
|
|
+ it:add_done()
|
|
|
+
|
|
|
-- Remove processing directory.
|
|
|
local proc_dir = path.join(sip.root_path, "proc")
|
|
|
if path.isdir(proc_dir) then dir.rmtree(proc_dir) end
|