|
@@ -12,17 +12,20 @@ The deposit process is carried out in several steps:
|
|
|
- File staging (`deposit()`): scans through the generated SIP, identifies the
|
|
|
files, calculates their checksums, and moves them to temporary storage; adds
|
|
|
checksums to the metadata. TODO allow user-provided metadata and validation
|
|
|
-- graph generation: generate an RDF graph for each resource in the SIP.
|
|
|
-- permanent storage: push the RDF graph to permanent store (via functions in
|
|
|
+- Graph generation: generate an RDF graph for each resource in the SIP.
|
|
|
+- Permanent storage: push the RDF graph to permanent store (via functions in
|
|
|
the `repo` module), which includes content model validation; if this
|
|
|
succeeds, related files are also moved from the staging area to the archival
|
|
|
store.
|
|
|
+- Cleanup (optional): if requested, the laundry list and resource folder are
|
|
|
+ deleted from their original location.
|
|
|
|
|
|
--]]
|
|
|
local io = io
|
|
|
|
|
|
local csv = require "ftcsv"
|
|
|
local dir = require "pl.dir"
|
|
|
+local file = require "pl.file"
|
|
|
local libmagic = require "libmagic"
|
|
|
local path = require "pl.path"
|
|
|
local pp = require "pl.pretty"
|
|
@@ -46,8 +49,6 @@ local NT = {}
|
|
|
-- Local path to URI mapping. For linking between newly created resources.
|
|
|
local path_to_uri
|
|
|
|
|
|
-local M = {} -- Submission module
|
|
|
-
|
|
|
|
|
|
-- Initialize libmagic database.
|
|
|
local magic = libmagic.open(libmagic.MIME_TYPE, libmagic.NO_CHECK_COMPRESS )
|
|
@@ -67,7 +68,7 @@ Generate a random, reader-friendly ID.
|
|
|
A 16-character ID with the above defined #chpool of 60 symbols has an entropy
|
|
|
of 94.5 bits, which should be plenty for a medium-sized repository.
|
|
|
]]
|
|
|
-M.idgen = function(len)
|
|
|
+local function idgen(len)
|
|
|
local charlist = {}
|
|
|
for i = 1, (len or pkar.config.id.len) do
|
|
|
table.insert(charlist, string.char(chpool[math.random(1, #chpool)]))
|
|
@@ -77,9 +78,8 @@ M.idgen = function(len)
|
|
|
end
|
|
|
|
|
|
|
|
|
-M.generate_sip = function(src_path)
|
|
|
- local sip = {root_path = src_path:match("(.*/)")}
|
|
|
- local src_dir = path.dirname(src_path)
|
|
|
+local function generate_sip(ll_path)
|
|
|
+ local sip = {root_path = path.dirname(ll_path)}
|
|
|
path_to_uri = {}
|
|
|
|
|
|
local tn_dir = path.join(sip.root_path, "proc", "tn")
|
|
@@ -88,7 +88,7 @@ M.generate_sip = function(src_path)
|
|
|
local prev_path
|
|
|
|
|
|
local i = 0
|
|
|
- for row_n, row in csv.parseLine(src_path) do
|
|
|
+ for row_n, row in csv.parseLine(ll_path) do
|
|
|
local has_content
|
|
|
for k, v in pairs(row) do
|
|
|
-- Change "" to nil.
|
|
@@ -107,7 +107,7 @@ M.generate_sip = function(src_path)
|
|
|
:format(i, row_n))
|
|
|
prev_path = row.source_path
|
|
|
-- New row.
|
|
|
- sip[i] = {id = "par:" .. M.idgen()}
|
|
|
+ sip[i] = {id = "par:" .. idgen()}
|
|
|
-- Add to path to URI map for later referencing.
|
|
|
path_to_uri[row.source_path] = sip[i].id
|
|
|
for k, v in pairs(row) do
|
|
@@ -148,12 +148,12 @@ M.generate_sip = function(src_path)
|
|
|
for i, v in ipairs(sip) do
|
|
|
local rmod = model.types[v.content_type]
|
|
|
dbg.assert(v.source_path)
|
|
|
- local fpath = path.join(src_dir, v.source_path)
|
|
|
+ local fpath = path.join(sip.root_path, v.source_path)
|
|
|
--dbg.assert(rmod)
|
|
|
v.has_member = v.has_member or {}
|
|
|
-- Create implicit members from single-file artifact.
|
|
|
if rmod.types.artifact and path.isfile(fpath) then
|
|
|
- local file_id = "par:" .. M.idgen()
|
|
|
+ local file_id = "par:" .. idgen()
|
|
|
-- Insert file resource and move it into a new sub-folder.
|
|
|
table.insert(sip, {
|
|
|
content_type = rmod.default_fmodel or "file",
|
|
@@ -188,7 +188,7 @@ end
|
|
|
--[[ Convert a SIP resource table to an in-memory Volksdata graph.
|
|
|
|
|
|
--]]
|
|
|
-M.rsrc_to_graph = function(rsrc)
|
|
|
+local function rsrc_to_graph(rsrc)
|
|
|
local rmod = model.types[rsrc.content_type]
|
|
|
logger:info("Updating resource md: ", pp.write(rsrc))
|
|
|
|
|
@@ -237,7 +237,7 @@ M.rsrc_to_graph = function(rsrc)
|
|
|
local proxy_s
|
|
|
for i, vv in ipairs(v) do
|
|
|
-- Add linked list proxies.
|
|
|
- local brick_uri = term.new_iriref_ns("par:" .. M.idgen())
|
|
|
+ local brick_uri = term.new_iriref_ns("par:" .. idgen())
|
|
|
if i == 1 then
|
|
|
proxy_s = s
|
|
|
it:add_iter(triple.new(
|
|
@@ -287,7 +287,13 @@ M.rsrc_to_graph = function(rsrc)
|
|
|
end
|
|
|
|
|
|
|
|
|
-M.deposit = function(sip)
|
|
|
+-- Submission module.
|
|
|
+local M = {}
|
|
|
+
|
|
|
+
|
|
|
+M.deposit = function(ll_path, cleanup)
|
|
|
+ local sip = generate_sip(ll_path)
|
|
|
+
|
|
|
for i, rsrc in ipairs(sip) do
|
|
|
-- TODO Wrap this chunk into a txn. Each row is atomic.
|
|
|
logger:debug(("Processing resource #%d of %d: %s"):format(
|
|
@@ -296,7 +302,7 @@ M.deposit = function(sip)
|
|
|
local in_path, fext
|
|
|
if not rsrc.source_path then goto continue end
|
|
|
|
|
|
- in_path = sip.root_path .. rsrc.source_path
|
|
|
+ in_path = path.join(sip.root_path, rsrc.source_path)
|
|
|
fext = path.extension(in_path)
|
|
|
-- If it's a directory, skip file processing.
|
|
|
if not path.isfile(in_path) then goto continue end
|
|
@@ -359,7 +365,7 @@ M.deposit = function(sip)
|
|
|
rsrc.last_modified = tstamp
|
|
|
|
|
|
local tmp_gr, s
|
|
|
- tmp_gr, s = M.rsrc_to_graph(rsrc)
|
|
|
+ tmp_gr, s = rsrc_to_graph(rsrc)
|
|
|
|
|
|
local val_report = validator.validate(tmp_gr, s)
|
|
|
if val_report.max_level == "ERROR" then error(
|
|
@@ -374,6 +380,22 @@ M.deposit = function(sip)
|
|
|
|
|
|
-- Remove processing directory.
|
|
|
dir.rmtree(path.join(sip.root_path, "proc"))
|
|
|
+
|
|
|
+    if cleanup then
+        -- Optional cleanup: delete the deposited sources, then the laundry
+        -- list itself. Collect the first path component of every resource's
+        -- source_path as table keys, so each top-level entry is removed once.
+        local rsrc_paths = {}
+        for _, rsrc in ipairs(sip) do
+            -- Resources without a source_path are skipped, as in the
+            -- processing loop; a nil match must not be used as a table key.
+            local tlf = rsrc.source_path and rsrc.source_path:match("[^/]+")
+            if tlf then rsrc_paths[tlf] = true end
+        end
+        for tlf in pairs(rsrc_paths) do
+            local target = path.join(sip.root_path, tlf)
+            logger:info("Cleaning up: " .. target)
+            -- Directories are removed recursively; plain files are unlinked.
+            if path.isdir(target) then dir.rmtree(target)
+            elseif path.isfile(target) then file.delete(target) end
+        end
+        logger:info("Cleaning up: " .. ll_path)
+        file.delete(ll_path)
+    end
|
|
|
end
|
|
|
|
|
|
|