local io = io

local csv = require "csv"
local dir = require "pl.dir"
local lfs = require "lfs"
local uuid = require "uuid"
local path = require "pl.path"
local cksum = require "sha2"

local config = require "config.app"

-- table.unpack where available; fall back to the global unpack otherwise.
local unpack = table.unpack or unpack

-- Random number generator for uuid(): try the urandom-based generator first
-- and, if setting it up raises, fall back to the win_ffi one.
local posix_uuid = pcall(function()
    uuid.set_rng(uuid.rng.urandom())
end)
if not posix_uuid then
    uuid.set_rng(uuid.rng.win_ffi())
end


-- Submission module.
local M = {}


-- Escape every Lua pattern magic character in `s` so that the result matches
-- `s` literally when used as a pattern.
-- Adapted from lua-núcleo.
local function escape_pattern(s)
    local matches = {
        ["^"] = "%^";
        ["$"] = "%$";
        ["("] = "%(";
        [")"] = "%)";
        ["%"] = "%%";
        ["."] = "%.";
        ["["] = "%[";
        ["]"] = "%]";
        ["*"] = "%*";
        ["+"] = "%+";
        ["-"] = "%-";
        ["?"] = "%?";
        ["\0"] = "%z";
    }

    return (s:gsub(".", matches))
end


-- Return the leading part of `p` up to and including its last "/", or nil
-- when `p` contains no "/".
local function dirname(p)
    return p:match("(.*/)")
end


-- CSV cells that are empty come through as ""; map those to nil.
local function nil_if_empty(cell)
    if cell == "" then return nil end
    return cell
end


--[[
Version 1 CSV parsing.

Each row has 3 cells: reference path (ref), metadata key (k), value (v).
Each v cell contains one value.
Properties for the same ref can be added in successive rows without
repeating the ref cell.
Multiple values can be added in successive rows without repeating the ref
and k cells.
A row holding only a ref is a placeholder that registers the resource (and
its position) without adding metadata.

Parameters:
    path: location of the CSV file. NOTE: inside this function the name
        shadows the `pl.path` module.

Returns a list of resource tables ordered by the row where each ref first
appeared. Each has `id`, `path`, the collected metadata (lists of values,
except `type`, which is a single value) and, where applicable, `next` (path
of the next entry sharing the same parent directory) and `firstChild` (path
of the first later entry whose path starts with this one). The list also
carries `root_path`, the directory part of `path`.

Raises an error if a row relies on a previous ref or key that was never set,
or if the CSV file cannot be opened.
--]]
M.generate_sip_v1 = function(path)
    local sub_data = assert(csv.open(path))
    local md = {}
    local prev_ref, prev_k

    -- Collate metadata.
    local i = 0
    for row in sub_data:lines() do
        i = i + 1
        local ref, k, v = unpack(row)
        ref, k, v = nil_if_empty(ref), nil_if_empty(k), nil_if_empty(v)
        print("Parsing row:", ref, k, v)

        -- v is compared against nil explicitly because false is treated as
        -- a legitimate value.
        if v == nil then
            if ref and not k then
                -- Placeholder for ordering purposes. Never clobber metadata
                -- already collected for this ref, and always create a
                -- complete entry so that sorting and structure inference
                -- below can rely on `path` and `_sort`.
                md[ref] = md[ref] or {id = uuid(), path = ref, _sort = i}
            end
            -- Any other row without a value carries no information.
        else
            -- If ref or k are missing, reuse the previous one.
            if ref then
                prev_ref = ref
            else
                if not prev_ref then
                    -- If column 1 is empty, it must have been set in a
                    -- previous row.
                    error(string.format(
                            "Reference in column 1, row %d not found!", i), 2)
                end
                ref = prev_ref
            end
            if k then
                prev_k = k
            else
                if not prev_k then
                    -- If column 2 is empty, it must have been set in a
                    -- previous row.
                    error(string.format(
                            "Property key in column 2, row %d not found!", i), 2)
                end
                k = prev_k
            end

            local rsrc = md[ref]
            if not rsrc then
                rsrc = {id = uuid(), path = ref, _sort = i}
                md[ref] = rsrc
            end

            if k == "type" then
                -- `type` is single-valued: the last value wins.
                rsrc[k] = v
            else
                rsrc[k] = rsrc[k] or {}
                table.insert(rsrc[k], v)
            end
        end
    end

    -- Move md to an ordered list.
    local mdlist = {root_path = dirname(path)}
    for _, rsrc in pairs(md) do
        mdlist[#mdlist + 1] = rsrc
    end
    table.sort(mdlist, function(a, b) return a._sort < b._sort end)

    -- Infer structure from paths and row ordering.
    for idx, rsrc in ipairs(mdlist) do
        local parent = dirname(rsrc.path)
        local prefix = "^" .. escape_pattern(rsrc.path)

        for j = idx + 1, #mdlist do
            local other = mdlist[j].path
            if not rsrc["next"] and dirname(other) == parent then
                rsrc["next"] = other
            end
            if not rsrc.firstChild and other:match(prefix) then
                rsrc.firstChild = other
            end
            -- Both links resolved: later entries cannot change them.
            if rsrc["next"] and rsrc.firstChild then break end
        end

        -- _sort was only needed to restore row order.
        rsrc._sort = nil
    end

    return mdlist
end


--[[
Version 2 CSV parsing.

The CSV file has a header row. A row with a non-empty `path` cell starts a
new resource; a row with an empty `path` cell adds values to the resource
started by the closest previous row that had one. Empty cells are skipped.
Columns whose name is a truthy key of `config.md.single_values` are stored as
a single value (a later row overwrites it); all other columns are stored as
lists of values.

Parameters:
    path: location of the CSV file. NOTE: inside this function the name
        shadows the `pl.path` module.

Returns a list of resource tables, each with a `pas_id` plus its collected
columns. The list also carries `root_path`, the directory part of `path`.

Raises an error if the first data row has no path value, or if the CSV file
cannot be opened.
--]]
M.generate_sip_v2 = function(path)
    local sub_data = assert(csv.open(path, {header = true}))
    local sip = {root_path = dirname(path)}
    local single_values = config.md.single_values

    local count = 0 -- Resources collected so far.
    local row_n = 0 -- Data rows read so far.
    for row in sub_data:lines() do
        row_n = row_n + 1
        -- A missing `path` column is handled like an empty cell.
        local row_path = row["path"] or ""
        print("Processing row: " .. row_n)
        print("Row path: " .. row_path)

        if row_path ~= "" then
            -- New resource.
            local rsrc = {pas_id = uuid()}
            for k, v in pairs(row) do
                if v ~= "" then -- skip empty strings.
                    if single_values[k] then
                        rsrc[k] = v
                    else
                        rsrc[k] = {v}
                    end
                end
            end
            count = count + 1
            sip[count] = rsrc
        else
            -- Continuation of values from a previous row.
            local rsrc = sip[count]
            if not rsrc then
                error("First row MUST have a path value.", 2)
            end
            -- The path cell is empty here, so the loop below leaves the
            -- path recorded by the resource's first row untouched.
            for k, v in pairs(row) do
                if v ~= "" then -- skip empty strings.
                    if single_values[k] then
                        -- It doesn't make much sense to overwrite, maybe
                        -- throw an error?
                        rsrc[k] = v
                    else
                        print("Value: " .. v)
                        print("Inserting at row " .. count)
                        -- The column may have been empty in every earlier
                        -- row of this resource.
                        local values = rsrc[k]
                        if not values then
                            values = {}
                            rsrc[k] = values
                        end
                        values[#values + 1] = v
                    end
                end
            end
        end
    end

    return sip
end


M.validate = function(sip)
    -- TODO
end


-- Copy the file at `in_path` into the store under `config.fs.ores_path`.
--
-- The file is streamed in chunks of `config.fs.stream_chunk_size` to a
-- temporary file named after `id` while a BLAKE2b checksum is fed with the
-- same chunks. The temporary file is then moved to
-- <ores_path>/<checksum chars 1-4>/<checksum chars 5-8>/<checksum>.
-- Sets `rsrc.b2checksum` and replaces `rsrc.path` with the final location.
-- Raises an error if a file cannot be opened, copied or moved.
local function store_file(in_path, rsrc, id)
    local tmp_dir = config.fs.ores_path .. "tmp/"
    local tmp_path = tmp_dir .. id
    dir.makepath(tmp_dir)

    -- Binary mode: the bytes are hashed and copied verbatim.
    local ifh = assert(io.open(in_path, "rb"))
    local ofh, open_err = io.open(tmp_path, "wb")
    if not ofh then
        ifh:close()
        error(open_err, 0)
    end

    local b2 = cksum.blake2b()
    -- Run the copy protected so that both handles are always closed.
    local ok, copy_err = pcall(function()
        while true do
            local chunk = ifh:read(config.fs.stream_chunk_size)
            if not chunk then break end
            b2(chunk)
            assert(ofh:write(chunk))
        end
    end)
    ofh:close()
    ifh:close()
    if not ok then
        -- Do not leave a partial copy behind.
        os.remove(tmp_path)
        error(copy_err, 0)
    end

    local checksum = b2()
    rsrc.b2checksum = checksum

    local out_dir = ("%s%s/%s/"):format(
            config.fs.ores_path, checksum:sub(1, 4), checksum:sub(5, 8))
    local out_path = out_dir .. checksum
    rsrc.path = out_path
    dir.makepath(out_dir)

    print(("Moving file %s"):format(id))
    local moved, move_err = dir.movefile(tmp_path, out_path)
    if not moved then
        error(("Could not move %s to %s: %s"):format(
                tmp_path, out_path, tostring(move_err)), 0)
    end
end


--[[
Deposit the files referenced by a SIP into the store.

Parameters:
    sip: list of resources as returned by generate_sip_v1 or
        generate_sip_v2. Each resource needs a `path` (relative to
        `sip.root_path`) and an `id` (v1) or `pas_id` (v2).

Resources whose path is not a regular file are skipped. For the others,
`b2checksum` is set and `path` is replaced with the stored location.

Raises an error if a resource has no identifier, or if a file cannot be
opened, copied or moved.
--]]
M.deposit = function(sip)
    local total = #sip
    -- root_path is nil when the CSV path had no directory part.
    local root_path = sip.root_path or ""

    for i, rsrc in ipairs(sip) do
        -- v1 SIPs store the identifier as `id`, v2 SIPs as `pas_id`.
        local id = rsrc.id or rsrc.pas_id
        print(("Processing resource #%d of %d: %s"):format(
                i, total, tostring(id)))
        if id == nil then
            error(("Resource #%d has neither an id nor a pas_id."):format(i), 2)
        end

        local in_path = root_path .. rsrc.path
        -- If it's a directory, skip processing.
        if path.isfile(in_path) then
            store_file(in_path, rsrc, id)
        end
    end
end


return M