
Merge branch 'submission_oo'

scossu 1 day ago
parent commit c8ef113f9b
6 changed files with 350 additions and 235 deletions
  1. doc/user_guide/docs/roadmap.md (+14 −9)
  2. src/core.lua (+2 −0)
  3. src/submission.lua (+313 −221)
  4. src/util/pkar.lua (+5 −3)
  5. src/util/watcher.lua (+6 −2)
  6. test/submission.lua (+10 −0)

+ 14 - 9
doc/user_guide/docs/roadmap.md

@@ -54,12 +54,19 @@ usage and opportunities for expanding adoption in relevant areas.
 
 - ✖︎ Management UI & API
     - ✖︎ Deposit via single tar or zip file submission
--  submission
+-  submission
     - ✓ Watch local folder and trigger submission
         - ✓ Option to regenerate site after submission
         - ✓ Option to clean up sources & LL on success
     - ✓ Submission report
     - ✓ Deleting resources
+    - ✓ Allow updating a file's metadata if it's only present in the archive
+    - ❏ Prevent modification of system properties
+    - ❏ Define list of system-managed properties (user-provided data ignored)
+    - ❏ Protect some properties when updating resource
+        - Do not update: `content_model`, `submitted`, `submitted_by`
+        - Do not delete existing: `sub_id`
 - ✓ Proper collection handling
     - ✓ Dedicated template
     - ✓ Link to markdown doc for presentation page
@@ -76,10 +83,10 @@ usage and opportunities for expanding adoption in relevant areas.
     - ⚒ Content model setup manual (sysadmin)
     - ✓ Glossary
     - ❏ Site generation guide
-    - ⚒ Migrate doc platform (Mkdocs?) & publish separately
-    - ⚒ API documentation (ldoc)
+    - ✓ Migrate doc platform to Mkdocs & publish separately
+    - ⚒ API documentation (ldoc + separate site)
 - ⚒ Testing
-    -  Unit tests (Busted)
+    -  Unit tests (Busted)
     - ⚒ Roundtrip submission, download LL, update, resubmission
     - ⚒ >100 resource data set
 - ⚒ Presentation
@@ -98,16 +105,14 @@ usage and opportunities for expanding adoption in relevant areas.
 
 ## ❏ Post-release wishlist
 
-(will be turned into separate release plans)
+(will be grouped into separate release plans)
 
+- Provided checksum verification
 - Multilingual support
-- ✖︎ Deposit via remote hot folder
-    - FTP [Addressed by separate FTP server]
-    - S3 [Not a good choice - See remote deposit guide]
 - Schema definition validator
 - Incremental build
 - Rebuild only site assets
 - Custom templating
-- Auto relatioships inference
+- Auto relationships inference
 - Markdown support for property values
 
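
The property-protection items added to the roadmap above could translate into something like the following in `src/submission.lua`. This is a minimal sketch, not part of this commit; the table and function names are assumptions.

```lua
-- Hypothetical sketch: system-managed properties that the submission
-- module would refuse to overwrite or drop when a resource is resubmitted.
local PROTECT_ON_UPDATE = {
    content_model = true,  -- never replaced by user-provided data
    submitted = true,
    submitted_by = true,
}
local KEEP_EXISTING = {
    sub_id = true,  -- never deleted if already present in the archive
}

-- Filter an incoming metadata table against the stored values.
local function protect_props(existing, incoming)
    for prop in pairs(PROTECT_ON_UPDATE) do
        incoming[prop] = nil
    end
    for prop in pairs(KEEP_EXISTING) do
        if incoming[prop] == nil then incoming[prop] = existing[prop] end
    end
    return incoming
end
```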

+ 2 - 0
src/core.lua

@@ -7,6 +7,8 @@ local term = require "volksdata.term"
 local nsm = require "volksdata.namespace"
 
 
+local VERSION = "1.0a2"
+
 -- Read only module properties.
 local PROTECTED = {
     store = true,

+ 313 - 221
src/submission.lua

@@ -5,7 +5,7 @@ end storage, which is called here but defined in the repo module).
 
 The deposit process is carried out in several steps:
 
-- SIP generation (`generate_sip()`): scans the laundry list CSV and builds a
+- SIP generation (`parse_ll()`): scans the laundry list CSV and builds a
   temporary data structure with the found metadata; generates unique IDs for
   resources; infers some implicit relationships from the position of the CSV
   rows and folder layout; adds system-controlled metadata.
@@ -46,15 +46,6 @@ local logger = pkar.logger
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
 
--- Local path to URI mapping. For linking between newly created resources.
-local path_to_uri
-
--- Track IDs in SIP to validate links created in a submission.
-local sip_ids
-
--- Submission ID and name.
-local sub_id, sub_name
-
 -- Initialize libmagic database.
 local magic = libmagic.open(libmagic.MIME_TYPE, libmagic.NO_CHECK_COMPRESS )
 assert(magic:load())
@@ -73,7 +64,7 @@ Generate a random, reader-friendly ID.
 A 16-character ID with the above defined #chpool of 60 symbols has an entropy
 of 94.5 bits, which should be plenty for a medium-sized repository.
 ]]
-local function idgen(len)
+function idgen(len)
     local charlist = {}
     for i = 1, (len or pkar.config.id.len) do
         table.insert(charlist, string.char(chpool[math.random(1, #chpool)]))
@@ -83,24 +74,21 @@ local function idgen(len)
 end
 
 
-local function generate_sip(ll_path)
-    if not path.isfile(ll_path) then error(ll_path .. " is not a file.", 2) end
-
-    -- Submission ID sticks to all the resources.
-    sub_id = "sub:" .. idgen()
-    sub_name = ll_path:match("pkar_submission[%-_%.](.*)%.csv")
+--[[--
+Parse laundry list and generate the basic SIP.
 
-    local sip = {root_path = path.dirname(ll_path)}
-    path_to_uri = {}
-    sip_ids = {}
+@tparam Submission sub Submission object to populate.
 
-    local tn_dir = path.join(sip.root_path, "proc", "tn")
+@return true on success; false, error report on failure.
+]]
+local function parse_ll(sub)
+    local tn_dir = path.join(sub.root_path, "proc", "tn")
     dir.makepath(tn_dir)
 
     local prev_id
 
     local i = 0
-    for row_n, row in csv.parseLine(ll_path) do
+    for row_n, row in csv.parseLine(sub.ll_path) do
         local has_content
         for k, v in pairs(row) do
             -- Change "" to nil.
@@ -119,22 +107,23 @@ local function generate_sip(ll_path)
                     ("Processing LL resource #%d at row #%d.")
                     :format(i, row_n))
 
-            sip[i] = {
+            sub.sip[i] = {
                 -- Normalize provided ID or generate random ID if not provided.
                 id = "par:" .. (row.id or idgen()),
-                sub_id = sub_id,
+                sub_id = sub.id,
+                sub = sub,
             }
             prev_id = row.id
-            sip_ids[sip[i].id] = true  -- Add to common sip ID set.
+            sub.ids[sub.sip[i].id] = true  -- Add to common SIP ID set.
             for k, v in pairs(row) do
                 if not v or k == "id" then goto cont1 end  -- skip empty strings.
-                if pkar.config.md.single_values[k] then sip[i][k] = v
+                if pkar.config.md.single_values[k] then sub.sip[i][k] = v
                 -- Multi-values are ordered in the SIP for further processing.
-                else sip[i][k] = {v} end
+                else sub.sip[i][k] = {v} end
                 ::cont1::
             end
             -- Add to path to URI map for later referencing.
-            path_to_uri[row.source_path] = sip[i].id
+            sub.path_to_uri[row.source_path] = sub.sip[i].id
         else
             -- Continuation of values from a previous row.
             if i < 1 then
@@ -151,7 +140,7 @@ local function generate_sip(ll_path)
                     else
                         logger:debug("Value: ", v)
                         logger:debug("Inserting at row ", i - 1)
-                        table.insert(sip[i][k], v)
+                        table.insert(sub.sip[i][k], v)
                     end
                     ::cont2::
                 end
@@ -161,63 +150,90 @@ local function generate_sip(ll_path)
         ::skip::
         row_n = row_n + 1
     end
-    -- Infer structure from paths and row ordering.
-    for i, v in ipairs(sip) do
-        local rmod = model.types[v.content_type]
-        --require "debugger".assert(rmod)
-        local fpath = path.join(sip.root_path, v.source_path)
-        --dbg.assert(rmod)
-        v.has_member = v.has_member or {}
-        -- Create implicit members from single-file artifact.
-        if rmod.types.artifact and path.isfile(fpath) then
-            local file_id = "par:" .. idgen()
-            sip_ids[file_id] = true
-            -- Insert file resource and move it into a new sub-folder.
-            table.insert(sip, {
-                content_type = rmod.default_fmodel or "file",
-                id = file_id,
-                sub_id = sub_id,
-                label = path.basename(v.source_path),
-                source_path = v.source_path,
-            })
-            sip[i].has_file = file_id
-            sip[i].pref_rep = file_id
-            sip[i].source_path = nil
-            goto skip
-        end
-        for j = i + 1, #sip do
-            if sip[j].source_path:match("^" .. pkar.escape_ptn(v.source_path))
-            then
-                local rel_path = sip[j].source_path:sub(#v.source_path + 2)
-                logger:debug("rel_path: " .. rel_path)
-                if not rel_path:match("/") then
-                    logger:debug(("Adding member %s to %s"):format(
-                            rel_path, v.source_path))
-                    table.insert(v.has_member, sip[j].id)
-                end
-            end
-        end
-        ::skip::
-    end
-    logger:debug("Parsed SIP: ", pp.write(sip))
 
-    return sip
+    return true
 end
 
 
---[[  Convert a SIP resource table to an in-memory Volksdata graph.
+--[[--
+Process a file resource.
 
---]]
+During this step, the file is moved to a staging area, its checksum is
+calculated, and some more technical metadata are extracted and added to the
+file's D-Res.
+
+@tparam table rsrc Resource table to be updated.
+]]
+local function process_file(rsrc)
+    local src_path = path.join(rsrc.sub.root_path, rsrc.source_path)
+    local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
+    local fext
+    _, fext = path.splitext(src_path)
+    local tmp_path = tmp_dir .. rsrc.id .. fext
+    dir.makepath(tmp_dir)
+
+    local ifh = io.open(src_path, "r")
+    if not ifh then
+        error("Cannot open source file: " .. src_path)
+    end
+
+    rsrc.format = {magic:filehandle(ifh)}
+    local hash_it = mc.new_blake2b()
+    local fsize = 0
+    logger:debug("Hashing ", src_path)
+    local ofh = assert(io.open(tmp_path, "w"))
+    while true do
+        local chunk = ifh:read(pkar.config.fs.stream_chunk_size)
+        if not chunk then break end
+        hash_it:update(chunk)
+        ofh:write(chunk)
+        fsize = fsize + #chunk
+    end
+    local checksum = hash_it:final(true)
+    rsrc.checksum = {"blake2:" .. checksum}
+    rsrc.size = fsize
+
+    ofh:close()
+    ifh:close()
+
+    -- Move the file into content-addressed storage.
+    local out_dir, out_path
+    out_dir = path.join(
+            pkar.config.fs.ores_path,
+            checksum:sub(1, 2),
+            checksum:sub(3, 4))
+    out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
+    dir.makepath(out_dir)
+    logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
+    dir.movefile(tmp_path, out_path)
+    rsrc.archive_path = out_path
+
+    -- Move thumbnail if present.
+    if rsrc.thumbnail then
+        src_path = rsrc.thumbnail
+        out_path = path.join(
+                out_dir, path.basename(src_path))
+        logger:debug(("Moving file %s to %s"):format(src_path, out_path))
+        dir.movefile(src_path, out_path)
+        rsrc.thumbnail = out_path
+    end
+
+    return true
+end
+
+
+--[[--
+Convert a SIP resource table to an in-memory Volksdata graph.
+]]
 local function rsrc_to_graph(rsrc)
     local rmod = model.types[rsrc.content_type]
-    logger:debug("Updating resource md: ", pp.write(rsrc))
+    --logger:debug("Updating resource md: ", pp.write(rsrc))
 
     local s = term.new_iriref_ns(rsrc.id)
-    local gr = graph.new(nil)
+    local skip_props = {id = true, sub = true}
 
-    it = gr:add_init()
+    it = rsrc.sub.gr:add_init()
     for prop, v in pairs(rsrc) do
-        if prop == "id" then goto skip end
+        if skip_props[prop] then goto skip end
         logger:debug(("Adding attribute: %s = %s"):format(prop, pp.write(v)))
         local p = model.id_to_uri[prop]
         if not p then
@@ -247,7 +263,7 @@ local function rsrc_to_graph(rsrc)
                 -- "par:" could have been added previously.
                 local rel_id = "par:" .. vv:gsub("^par:", "")
                 if
-                    not sip_ids[rel_id]
+                    not rsrc.sub.ids[rel_id]
                     and not repo.gr:contains(triple.new(
                         term.new_iriref_ns(rel_id),
                         pkar.RDF_TYPE,
@@ -255,7 +271,7 @@ local function rsrc_to_graph(rsrc)
                 ))
                 then
                     -- Convert local path to URIs.
-                    local uri = path_to_uri[vv]
+                    local uri = rsrc.sub.path_to_uri[vv]
                     if not uri then error(
                         ("Not a valid path: %s for property: %s on res: %s")
                         :format(vv, prop, rsrc.id))
@@ -280,7 +296,7 @@ local function rsrc_to_graph(rsrc)
                 -- Add linked list proxies.
                 local brick_id = "par:" .. idgen()
                 local brick_uri = term.new_iriref_ns(brick_id)
-                sip_ids[brick_id] = true
+                rsrc.sub.ids[brick_id] = true
                 if i == 1 then
                     proxy_s = s
                     it:add_iter(triple.new(
@@ -292,7 +308,8 @@ local function rsrc_to_graph(rsrc)
                         term.new_iriref_ns(pconf.uri),
                         term.new_iriref_ns(vv)))
                 else
-                    it:add_iter(triple.new(proxy_s, model.id_to_uri.next, brick_uri))
+                    it:add_iter(triple.new(
+                        proxy_s, model.id_to_uri.next, brick_uri))
                 end
                 -- Add the reference.
                 -- Add basic triples.
@@ -307,6 +324,22 @@ local function rsrc_to_graph(rsrc)
                     brick_uri,
                     model.id_to_uri.content_type,
                     term.new_iriref_ns("pas:Brick")))
+                it:add_iter(triple.new(
+                    brick_uri,
+                    model.id_to_uri.sub_id,
+                    rsrc.sub.uri))
+
+                local tstamp = os.date("!%Y-%m-%dT%TZ")
+                it:add_iter(triple.new(
+                    brick_uri,
+                    model.id_to_uri.submitted,
+                    term.new_lit(tstamp, "xsd:dateTime", nil, true)
+                ))
+                it:add_iter(triple.new(
+                    brick_uri,
+                    model.id_to_uri.last_modified,
+                    term.new_lit(tstamp, "xsd:dateTime", nil, true)
+                ))
                 -- Add reference.
                 it:add_iter(triple.new(
                     brick_uri,
@@ -320,6 +353,9 @@ local function rsrc_to_graph(rsrc)
         ::skip::
     end
 
+    -- Add submission ID.
+    it:add_iter(triple.new(s, model.id_to_uri.sub_id, rsrc.sub.uri))
+
     -- Add resource lineage triples.
     for i, m in ipairs(rmod.lineage) do
         it:add_iter(triple.new(
@@ -328,150 +364,123 @@ local function rsrc_to_graph(rsrc)
     end
     it:add_done()
 
-    return gr, s
+    return s
 end
 
 
--- Submission module.
-local M = {
-    idgen = idgen,
-    reset_ores = function()
-        if path.isdir(pkar.config.fs.ores_path) then
-            logger:warn("Removing existing opaque resource store.")
-            dir.rmtree(pkar.config.fs.ores_path)
-        end
-        dir.makepath(pkar.config.fs.ores_path)
-    end,
-}
-
-
 --[[--
 Process SIP files and metadata.
-During this step, files are moved to a staging area, their checksums are
-calculated, and some more technical metadata are extracted and added to the
-D-Res.
 
-@tparam table rsrc Resource from the SIP parsed by #{generate_sip}.
+@tparam table rsrc Resource from the SIP parsed by #{parse_ll}.
+@tparam integer i Position in the SIP. Used to look ahead for implicit members
+    by path.
 --]]
-local function process_rsrc(rsrc, root_path)
-    local in_path, fext
-    if not rsrc.source_path then goto continue end
-
-    in_path = path.join(root_path, rsrc.source_path)
-    fext = path.extension(in_path)
-    -- If it's a directory, skip file processing.
-    if not path.isfile(in_path) then goto continue end
-
-    do
-        local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
-        local file_ext
-        _, file_ext = path.splitext(in_path)
-        local tmp_path = tmp_dir .. rsrc.id .. file_ext
-        dir.makepath(tmp_dir)
-
-        local ifh = assert(io.open(in_path, "r"))
-
-        rsrc.format = {magic:filehandle(ifh)}
-        local hash_it = mc.new_blake2b()
-        local fsize = 0
-        logger:debug("Hashing ", in_path)
-        local ofh = assert(io.open(tmp_path, "w"))
-        while true do
-            chunk = ifh:read(pkar.config.fs.stream_chunk_size)
-            if not chunk then break end
-            hash_it:update(chunk)
-            ofh:write(chunk)
-            fsize = fsize + #chunk
-        end
-        local checksum = hash_it:final(true)
-        rsrc.checksum = {"blake2:" .. checksum}
-        rsrc.size = fsize
-
-        ofh:close()
-        ifh:close()
-
-        -- Copy file and calculate checksum.
-        local out_dir, out_path
-        out_dir = path.join(
-                pkar.config.fs.ores_path,
-                checksum:sub(1, 2),
-                checksum:sub(3, 4))
-        out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
-        dir.makepath(out_dir)
-        logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
-        dir.movefile(tmp_path, out_path)
-        rsrc.archive_path = out_path
-
-        -- Copy thumbnail if existing.
-        if rsrc.thumbnail then
-            src_path = rsrc.thumbnail
-            out_path = path.join(
-                    out_dir, path.basename(src_path))
-            logger:debug(("Moving file %s to %s"):format(src_path, out_path))
-            dir.movefile(src_path, out_path)
-            rsrc.thumbnail = out_path
+local function process_rsrc(rsrc, i)
+    local rmod = model.types[rsrc.content_type]
+    --require "debugger".assert(rmod)
+
+    -- BEGIN file check and processing.
+    if rmod.types.file then
+        if rsrc.source_path then
+            local in_path = path.join(rsrc.sub.root_path, rsrc.source_path)
+            if path.isfile(in_path) then process_file(rsrc, in_path)
+            else error(
+                "File type provided for " .. rsrc.id ..
+                " but the source path " .. in_path .. " is not a file.")
+            end
+        else
+            -- If it's a file and no path is provided, look for it in the repo.
+            -- This will obviously fail if no ID was provided either, and
+            -- a random one was just generated.
+            if repo.gr:contains(triple.new(
+                term.new_iriref_ns(rsrc.id),
+                pkar.RDF_TYPE,
+                model.id_to_uri.file
+            )) then
+                -- File is in the repo. This is a metadata-only update.
+                logger:info("Metadata-only update on file: " .. rsrc.id)
+            else
+                -- No file found. That's an error.
+                error(
+                    "No source path was provided and no file with this ID \z
+                    was found in the archive: " .. rsrc.id)
+            end
         end
     end
+    -- END file processing.
 
-    ::continue::
+    ::skip_file_proc::
 
     local tstamp = os.date("!%Y-%m-%dT%TZ")
     rsrc.submitted = tstamp
     rsrc.last_modified = tstamp
 
-    local tmp_gr, s
-    tmp_gr, s = rsrc_to_graph(rsrc)
-
-    local val_report = validator.validate(tmp_gr, s)
-    if val_report.max_level == "ERROR" then error(
-        "Validation raised errors: " .. pp.write(val_report))
-    elseif val_report.max_level == "WARN" then logger:warn(
-        "Validation raised warnings: " .. pp.write(val_report))
-    elseif val_report.max_level == "NOTICE" then logger:warn(
-        "Validation raised notices: " .. pp.write(val_report)) end
-
-    repo.store_updates(tmp_gr, s)
-    logger:info("Stored: ", s.data)
-end
-
-
-local function generate_report(rpath, report)
-    report.timestamp = os.date("!%Y-%m-%dT%TZ")
+    -- Infer structure from paths and row ordering.
+    --require "debugger".assert(rmod)
+    -- source_path may be nil, e.g. for metadata-only file updates.
+    local fpath = rsrc.source_path and
+            path.join(rsrc.sub.root_path, rsrc.source_path)
+    rsrc.has_member = rsrc.has_member or {}
+    -- Create implicit members from single-file artifact.
+    if rmod.types.artifact and fpath and path.isfile(fpath) then
+        local file_id = "par:" .. idgen()
+        rsrc.sub.ids[file_id] = true
+        -- Insert file resource. It will be processed as part of the sip table.
+        table.insert(rsrc.sub.sip, {
+            content_type = rmod.default_fmodel or "file",
+            id = file_id,
+            sub_id = rsrc.sub.id,
+            label = path.basename(rsrc.source_path),
+            source_path = rsrc.source_path,
+        })
+        rsrc.has_file = file_id
+        rsrc.pref_rep = file_id
+        rsrc.source_path = nil
+        goto skip
+    end
+
+    -- Look ahead for resources under this container and add membership.
+    for j = i + 1, #rsrc.sub.sip do
+        if rsrc.source_path and rsrc.sub.sip[j].source_path and
+            rsrc.sub.sip[j].source_path:match(
+                "^" .. pkar.escape_ptn(rsrc.source_path))
+        then
+            local rel_path = rsrc.sub.sip[j].source_path:sub(
+                #rsrc.source_path + 2)
+            logger:debug("rel_path: " .. rel_path)
+            if not rel_path:match("/") then
+                logger:debug(("Adding member %s to %s"):format(
+                        rel_path, rsrc.source_path))
+                table.insert(rsrc.has_member, rsrc.sub.sip[j].id)
+            end
+        end
+    end
+    ::skip::
 
-    if rpath then
-        local fh = io.open(rpath, "w")
-        fh:write(json.encode(report))
-        fh:close()
-        logger:info("Report written to " .. rpath)
-        return true
-    else return report end
+    return rsrc_to_graph(rsrc)
 end
 
 
-local function add_sub_meta()
+local function add_sub_meta(sub)
     -- Add triples for submission metadata directly to the stored graph.
     local it = repo.gr:add_init()
-    local sub_uri = term.new_iriref_ns(sub_id)
     it:add_iter(triple.new(
-        sub_uri,
+        sub.uri,
         pkar.RDF_TYPE,
         term.new_iriref_ns("par:Submission")
     ))
-    if sub_name then
+    if sub.name then
         it:add_iter(triple.new(
-            sub_uri,
+            sub.uri,
             term.new_iriref_ns("rdfs:label"),
-            term.new_lit(sub_name)
+            term.new_lit(sub.name)
         ))
     end
     local tstamp = os.date("!%Y-%m-%dT%TZ")
     it:add_iter(triple.new(
-        sub_uri,
+        sub.uri,
         model.id_to_uri.submitted,
         term.new_lit(tstamp, "xsd:dateTime", nil, true)
     ))
     it:add_iter(triple.new(
-        sub_uri,
+        sub.uri,
         model.id_to_uri.last_modified,
         term.new_lit(tstamp, "xsd:dateTime", nil, true)
     ))
@@ -479,58 +488,105 @@ local function add_sub_meta()
 end
 
 
-local function cleanup_src(sip)
+local function generate_report(rpath, report)
+    report.timestamp = os.date("!%Y-%m-%dT%TZ")
+
+    if rpath then
+        local fh = io.open(rpath, "w")
+        fh:write(json.encode(report))
+        fh:close()
+        logger:info("Report written to " .. rpath)
+        return true
+    else return report end
+end
+
+
+local function cleanup_src(sub)
     -- Gather all top-level directories and delete them.
     rsrc_paths = {}
-    for i, rsrc in ipairs(sip) do
+    for i, rsrc in ipairs(sub.sip) do
         rsrc_paths[rsrc.source_path:match("[^/]+")] = true
     end
     for tlf in pairs(rsrc_paths) do
-        local target = path.join(sip.root_path, tlf)
+        local target = path.join(sub.root_path, tlf)
         logger:info("Cleaning up: " .. target)
         if path.isdir(target) then dir.rmtree(target)
         elseif path.isfile(target) then file.delete(target) end
     end
-    logger:info("Cleaning up: " .. ll_path)
+    logger:info("Cleaning up: " .. sub.ll_path)
     file.delete(sub.ll_path)
 end
 
 
+--
+-- Public class & members
+--
+
+local Submission = {}
+
+
 --[[--
-Deposit resources from a SIP.
-@tparam string ll_path Path of the laundry list. All SIP source references are
-    relative to this path.
-@tparam boolean cleanup If set to `true`, it removes the SIP if the submission
-    is successful.
-@param string report_path Optional path for the report file.
+Create a new submission.
 
-@treturn bool Whether the process completed successfully.
---]]
-M.deposit = function(ll_path, cleanup, report_path)
-    local sip, rc, ret
+@tparam string ll_path Path of the laundry list file.
+@tparam string report_path Optional path for the report file.
 
-    rc, ret = xpcall(generate_sip, debug.traceback, ll_path)
-    if rc then sip = ret
-    else
-        return generate_report(report_path, {
+@treturn table New Submission object.
+]]
+function Submission:new (ll_path, report_path)
+    -- TODO generate a report.
+    if not path.isfile(ll_path) then error(ll_path .. " is not a file.", 2) end
+
+    local sub = {
+        root_path = path.dirname(ll_path),
+        ll_path = ll_path,
+        report_path = report_path,
+        id = "sub:" .. idgen(),
+        name = ll_path:match("pkar_submission[%-_%.](.*)%.csv"),
+        sip = {},
+        gr = graph.new(),
+        -- Local path to URI mapping.
+        path_to_uri = {},
+        -- Track IDs in SIP to validate links created in a submission.
+        ids = {},
+    }
+    sub.uri = term.new_iriref_ns(sub.id)
+    self.__index = self
+    setmetatable(sub, self)
+
+    local rc, ret = xpcall(parse_ll, debug.traceback, sub)
+    if not rc then
+        return nil, generate_report(report_path, {
             result = "failure",
             message = "An error occurred while parsing the SIP.",
             traceback = ret,
             metadata = {
-                ll = ll_path,
+                ll = sub.ll_path,
             },
         })
     end
 
-    for i, rsrc in ipairs(sip) do
-        -- TODO Wrap this chunk into a txn. Each row is atomic.
+    return sub
+end
+
+
+--[[--
+Deposit resources from a SIP.
+@tparam boolean cleanup If set to `true`, it removes the SIP sources if the
+    submission is successful.
+
+@treturn bool Whether the process completed successfully.
+--]]
+function Submission:deposit(cleanup)
+    local rc, ret
+    local report_path = self.report_path
+
+    for i, rsrc in ipairs(self.sip) do
         logger:debug(("Processing resource #%d of %d: %s"):format(
-                i, #sip, rsrc.id))
+                i, #self.sip, rsrc.id))
 
-        local rc, ret = xpcall(
-            process_rsrc,
-            debug.traceback,
-            rsrc, sip.root_path)
+        local rc, ret = xpcall(process_rsrc, debug.traceback, rsrc, i)
         if not rc then
             return generate_report(report_path, {
                 result = "failure",
@@ -545,33 +601,59 @@ M.deposit = function(ll_path, cleanup, report_path)
         end
     end
 
-    rc, ret = xpcall(add_sub_meta, debug.traceback)
+    rc, ret = xpcall(add_sub_meta, debug.traceback, self)
     if not rc then
         return generate_report(report_path, {
             result = "failure",
             message = "An error occurred while updating submission metadata.",
             traceback = ret,
             metadata = {
-                sub_id = sub_id,
+                sub_id = self.id,
             },
         })
     end
 
+    for id in pairs(self.ids) do
+        local val_report = validator.validate(self.gr, term.new_iriref_ns(id))
+        if val_report.max_level == "ERROR" then
+            return generate_report(report_path, {
+                result = "failure",
+                message = "A resource did not pass validation.",
+                traceback = "",
+                metadata = {
+                    sub_id = self.id,
+                    rsrc_id = id,
+                    validation = val_report,
+                }
+            })
+        elseif val_report.max_level == "WARN" then logger:warn(
+            "Validation raised warnings: " .. pp.write(val_report))
+        elseif val_report.max_level == "NOTICE" then logger:info(
+            "Validation raised notices: " .. pp.write(val_report)) end
+        -- TODO send report for warnings and notices.
+    end
+
+    -- TODO wrap this in an MDB transaction.
+    for id in pairs(self.ids) do
+        repo.store_updates(self.gr, term.new_iriref_ns(id))
+        logger:info("Stored: ", id)
+    end
+
     -- After this point, the outcome is either `success` or `warnings`.
 
     -- Remove processing directory.
-    local proc_dir = path.join(sip.root_path, "proc")
+    local proc_dir = path.join(self.root_path, "proc")
     if path.isdir(proc_dir) then dir.rmtree(proc_dir) end
 
     local report = {
         metadata = {
-            sub_id = sub_id,
+            sub_id = self.id,
             resources = {},
         },
     }
 
     if cleanup then
-        rc, ret = xpcall(cleanup_src, debug.traceback, sip)
+        rc, ret = xpcall(cleanup_src, debug.traceback, self)
         if not rc then
             report.result = "warnings"
             report.message = "An error occurred while deleting the SIP source."
@@ -581,7 +663,7 @@ M.deposit = function(ll_path, cleanup, report_path)
     report.result = "success"
     report.message = "The SIP has been successfully submitted."
 
-    for _, rsrc in ipairs(sip) do
+    for _, rsrc in ipairs(self.sip) do
         report.metadata.resources[rsrc.id] = rsrc.source_path
     end
 
@@ -589,4 +671,14 @@ M.deposit = function(ll_path, cleanup, report_path)
 end
 
 
-return M
+return {
+    Submission = Submission,
+    idgen = idgen,
+    reset_ores = function()
+        if path.isdir(pkar.config.fs.ores_path) then
+            logger:warn("Removing existing opaque resource store.")
+            dir.rmtree(pkar.config.fs.ores_path)
+        end
+        dir.makepath(pkar.config.fs.ores_path)
+    end,
+}
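
For orientation, here is how the refactored module is driven from `src/util/pkar.lua` and `src/util/watcher.lua` below, condensed into a minimal sketch. The laundry-list path is a made-up example; it only needs to match the `pkar_submission[-_.]<name>.csv` pattern that `Submission:new` uses to derive the submission name, and the final line assumes the report table is returned directly when no report path is given.

```lua
local submission = require "pocket_archive.submission"

-- Parse the laundry list and build the SIP in memory.
-- Submission:new() returns nil plus a failure report if parsing fails.
local sub, err_report = submission.Submission:new(
        "/data/inbox/pkar_submission_spring-batch.csv")
if not sub then
    print("Submission could not be created; see the returned report.")
    return
end

-- Deposit the SIP; `true` also cleans up the SIP sources on success.
local report = sub:deposit(true)
print(report and report.result)
```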

+ 5 - 3
src/util/pkar.lua

@@ -14,7 +14,7 @@ local cmdoc = require "pocket_archive.cmdoc"
 local model = require "pocket_archive.model"
 local pres = require "pocket_archive.presentation"
 local repo = require "pocket_archive.repo"
-local sub = require "pocket_archive.submission"
+local submission = require "pocket_archive.submission"
 
 
 cli.locale "en_US"  -- TODO set with multilingual support.
@@ -31,7 +31,7 @@ init = cli.command {
         if a == "yes" then
             io.write("Alright, you asked for it.\n")
             repo.reset_store()
-            sub.reset_ores()
+            submission.reset_ores()
             pres.reset_site()
         else io.write("Chicken out.\n")
         end
@@ -63,8 +63,10 @@ deposit = cli.command {
 
     function(args)
         --require "debugger"()
-        local report = sub.deposit(args.path, args.cleanup)
+        local sub, report = submission.Submission:new(args.path)
+        if sub then report = sub:deposit(args.cleanup) end
         io.write(json.encode(report))
+        io.write("\n")
     end
 }
 

+ 6 - 2
src/util/watcher.lua

@@ -11,7 +11,7 @@ local watchdog = require "watchdog"
 local pkar = require "pocket_archive"
 local logger = pkar.logger
 local pres = require "pocket_archive.presentation"
-local sub = require "pocket_archive.submission"
+local submission = require "pocket_archive.submission"
 
 
 local running = true
@@ -96,7 +96,11 @@ cli.program {
                     logger:info("Starting submission with pid: " .. mypid)
                     local fpath = path.join(args.path, ev.name)
                     local sub_rc, sub_ret = pcall(
-                        sub.deposit,
+                        function(fpath, cleanup, report_path)
+                            local sub, report = submission.Submission:new(
+                                fpath, report_path)
+                            if not sub then return report end
+                            return sub:deposit(cleanup)
+                        end,
                         fpath,
                         args.cleanup,
                         fpath:gsub("%.csv$", "-report.json")

+ 10 - 0
test/submission.lua

@@ -35,6 +35,16 @@ describe("Archive submission process", function()
         end)
         --]]
     end)
+    describe("Update", function()
+        it("should update metadata only from a LL", function()
+        end)
+        it("should update metadata and files from a full SIP", function()
+        end)
+        it("should update metadata of a file present in the archive but not in the SIP", function()
+        end)
+        it("should ", function()
+        end)
+    end)
 end)
 
 describe("Submission data validation", function ()
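
The empty `it()` stubs added above could be fleshed out along these lines. This is a sketch only: the fixture path is hypothetical, and the assertion assumes the report shape produced by `Submission:deposit`.

```lua
local submission = require "pocket_archive.submission"

describe("Metadata-only file update", function()
    it("should update metadata for a file ID already in the archive", function()
        -- Hypothetical fixture: a laundry list whose rows reference IDs
        -- already stored in the archive and leave source_path empty.
        local sub = assert(submission.Submission:new(
                "test/fixtures/pkar_submission_md-only.csv"))
        local report = sub:deposit(false)
        assert.are.equal("success", report.result)
    end)
end)
```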