Bläddra i källkod

Wrap submission in a class; allow stored file MD update.

scossu 1 dag sedan
förälder
incheckning
c892cc90e0
1 ändrad fil med 74 tillägg och 41 borttagningar
  1. 74 41
      src/submission.lua

+ 74 - 41
src/submission.lua

@@ -114,7 +114,7 @@ local function parse_ll(sub)
                 sub = sub,
             }
             prev_id = row.id
-            sub.sip_ids[sub.sip[i].id] = true  -- Add to common SIP ID set.
+            sub.ids[sub.sip[i].id] = true  -- Add to common SIP ID set.
             for k, v in pairs(row) do
                 if not v or k == "id" then goto cont1 end  -- skip empty strings.
                 if pkar.config.md.single_values[k] then sub.sip[i][k] = v
@@ -229,10 +229,9 @@ local function rsrc_to_graph(rsrc)
     --logger:debug("Updating resource md: ", pp.write(rsrc))
 
     local s = term.new_iriref_ns(rsrc.id)
-    local gr = graph.new(nil)
     local skip_props = {id = true, sub = true}
 
-    it = gr:add_init()
+    it = rsrc.sub.gr:add_init()
     for prop, v in pairs(rsrc) do
         if skip_props[prop] then goto skip end
         logger:debug(("Adding attribute: %s = %s"):format(prop, pp.write(v)))
@@ -264,7 +263,7 @@ local function rsrc_to_graph(rsrc)
                 -- "par:" could have been added previously.
                 local rel_id = "par:" .. vv:gsub("^par:", "")
                 if
-                    not rsrc.sub.sip_ids[rel_id]
+                    not rsrc.sub.ids[rel_id]
                     and not repo.gr:contains(triple.new(
                         term.new_iriref_ns(rel_id),
                         pkar.RDF_TYPE,
@@ -297,7 +296,7 @@ local function rsrc_to_graph(rsrc)
                 -- Add linked list proxies.
                 local brick_id = "par:" .. idgen()
                 local brick_uri = term.new_iriref_ns(brick_id)
-                rsrc.sub.sip_ids[brick_id] = true
+                rsrc.sub.ids[brick_id] = true
                 if i == 1 then
                     proxy_s = s
                     it:add_iter(triple.new(
@@ -325,6 +324,22 @@ local function rsrc_to_graph(rsrc)
                     brick_uri,
                     model.id_to_uri.content_type,
                     term.new_iriref_ns("pas:Brick")))
+                it:add_iter(triple.new(
+                    brick_uri,
+                    model.id_to_uri.sub_id,
+                    rsrc.sub.uri))
+
+                local tstamp = os.date("!%Y-%m-%dT%TZ")
+                it:add_iter(triple.new(
+                    brick_uri,
+                    model.id_to_uri.submitted,
+                    term.new_lit(tstamp, "xsd:dateTime", nil, true)
+                ))
+                it:add_iter(triple.new(
+                    brick_uri,
+                    model.id_to_uri.last_modified,
+                    term.new_lit(tstamp, "xsd:dateTime", nil, true)
+                ))
                 -- Add reference.
                 it:add_iter(triple.new(
                     brick_uri,
@@ -338,6 +353,9 @@ local function rsrc_to_graph(rsrc)
         ::skip::
     end
 
+    -- Add submission ID.
+    it:add_iter(triple.new(s, model.id_to_uri.sub_id, rsrc.sub.uri))
+
     -- Add resource lineage triples.
     for i, m in ipairs(rmod.lineage) do
         it:add_iter(triple.new(
@@ -346,7 +364,7 @@ local function rsrc_to_graph(rsrc)
     end
     it:add_done()
 
-    return gr, s
+    return s
 end
 
 
@@ -354,8 +372,10 @@ end
 Process SIP files and metadata.
 
 @tparam table rsrc Resource from the SIP parsed by #{parse_ll}.
+@tparam integer i Position in the SIP. Used to look ahead for implicit members
+    by path.
 --]]
-local function process_rsrc(rsrc)
+local function process_rsrc(rsrc, i)
     local rmod = model.types[rsrc.content_type]
     --require "debugger".assert(rmod)
 
@@ -391,7 +411,6 @@ local function process_rsrc(rsrc)
 
     ::skip_file_proc::
 
-    -- BEGIN metadata processing.
     local tstamp = os.date("!%Y-%m-%dT%TZ")
     rsrc.submitted = tstamp
     rsrc.last_modified = tstamp
@@ -403,7 +422,7 @@ local function process_rsrc(rsrc)
     -- Create implicit members from single-file artifact.
     if rmod.types.artifact and path.isfile(fpath) then
         local file_id = "par:" .. idgen()
-        rsrc.sub.sip_ids[file_id] = true
+        rsrc.sub.ids[file_id] = true
         -- Insert file resource. It will be processed as part of the sip table.
         table.insert(rsrc.sub.sip, {
             content_type = rmod.default_fmodel or "file",
@@ -419,21 +438,23 @@ local function process_rsrc(rsrc)
     end
     ::skip::
 
-    -- END metadata processing.
-
-    local tmp_gr, s
-    tmp_gr, s = rsrc_to_graph(rsrc)
-
-    local val_report = validator.validate(tmp_gr, s)
-    if val_report.max_level == "ERROR" then error(
-        "Validation raised errors: " .. pp.write(val_report))
-    elseif val_report.max_level == "WARN" then logger:warn(
-        "Validation raised warnings: " .. pp.write(val_report))
-    elseif val_report.max_level == "NOTICE" then logger:warn(
-        "Validation raised notices: " .. pp.write(val_report)) end
+    -- Look ahead for resources under this container and add membership.
+    for j = i + 1, #rsrc.sub.sip do
+        if rsrc.sub.sip[j].source_path:match(
+            "^" .. pkar.escape_ptn(rsrc.source_path))
+        then
+            local rel_path = rsrc.sub.sip[j].source_path:sub(
+                #rsrc.source_path + 2)
+            logger:debug("rel_path: " .. rel_path)
+            if not rel_path:match("/") then
+                logger:debug(("Adding member %s to %s"):format(
+                        rel_path, rsrc.source_path))
+                table.insert(rsrc.has_member, rsrc.sub.sip[j].id)
+            end
+        end
+    end
 
-    repo.store_updates(tmp_gr, s)
-    logger:info("Stored: ", s.data)
+    return rsrc_to_graph(rsrc)
 end
 
 
@@ -516,16 +537,17 @@ function Submission:new (ll_path, report_path)
     if not path.isfile(ll_path) then error(ll_path .. " is not a file.", 2) end
 
     local sub = {
+        root_path = path.dirname(ll_path),
         ll_path = ll_path,
         report_path = report_path,
         id = "sub:" .. idgen(),
         name = ll_path:match("pkar_submission[%-_%.](.*)%.csv"),
         sip = {},
-        root_path = path.dirname(ll_path),
+        gr = graph.new(),
         -- Local path to URI mapping.
         path_to_uri = {},
         -- Track IDs in SIP to validate links created in a submission.
-        sip_ids = {},
+        ids = {},
     }
     sub.uri = term.new_iriref_ns(sub.id)
     self.__index = self
@@ -561,11 +583,10 @@ function Submission:deposit(ll_path, cleanup)
     local rc, ret
 
     for i, rsrc in ipairs(self.sip) do
-        -- TODO Wrap this chunk into a txn. Each row is atomic.
         logger:debug(("Processing resource #%d of %d: %s"):format(
                 i, #self.sip, rsrc.id))
 
-        local rc, ret = xpcall(process_rsrc, debug.traceback, rsrc)
+        local rc, ret = xpcall(process_rsrc, debug.traceback, rsrc, i)
         if not rc then
             return generate_report(report_path, {
                 result = "failure",
@@ -578,20 +599,6 @@ function Submission:deposit(ll_path, cleanup)
                 },
             })
         end
-        -- Look ahead for resources under this container and add membership.
-        for j = i + 1, #rsrc.sub.sip do
-            if rsrc.sub.sip[j].source_path:match(
-                "^" .. pkar.escape_ptn(rsrc.source_path))
-            then
-                local rel_path = rsrc.sub.sip[j].source_path:sub(#rsrc.source_path + 2)
-                logger:debug("rel_path: " .. rel_path)
-                if not rel_path:match("/") then
-                    logger:debug(("Adding member %s to %s"):format(
-                            rel_path, rsrc.source_path))
-                    table.insert(rsrc.has_member, rsrc.sub.sip[j].id)
-                end
-            end
-        end
     end
 
     rc, ret = xpcall(add_sub_meta, debug.traceback, self)
@@ -606,6 +613,32 @@ function Submission:deposit(ll_path, cleanup)
         })
     end
 
+    for id in pairs(self.ids) do
+        local val_report = validator.validate(self.gr, term.new_iriref_ns(id))
+        if val_report.max_level == "ERROR" then
+            return generate_report(report_path, {
+                result = "failure",
+                message = "A resource did not pass validation.",
+                traceback = "",
+                metadata = {
+                    sub_id = self.id,
+                    rsrc_id = id,
+                    validation = val_report,
+                }
+            })
+        elseif val_report.max_level == "WARN" then logger:warn(
+            "Validation raised warnings: " .. pp.write(val_report))
+        elseif val_report.max_level == "NOTICE" then logger:info(
+            "Validation raised notices: " .. pp.write(val_report)) end
+        -- TODO send report for warnings and notices.
+    end
+
+    -- TODO wrap this in an MDB transaction.
+    for id in pairs(self.ids) do
+        repo.store_updates(self.gr, term.new_iriref_ns(id))
+        logger:info("Stored: ", id)
+    end
+
     -- After this point, the outcome is either `success` or `warnings`.
 
     -- Remove processing directory.