Browse Source

Validation; submission metadata adjustments.

scossu 5 days ago
parent
commit
56c810a645

+ 8 - 7
README.md

@@ -149,21 +149,22 @@ Simple road map for a rough prototype:
 - ⚒ Configuration + config parser
 - ⚒ Configuration + config parser
   - ✓ Application
   - ✓ Application
   - ⚒ Content model
   - ⚒ Content model
-    -  Validation rules
-    -  Relationship inference rules
+    -  Validation rules
+    -  Relationship inference rules
   - ⎊ Local overrides
   - ⎊ Local overrides
 - ⚒ Submission module
 - ⚒ Submission module
   - ✓ SIP building
   - ✓ SIP building
   - ✓ Metadata from LL
   - ✓ Metadata from LL
   - ✓ Structure inference
   - ✓ Structure inference
-  - Relatioships inference
+  - Relatioships inference
 - ⚒ HTML generator
 - ⚒ HTML generator
   - ✓ Index
   - ✓ Index
   - ✓ Resource
   - ✓ Resource
   - ✓ Static assets
   - ✓ Static assets
   - ✓ Transformers
   - ✓ Transformers
--  Non-HTML generators
-  - LL
-  - RDF (turtle)
+-  Non-HTML generators
+  - ✓ RDF (turtle)
+  - ⎊ LL
 - ⎊ Front end
 - ⎊ Front end
-  - JS search engine
+  - ⎊ JS search engine
+  - ⎊ Styling

+ 3 - 1
config/model/typedef/anything.lua

@@ -16,16 +16,18 @@ return {
         },
         },
         ["pas:contentType"] = {
         ["pas:contentType"] = {
             label = "Content type",
             label = "Content type",
-            type = "string",
+            type = "resource",
             min_cardinality = 1,
             min_cardinality = 1,
             max_cardinality = 1,
             max_cardinality = 1,
         },
         },
+        --[[
         ["pas:id"] = {
         ["pas:id"] = {
             label = "Primary ID",
             label = "Primary ID",
             type = "string",
             type = "string",
             min_cardinality = 1,
             min_cardinality = 1,
             max_cardinality = 1,
             max_cardinality = 1,
         },
         },
+        --]]
         ["dc:identifier"] = {
         ["dc:identifier"] = {
             label = "External system ID",
             label = "External system ID",
             type = "string",
             type = "string",

+ 9 - 1
config/model/typedef/file.lua

@@ -4,12 +4,14 @@ return {
     broader = "pas:Anything",
     broader = "pas:Anything",
 
 
     properties = {
     properties = {
+        --[[
         ["pas:location"] = {
         ["pas:location"] = {
             label = "Location",
             label = "Location",
             type = "string",
             type = "string",
             min_cardinality = 1,
             min_cardinality = 1,
             max_cardinality = 1,
             max_cardinality = 1,
         },
         },
+        --]]
         ["pas:path"] = {
         ["pas:path"] = {
             label = "Archival path",
             label = "Archival path",
             description = [[
             description = [[
@@ -18,10 +20,16 @@ return {
             min_cardinality = 1,
             min_cardinality = 1,
             max_cardinality = 1,
             max_cardinality = 1,
         },
         },
+        ["pas:next"] = {
+            label = "Next sibling",
+            type = "resource",
+            range = {["pas:Part"] = true},
+            max_cardinality = 1,
+        },
         ["pas:thumbnail"] = {
         ["pas:thumbnail"] = {
             label = "Thumbnail",
             label = "Thumbnail",
             type = "string",
             type = "string",
-            min_cardinality = 1,
+            --min_cardinality = 1,
             max_cardinality = 1,
             max_cardinality = 1,
         },
         },
         ["dc:format"] = {
         ["dc:format"] = {

+ 3 - 3
config/model/typedef/part.lua

@@ -3,11 +3,11 @@ Part content type = "a logical subdivision within an artifact.",
 Resources of this type may be used to build logical structures implemented
 Resources of this type may be used to build logical structures implemented
 as linked lists.
 as linked lists.
 
 
-The `pas = "first` property points to the first child in a",
+The `pas:first` property points to the first child in a
 linked list nested inside the current resource. There may be any number of
 linked list nested inside the current resource. There may be any number of
 linked list heads under the same Part, or none.
 linked list heads under the same Part, or none.
 
 
-The `pas = "next` property points to the next sibling in a linked list. The",
+The `pas:next` property points to the next sibling in a linked list. The
 last item in a list is identified by the lack of this property.
 last item in a list is identified by the lack of this property.
 
 
 In a more complex hierarchy, any given Part may have both "first" and
 In a more complex hierarchy, any given Part may have both "first" and
@@ -30,7 +30,7 @@ return {
             label = "Next sibling",
             label = "Next sibling",
             type = "resource",
             type = "resource",
             range = {["pas:Part"] = true},
             range = {["pas:Part"] = true},
-            max_cardinality = "1",
+            max_cardinality = 1,
         }
         }
     }
     }
 }
 }

+ 1 - 0
pocket_archive-scm-1.rockspec

@@ -33,6 +33,7 @@ build = {
     modules = {
     modules = {
         ["pocket_archive"] = "src/core.lua",
         ["pocket_archive"] = "src/core.lua",
         ["pocket_archive.model"] = "src/model.lua",
         ["pocket_archive.model"] = "src/model.lua",
+        ["pocket_archive.validator"] = "src/validator.lua",
         ["pocket_archive.submission"] = "src/submission.lua",
         ["pocket_archive.submission"] = "src/submission.lua",
         ["pocket_archive.html_generator"] = "src/html_generator.lua",
         ["pocket_archive.html_generator"] = "src/html_generator.lua",
         ["pocket_archive.transformers"] = "src/transformers.lua",
         ["pocket_archive.transformers"] = "src/transformers.lua",

+ 4 - 2
scratch.lua

@@ -5,6 +5,7 @@ local store = require "volksdata.store"
 
 
 local pkar = require "pocket_archive"
 local pkar = require "pocket_archive"
 local sub = require "pocket_archive.submission"
 local sub = require "pocket_archive.submission"
+local val = require "pocket_archive.validator"
 local hgen = require "pocket_archive.html_generator"
 local hgen = require "pocket_archive.html_generator"
 
 
 
 
@@ -13,7 +14,8 @@ local st = store.new(store.MDB, pkar.store_id, true)  -- delete prev archive
 ---[[
 ---[[
 sip = sub.generate_sip(
 sip = sub.generate_sip(
     "test/sample_submission/postcard-bag/data/pkar_submission.csv")
     "test/sample_submission/postcard-bag/data/pkar_submission.csv")
-sub.deposit(sip)
+
+--sub.deposit(sip)
 --]]
 --]]
 
 
-html = hgen.generate_site()
+--html = hgen.generate_site()

+ 26 - 13
src/core.lua

@@ -3,20 +3,26 @@ local path = require "pl.path"
 local store = require "volksdata.store"
 local store = require "volksdata.store"
 
 
 local term = require "volksdata.term"
 local term = require "volksdata.term"
-local namespace = require "volksdata.namespace"
+local nsm = require "volksdata.namespace"
 
 
 
 
 local fpath = debug.getinfo(1, "S").source:sub(2)
 local fpath = debug.getinfo(1, "S").source:sub(2)
 local root_path = path.dirname(path.dirname(fpath))
 local root_path = path.dirname(path.dirname(fpath))
 local config_path = os.getenv("PA_CONFIG_DIR") or (root_path .. "/config")
 local config_path = os.getenv("PA_CONFIG_DIR") or (root_path .. "/config")
 
 
+local config = dofile(config_path .. "/app.lua")
+
+for pfx, ns in pairs(config.namespace) do nsm.add(pfx, ns) end
+
 
 
 local M = {
 local M = {
     -- Project root path.
     -- Project root path.
     root = root_path,
     root = root_path,
-    config = dofile(config_path .. "/app.lua"),
+    config = config,
 
 
     default_title = "Pocket Archive",
     default_title = "Pocket Archive",
+    store_id = "file://" .. (os.getenv("PA_BASE") or config.fs.dres_path),
+    store = store.new(store.MDB, store_id),
 
 
     --Logger config.
     --Logger config.
     logger = require "sllog":init{
     logger = require "sllog":init{
@@ -29,24 +35,31 @@ local M = {
       hookrequire=true, -- also report calls to require()
       hookrequire=true, -- also report calls to require()
       level="debug",    -- output levels up to and including "dbg"
       level="debug",    -- output levels up to and including "dbg"
     },
     },
-}
 
 
-M.store_id = "file://" .. (os.getenv("PA_BASE") or M.config.fs.dres_path)
-M.store = store.new(store.MDB, M.store_id)
+    -- Commonly used terms.
+    RDF_TYPE = term.new_iriref_ns("rdf:type"),
 
 
-print(("Default NSM: %s"):format(M.nsm))
+    DC_TITLE_P = term.new_iriref_ns("dc:title"),
+    DC_CREATED_P = term.new_iriref_ns("dc:created"),
+    TN_P = term.new_iriref_ns("pas:thumbnail"),
+    FIRST_P = term.new_iriref_ns("pas:first"),
+    NEXT_P = term.new_iriref_ns("pas:next"),
+    PATH_P = term.new_iriref_ns("pas:path"),
+    CONTENT_TYPE_P = term.new_iriref_ns("pas:contentType"),
 
 
-for pfx, ns in pairs(M.config.namespace) do namespace.add(pfx, ns) end
+    ART_T = term.new_iriref_ns("pas:Artifact"),
+    PART_T = term.new_iriref_ns("pas:Part"),
+    FILE_T = term.new_iriref_ns("pas:File"),
+
+    -- Common namespaces
+    PAR_NS = nsm.get_ns("par"),
+    PAS_NS = nsm.get_ns("pas"),
+}
 
 
--- Some constant terms.
-M.RDF_TYPE = term.new_iriref_ns("rdf:type", M.nsm)
 
 
 -- Initialize random ID generator.
 -- Initialize random ID generator.
 math.randomseed(M.config.id.seed[1], M.config.id.seed[2])
 math.randomseed(M.config.id.seed[1], M.config.id.seed[2])
 
 
-
-local par_ns = namespace.get_ns("par")
-
 --[[
 --[[
   Generate pairtree directory and file path from an ID string and prefix.
   Generate pairtree directory and file path from an ID string and prefix.
 
 
@@ -63,7 +76,7 @@ local par_ns = namespace.get_ns("par")
   return: full file path, with the optional extension if provided.
   return: full file path, with the optional extension if provided.
 --]]
 --]]
 M.gen_pairtree = function (pfx, id_str, ext, no_create)
 M.gen_pairtree = function (pfx, id_str, ext, no_create)
-    local bare_id = id_str:gsub(par_ns, ""):gsub("^par:", "")
+    local bare_id = id_str:gsub(M.PAR_NS, ""):gsub("^par:", "")
     local res_dir = path.join(pfx, bare_id:sub(1,2), bare_id:sub(3,4))
     local res_dir = path.join(pfx, bare_id:sub(1,2), bare_id:sub(3,4))
 
 
     local created, err
     local created, err

+ 49 - 21
src/submission.lua

@@ -14,7 +14,7 @@ local pkar = require "pocket_archive"
 local model = require "pocket_archive.model"
 local model = require "pocket_archive.model"
 local mc = require "pocket_archive.monocypher"
 local mc = require "pocket_archive.monocypher"
 local transformers = require "pocket_archive.transformers"
 local transformers = require "pocket_archive.transformers"
-
+local validator = require "pocket_archive.validator"
 
 
 local logger = pkar.logger
 local logger = pkar.logger
 
 
@@ -170,18 +170,26 @@ M.generate_sip = function(path)
     end
     end
     -- Infer structure from paths and row ordering.
     -- Infer structure from paths and row ordering.
     for i, v in ipairs(sip) do
     for i, v in ipairs(sip) do
-        for j = i + 1, #sip do
-            if not v["pas:next"] and
-                    sip[j]["pas:sourcePath"]:match("(.*/)") ==
-                            v["pas:sourcePath"]:match("(.*/)") then
-                v["pas:next"] = sip[j].id
+        local rmod = model.parse_model(v["pas:contentType"])
+        if rmod.properties["pas:next"] then
+            for j = i + 1, #sip do
+                if not v["pas:next"] and
+                        sip[j]["pas:sourcePath"]:match("(.*/)") ==
+                                v["pas:sourcePath"]:match("(.*/)") then
+                    v["pas:next"] = sip[j].id
+                end
             end
             end
-            if not v["pas:first"] and
-                    sip[j]["pas:sourcePath"]:match("^" .. escape_pattern(v["pas:sourcePath"])) then
-                v["pas:first"] = sip[j].id
+        end
+        if rmod.properties["pas:first"] then
+            for j = i + 1, #sip do
+                if not v["pas:first"] and
+                    sip[j]["pas:sourcePath"]:match(
+                            "^" .. escape_pattern(v["pas:sourcePath"])
+                ) then
+                    v["pas:first"] = sip[j].id
+                end
             end
             end
         end
         end
-        v._sort = nil
     end
     end
     --require "debugger"()
     --require "debugger"()
 
 
@@ -189,19 +197,16 @@ M.generate_sip = function(path)
 end
 end
 
 
 
 
-M.validate = function(sip)
-    -- TODO
-end
-
+--[[  Convert a SIP resource table to an in-memory Volksdata graph.
 
 
-M.update_rsrc_md = function(rsrc)
-    -- TODO use a transaction when volksdata_lua supports it.
+--]]
+M.rsrc_to_graph = function(rsrc)
+    local rmod = model.parse_model(rsrc["pas:contentType"])
     logger:info("Updating resource md: ", pp.write(rsrc))
     logger:info("Updating resource md: ", pp.write(rsrc))
-    rmod = model.parse_model(rsrc["pas:contentType"])
-    triples = {}
 
 
-    gr = graph.new(pkar.store, term.DEFAULT_CTX)
     local s = term.new_iriref_ns(rsrc.id)
     local s = term.new_iriref_ns(rsrc.id)
+    triples = {}
+
     for k, v in pairs(rsrc) do
     for k, v in pairs(rsrc) do
         -- id is the subject, it won't be an attribute.
         -- id is the subject, it won't be an attribute.
         if k == "id" then goto skip end
         if k == "id" then goto skip end
@@ -210,13 +215,16 @@ M.update_rsrc_md = function(rsrc)
         local p = term.new_iriref_ns(k)
         local p = term.new_iriref_ns(k)
         local o
         local o
         if type(v) == "table" then
         if type(v) == "table" then
-            for vv, _ in pairs(v) do
+            for vv in pairs(v) do
                 if ((rmod.properties or NT)[k] or NT).type == "resource" then
                 if ((rmod.properties or NT)[k] or NT).type == "resource" then
                     o = term.new_iriref_ns(vv)
                     o = term.new_iriref_ns(vv)
                 else o = term.new_lit(vv) end
                 else o = term.new_lit(vv) end
                 table.insert(triples, triple.new(s, p, o))
                 table.insert(triples, triple.new(s, p, o))
             end
             end
         else
         else
+            if k == "pas:contentType" then
+                v = "pas:" .. v
+            end
             if ((rmod.properties or NT)[k] or NT).type == "resource" then
             if ((rmod.properties or NT)[k] or NT).type == "resource" then
                 o = term.new_iriref_ns(v)
                 o = term.new_iriref_ns(v)
             else o = term.new_lit(v) end
             else o = term.new_lit(v) end
@@ -229,6 +237,7 @@ M.update_rsrc_md = function(rsrc)
             triples, triple.new(s, pkar.RDF_TYPE, term.new_iriref_ns(m)))
             triples, triple.new(s, pkar.RDF_TYPE, term.new_iriref_ns(m)))
     end
     end
 
 
+    local gr = graph.new(nil, rsrc.id, true)
     -- This is a full replacement.
     -- This is a full replacement.
     --require "debugger"()
     --require "debugger"()
     logger:info("Removing triples.")
     logger:info("Removing triples.")
@@ -237,8 +246,27 @@ M.update_rsrc_md = function(rsrc)
     -- TODO implement volksdata_lua fn to add a single triple and add triples
     -- TODO implement volksdata_lua fn to add a single triple and add triples
     -- in the previous loop.
     -- in the previous loop.
     gr:add(triples)
     gr:add(triples)
+
+    return gr
+end
+
+
+M.store_updates = function(gr)
+    -- TODO use a transaction when volksdata_lua supports it.
     logger:debug("Graph: ", tostring(gr))
     logger:debug("Graph: ", tostring(gr))
     for trp in gr:lookup() do logger:debug(tostring(trp)) end
     for trp in gr:lookup() do logger:debug(tostring(trp)) end
+
+    local val_report = validator.validate(gr)
+    if val_report.max_level == "ERROR" then error(
+        "Validation raised errors: " .. pp.write(val_report))
+    elseif val_report.max_level == "WARN" then logger:warn(
+        "Validation raised warnings: " .. pp.write(val_report))
+    elseif val_report.max_level == "NOTICE" then logger:warn(
+        "Validation raised notices: " .. pp.write(val_report)) end
+
+    local stored_gr = graph.new(pkar.store, term.DEFAULT_CTX)
+
+    return gr:copy(stored_gr)
 end
 end
 
 
 
 
@@ -309,7 +337,7 @@ M.deposit = function(sip)
         tstamp = os.date("!%Y-%m-%dT%TZ")
         tstamp = os.date("!%Y-%m-%dT%TZ")
         rsrc["dc:created"] = tstamp
         rsrc["dc:created"] = tstamp
         rsrc["dc:modified"] = tstamp
         rsrc["dc:modified"] = tstamp
-        M.update_rsrc_md(rsrc)
+        M.store_updates(M.rsrc_to_graph(rsrc))
     end
     end
 
 
     -- Remove processing directory.
     -- Remove processing directory.

+ 99 - 0
src/validator.lua

@@ -0,0 +1,99 @@
+local pp = require "pl.pretty"
+local term = require "volksdata.term"
+
+local pkar = require "pocket_archive"
+local model = require "pocket_archive.model"
+
+local logger = pkar.logger
+
+
+local E_TYPE = "Type error"
+local E_CARD = "Cardinality error"
+local E_RANGE = "Range error"
+
+local M = {}
+
+
+--[[  Validate a resource graph against its content type definition.
+
+  The content type is read from the `pas:contentType` attribute of the
+  graph's own URI and resolved with `model.parse_model`. Every property rule
+  of the resulting model is then checked for cardinality and, when values
+  are present, for type. Range checking is not implemented yet.
+
+  gr: graph describing a single resource; `gr:get_uri()` is the subject.
+
+  return: report table with `notices`, `warnings` and `errors` lists. Each
+  entry is a `{category, message}` pair. `max_level` is set to "ERROR",
+  "WARN" or "NOTICE" according to the most severe non-empty list, and is
+  left unset when all lists are empty.
+
+  Raises an error if the graph has no content type or if the content type
+  has no definition.
+--]]
+M.validate = function(gr)
+    -- Keep `_` and `ctype` local instead of leaking them as globals.
+    local _, ctype = next(gr:attr(gr:get_uri(), pkar.CONTENT_TYPE_P) or {})
+    if not ctype then
+        error("No content type found in graph " .. tostring(gr:get_uri()))
+    end
+    local rmod = model.parse_model(ctype.data)
+    -- error() takes (message, level): the type name has to be concatenated
+    -- into the message, not passed as a second argument.
+    if not rmod then
+        error("No type definition for " .. tostring(ctype.data))
+    end
+
+    local report = {notices = {}, warnings = {}, errors = {}}
+
+    -- `NT` is not defined in this module; use an empty table as fallback.
+    for fname, rules in pairs(rmod.properties or {}) do
+        local values = gr:attr(gr:get_uri(), term.new_iriref_ns(fname)) or {}
+
+        -- Cardinality
+        local card = 0
+        for _ in pairs(values) do card = card + 1 end
+        if rules.min_cardinality or rules.max_cardinality then
+            local min_card = rules.min_cardinality or 0
+            if card < min_card then
+                table.insert(report.errors, {
+                    E_CARD,
+                    ("Too few values for %s: expected %d, got %d"):format(
+                            fname, min_card, card)
+                })
+            end
+            -- With no upper bound this defaults to math.huge, which `card`
+            -- can never exceed, so the `%d` below is never fed infinity.
+            local max_card = rules.max_cardinality or math.huge
+            if card > max_card then
+                table.insert(report.errors, {
+                    E_CARD,
+                    ("Too many values for %s: expected %d, got %d"):format(
+                            fname, max_card, card)
+                })
+            end
+        end
+
+        -- From this point on, if there are no values, skip other criteria.
+        if card == 0 then goto skip_prop end
+
+        -- Type
+        if rules.type then
+            -- String type accepts any value.
+            if rules.type == "number" then
+                -- NOTE(review): this iterates the KEYS of `values`, while
+                -- the content type lookup above reads the VALUE slot —
+                -- confirm which slot gr:attr() uses for the data.
+                for v in pairs(values) do
+                    if type(v) ~= "number" then
+                        table.insert(
+                            report.errors, {
+                                E_TYPE,
+                                ("Number expected for %s; got: %s")
+                                :format(fname, v)
+                            })
+                    end
+                end
+            elseif rules.type == "resource" then
+                -- NOTE(review): assumes `values` is a sequence of strings in
+                -- prefixed form; if gr:attr() yields term objects or a
+                -- non-sequence table this loop needs adapting — confirm.
+                for _, v in ipairs(values) do
+                    -- Compare the 4-character slice with the full 4-character
+                    -- prefix; comparing it with "par" could never match.
+                    if v:sub(1,4) ~= "par:" then
+                        table.insert(
+                            report.errors, {
+                                E_TYPE,
+                                ("`par:` prefix expected for %s; got: %s")
+                                :format(fname, v)
+                            })
+                    end
+                end
+            end
+        end
+
+        -- Range
+        if rules.range then
+            -- TODO range validation is not implemented yet.
+            for _, v in ipairs(values) do
+
+            end
+        end
+
+        ::skip_prop::
+    end
+
+    if #report.errors > 0 then report.max_level = "ERROR"
+    elseif #report.warnings > 0 then report.max_level = "WARN"
+    elseif #report.notices > 0 then report.max_level = "NOTICE" end
+
+    return report
+end
+
+
+return M