Bläddra i källkod

Validation; submission metadata adjustments.

scossu 5 dagar sedan
förälder
incheckning
56c810a645

+ 8 - 7
README.md

@@ -149,21 +149,22 @@ Simple road map for a rough prototype:
 - ⚒ Configuration + config parser
   - ✓ Application
   - ⚒ Content model
-    -  Validation rules
-    -  Relationship inference rules
+    -  Validation rules
+    -  Relationship inference rules
   - ⎊ Local overrides
 - ⚒ Submission module
   - ✓ SIP building
   - ✓ Metadata from LL
   - ✓ Structure inference
-  - Relatioships inference
+  - Relationships inference
 - ⚒ HTML generator
   - ✓ Index
   - ✓ Resource
   - ✓ Static assets
   - ✓ Transformers
--  Non-HTML generators
-  - LL
-  - RDF (turtle)
+-  Non-HTML generators
+  - ✓ RDF (turtle)
+  - ⎊ LL
 - ⎊ Front end
-  - JS search engine
+  - ⎊ JS search engine
+  - ⎊ Styling

+ 3 - 1
config/model/typedef/anything.lua

@@ -16,16 +16,18 @@ return {
         },
         ["pas:contentType"] = {
             label = "Content type",
-            type = "string",
+            type = "resource",
             min_cardinality = 1,
             max_cardinality = 1,
         },
+        --[[
         ["pas:id"] = {
             label = "Primary ID",
             type = "string",
             min_cardinality = 1,
             max_cardinality = 1,
         },
+        --]]
         ["dc:identifier"] = {
             label = "External system ID",
             type = "string",

+ 9 - 1
config/model/typedef/file.lua

@@ -4,12 +4,14 @@ return {
     broader = "pas:Anything",
 
     properties = {
+        --[[
         ["pas:location"] = {
             label = "Location",
             type = "string",
             min_cardinality = 1,
             max_cardinality = 1,
         },
+        --]]
         ["pas:path"] = {
             label = "Archival path",
             description = [[
@@ -18,10 +20,16 @@ return {
             min_cardinality = 1,
             max_cardinality = 1,
         },
+        ["pas:next"] = {
+            label = "Next sibling",
+            type = "resource",
+            range = {["pas:Part"] = true},
+            max_cardinality = 1,
+        },
         ["pas:thumbnail"] = {
             label = "Thumbnail",
             type = "string",
-            min_cardinality = 1,
+            --min_cardinality = 1,
             max_cardinality = 1,
         },
         ["dc:format"] = {

+ 3 - 3
config/model/typedef/part.lua

@@ -3,11 +3,11 @@ Part content type = "a logical subdivision within an artifact.",
 Resources of this type may be used to build logical structures implemented
 as linked lists.
 
-The `pas = "first` property points to the first child in a",
+The `pas:first` property points to the first child in a
 linked list nested inside the current resource. There may be any number of
 linked list heads under the same Part, or none.
 
-The `pas = "next` property points to the next sibling in a linked list. The",
+The `pas:next` property points to the next sibling in a linked list. The
 last item in a list is identified by the lack of this property.
 
 In a more complex hierarchy, any given Part may have both "first" and
@@ -30,7 +30,7 @@ return {
             label = "Next sibling",
             type = "resource",
             range = {["pas:Part"] = true},
-            max_cardinality = "1",
+            max_cardinality = 1,
         }
     }
 }

+ 1 - 0
pocket_archive-scm-1.rockspec

@@ -33,6 +33,7 @@ build = {
     modules = {
         ["pocket_archive"] = "src/core.lua",
         ["pocket_archive.model"] = "src/model.lua",
+        ["pocket_archive.validator"] = "src/validator.lua",
         ["pocket_archive.submission"] = "src/submission.lua",
         ["pocket_archive.html_generator"] = "src/html_generator.lua",
         ["pocket_archive.transformers"] = "src/transformers.lua",

+ 4 - 2
scratch.lua

@@ -5,6 +5,7 @@ local store = require "volksdata.store"
 
 local pkar = require "pocket_archive"
 local sub = require "pocket_archive.submission"
+local val = require "pocket_archive.validator"
 local hgen = require "pocket_archive.html_generator"
 
 
@@ -13,7 +14,8 @@ local st = store.new(store.MDB, pkar.store_id, true)  -- delete prev archive
 ---[[
 sip = sub.generate_sip(
     "test/sample_submission/postcard-bag/data/pkar_submission.csv")
-sub.deposit(sip)
+
+--sub.deposit(sip)
 --]]
 
-html = hgen.generate_site()
+--html = hgen.generate_site()

+ 26 - 13
src/core.lua

@@ -3,20 +3,26 @@ local path = require "pl.path"
 local store = require "volksdata.store"
 
 local term = require "volksdata.term"
-local namespace = require "volksdata.namespace"
+local nsm = require "volksdata.namespace"
 
 
 local fpath = debug.getinfo(1, "S").source:sub(2)
 local root_path = path.dirname(path.dirname(fpath))
 local config_path = os.getenv("PA_CONFIG_DIR") or (root_path .. "/config")
 
+local config = dofile(config_path .. "/app.lua")
+
+for pfx, ns in pairs(config.namespace) do nsm.add(pfx, ns) end
+
 
 local M = {
     -- Project root path.
     root = root_path,
-    config = dofile(config_path .. "/app.lua"),
+    config = config,
 
     default_title = "Pocket Archive",
+    -- Store location; the PA_BASE environment variable overrides the
+    -- configured path.
+    store_id = "file://" .. (os.getenv("PA_BASE") or config.fs.dres_path),
+    -- A table constructor cannot refer to its own fields: a bare `store_id`
+    -- here would be an undefined global (nil). Build the ID expression again.
+    store = store.new(
+        store.MDB, "file://" .. (os.getenv("PA_BASE") or config.fs.dres_path)),
 
     --Logger config.
     logger = require "sllog":init{
@@ -29,24 +35,31 @@ local M = {
       hookrequire=true, -- also report calls to require()
       level="debug",    -- output levels up to and including "dbg"
     },
-}
 
-M.store_id = "file://" .. (os.getenv("PA_BASE") or M.config.fs.dres_path)
-M.store = store.new(store.MDB, M.store_id)
+    -- Commonly used terms.
+    RDF_TYPE = term.new_iriref_ns("rdf:type"),
 
-print(("Default NSM: %s"):format(M.nsm))
+    DC_TITLE_P = term.new_iriref_ns("dc:title"),
+    DC_CREATED_P = term.new_iriref_ns("dc:created"),
+    TN_P = term.new_iriref_ns("pas:thumbnail"),
+    FIRST_P = term.new_iriref_ns("pas:first"),
+    NEXT_P = term.new_iriref_ns("pas:next"),
+    PATH_P = term.new_iriref_ns("pas:path"),
+    CONTENT_TYPE_P = term.new_iriref_ns("pas:contentType"),
 
-for pfx, ns in pairs(M.config.namespace) do namespace.add(pfx, ns) end
+    ART_T = term.new_iriref_ns("pas:Artifact"),
+    PART_T = term.new_iriref_ns("pas:Part"),
+    FILE_T = term.new_iriref_ns("pas:File"),
+
+    -- Common namespaces
+    PAR_NS = nsm.get_ns("par"),
+    PAS_NS = nsm.get_ns("pas"),
+}
 
--- Some constant terms.
-M.RDF_TYPE = term.new_iriref_ns("rdf:type", M.nsm)
 
 -- Initialize random ID generator.
 math.randomseed(M.config.id.seed[1], M.config.id.seed[2])
 
-
-local par_ns = namespace.get_ns("par")
-
 --[[
   Gnerate pairtree directory and file path from an ID string and prefix.
 
@@ -63,7 +76,7 @@ local par_ns = namespace.get_ns("par")
   return: full file path, with the optional extension if provided.
 --]]
 M.gen_pairtree = function (pfx, id_str, ext, no_create)
-    local bare_id = id_str:gsub(par_ns, ""):gsub("^par:", "")
+    local bare_id = id_str:gsub(M.PAR_NS, ""):gsub("^par:", "")
     local res_dir = path.join(pfx, bare_id:sub(1,2), bare_id:sub(3,4))
 
     local created, err

+ 49 - 21
src/submission.lua

@@ -14,7 +14,7 @@ local pkar = require "pocket_archive"
 local model = require "pocket_archive.model"
 local mc = require "pocket_archive.monocypher"
 local transformers = require "pocket_archive.transformers"
-
+local validator = require "pocket_archive.validator"
 
 local logger = pkar.logger
 
@@ -170,18 +170,26 @@ M.generate_sip = function(path)
     end
     -- Infer structure from paths and row ordering.
     for i, v in ipairs(sip) do
-        for j = i + 1, #sip do
-            if not v["pas:next"] and
-                    sip[j]["pas:sourcePath"]:match("(.*/)") ==
-                            v["pas:sourcePath"]:match("(.*/)") then
-                v["pas:next"] = sip[j].id
+        local rmod = model.parse_model(v["pas:contentType"])
+        if rmod.properties["pas:next"] then
+            for j = i + 1, #sip do
+                if not v["pas:next"] and
+                        sip[j]["pas:sourcePath"]:match("(.*/)") ==
+                                v["pas:sourcePath"]:match("(.*/)") then
+                    v["pas:next"] = sip[j].id
+                end
             end
-            if not v["pas:first"] and
-                    sip[j]["pas:sourcePath"]:match("^" .. escape_pattern(v["pas:sourcePath"])) then
-                v["pas:first"] = sip[j].id
+        end
+        if rmod.properties["pas:first"] then
+            for j = i + 1, #sip do
+                if not v["pas:first"] and
+                    sip[j]["pas:sourcePath"]:match(
+                            "^" .. escape_pattern(v["pas:sourcePath"])
+                ) then
+                    v["pas:first"] = sip[j].id
+                end
             end
         end
-        v._sort = nil
     end
     --require "debugger"()
 
@@ -189,19 +197,16 @@ M.generate_sip = function(path)
 end
 
 
-M.validate = function(sip)
-    -- TODO
-end
-
+--[[  Convert a SIP resource table to an in-memory Volksdata graph.
 
-M.update_rsrc_md = function(rsrc)
-    -- TODO use a transaction when volksdata_lua supports it.
+--]]
+M.rsrc_to_graph = function(rsrc)
+    local rmod = model.parse_model(rsrc["pas:contentType"])
     logger:info("Updating resource md: ", pp.write(rsrc))
-    rmod = model.parse_model(rsrc["pas:contentType"])
-    triples = {}
 
-    gr = graph.new(pkar.store, term.DEFAULT_CTX)
     local s = term.new_iriref_ns(rsrc.id)
+    triples = {}
+
     for k, v in pairs(rsrc) do
         -- id is the subject, it won't be an attribute.
         if k == "id" then goto skip end
@@ -210,13 +215,16 @@ M.update_rsrc_md = function(rsrc)
         local p = term.new_iriref_ns(k)
         local o
         if type(v) == "table" then
-            for vv, _ in pairs(v) do
+            for vv in pairs(v) do
                 if ((rmod.properties or NT)[k] or NT).type == "resource" then
                     o = term.new_iriref_ns(vv)
                 else o = term.new_lit(vv) end
                 table.insert(triples, triple.new(s, p, o))
             end
         else
+            if k == "pas:contentType" then
+                v = "pas:" .. v
+            end
             if ((rmod.properties or NT)[k] or NT).type == "resource" then
                 o = term.new_iriref_ns(v)
             else o = term.new_lit(v) end
@@ -229,6 +237,7 @@ M.update_rsrc_md = function(rsrc)
             triples, triple.new(s, pkar.RDF_TYPE, term.new_iriref_ns(m)))
     end
 
+    local gr = graph.new(nil, rsrc.id, true)
     -- This is a full replacement.
     --require "debugger"()
     logger:info("Removing triples.")
@@ -237,8 +246,27 @@ M.update_rsrc_md = function(rsrc)
     -- TODO implement volksdata_lua fn to add a single triple and add triples
     -- in the previous loop.
     gr:add(triples)
+
+    return gr
+end
+
+
+M.store_updates = function(gr)
+    -- TODO use a transaction when volksdata_lua supports it.
     logger:debug("Graph: ", tostring(gr))
     for trp in gr:lookup() do logger:debug(tostring(trp)) end
+
+    local val_report = validator.validate(gr)
+    if val_report.max_level == "ERROR" then error(
+        "Validation raised errors: " .. pp.write(val_report))
+    elseif val_report.max_level == "WARN" then logger:warn(
+        "Validation raised warnings: " .. pp.write(val_report))
+    elseif val_report.max_level == "NOTICE" then logger:warn(
+        "Validation raised notices: " .. pp.write(val_report)) end
+
+    local stored_gr = graph.new(pkar.store, term.DEFAULT_CTX)
+
+    return gr:copy(stored_gr)
 end
 
 
@@ -309,7 +337,7 @@ M.deposit = function(sip)
         tstamp = os.date("!%Y-%m-%dT%TZ")
         rsrc["dc:created"] = tstamp
         rsrc["dc:modified"] = tstamp
-        M.update_rsrc_md(rsrc)
+        M.store_updates(M.rsrc_to_graph(rsrc))
     end
 
     -- Remove processing directory.

+ 99 - 0
src/validator.lua

@@ -0,0 +1,99 @@
+local pp = require "pl.pretty"
+local term = require "volksdata.term"
+
+local pkar = require "pocket_archive"
+local model = require "pocket_archive.model"
+
+local logger = pkar.logger
+
+
+local E_TYPE = "Type error"
+local E_CARD = "Cardinality error"
+local E_RANGE = "Range error"
+
+local M = {}
+
+
+--[[  Validate a resource graph against the model of its content type.
+
+  The content type is read from the `pas:contentType` attribute of the
+  graph's own URI and resolved via `model.parse_model`. Each property
+  declared by the model is then checked for cardinality and value type.
+
+  gr: graph holding the resource; must provide `get_uri` and `attr`.
+
+  return: report table with `notices`, `warnings` and `errors` lists, whose
+  entries are `{category, message}` pairs. `max_level` is "ERROR", "WARN" or
+  "NOTICE" after the most severe non-empty list, or nil if all are empty.
+
+  Raises an error if the content type is missing or has no model.
+--]]
+M.validate = function(gr)
+    local subject = gr:get_uri()
+
+    local _, ctype = next(gr:attr(subject, pkar.CONTENT_TYPE_P) or {})
+    if not ctype then
+        error("No content type found for " .. tostring(subject))
+    end
+
+    local rmod = model.parse_model(ctype.data)
+    if not rmod then
+        error("No type definition for " .. tostring(ctype.data))
+    end
+
+    -- Attribute values are handled as terms carrying their text in `data`,
+    -- like the content type term above; other values are passed through.
+    local function text_of(v)
+        local tv = type(v)
+        if tv == "table" or tv == "userdata" then return v.data end
+        return v
+    end
+
+    -- True if `data` is an archive resource ID, either prefixed (`par:`) or
+    -- fully qualified with the `par` namespace.
+    local function is_archive_ref(data)
+        if type(data) ~= "string" then return false end
+        if data:sub(1, 4) == "par:" then return true end
+        local ns = pkar.PAR_NS
+        return type(ns) == "string" and data:sub(1, #ns) == ns
+    end
+
+    local report = {notices = {}, warnings = {}, errors = {}}
+
+    for fname, rules in pairs(rmod.properties or {}) do
+        local values = gr:attr(subject, term.new_iriref_ns(fname)) or {}
+
+        -- Cardinality. Missing limits default to 0 and infinity.
+        local card = 0
+        for _ in pairs(values) do card = card + 1 end
+
+        local min_card = tonumber(rules.min_cardinality) or 0
+        local max_card = tonumber(rules.max_cardinality) or math.huge
+        if card < min_card then
+            table.insert(report.errors, {
+                E_CARD,
+                ("Too few values for %s: expected %d, got %d"):format(
+                        fname, min_card, card)
+            })
+        end
+        if card > max_card then
+            table.insert(report.errors, {
+                E_CARD,
+                ("Too many values for %s: expected %d, got %d"):format(
+                        fname, max_card, card)
+            })
+        end
+
+        -- The remaining criteria only apply to actual values.
+        if card > 0 then
+            -- Type. String type accepts any value.
+            if rules.type == "number" then
+                for _, v in pairs(values) do
+                    local data = text_of(v)
+                    if tonumber(data) == nil then
+                        table.insert(report.errors, {
+                            E_TYPE,
+                            ("Number expected for %s; got: %s")
+                            :format(fname, tostring(data))
+                        })
+                    end
+                end
+            -- The content type is a resource in the `pas` namespace and has
+            -- already been checked by resolving its model above.
+            -- NOTE(review): confirm no other resource-typed property is
+            -- meant to point outside the `par` namespace.
+            elseif rules.type == "resource"
+                    and fname ~= "pas:contentType" then
+                for _, v in pairs(values) do
+                    local data = text_of(v)
+                    if not is_archive_ref(data) then
+                        table.insert(report.errors, {
+                            E_TYPE,
+                            ("`par:` prefix expected for %s; got: %s")
+                            :format(fname, tostring(data))
+                        })
+                    end
+                end
+            end
+
+            -- Range: TODO validate values against `rules.range`.
+        end
+    end
+
+    if #report.errors > 0 then report.max_level = "ERROR"
+    elseif #report.warnings > 0 then report.max_level = "WARN"
+    elseif #report.notices > 0 then report.max_level = "NOTICE" end
+
+    return report
+end
+
+
+return M