Kaynağa Gözat

Many metadata enhancements.

scossu 2 gün önce
ebeveyn
işleme
05f0056b95

+ 14 - 3
config/app.lua

@@ -1,7 +1,8 @@
 -- Application configuration.
 
+local plpath = require "pl.path"
 
-local ROOT = os.getenv("PKA_ROOT") or "./"
+local ROOT = os.getenv("PKAR_ROOT") or ""
 
 return {
     id = {
@@ -19,12 +20,22 @@ return {
             ["path"] = true,
             ["pas:sourcePath"] = true,
         },
+
+        -- Map of data types in prop definitions to RDF literal types.
+        -- Non-mapped items are set to `xsd:string`.
+        -- XSD datatype names are case-sensitive: the date-time type is
+        -- spelled `dateTime`.
+        datatypes = {
+            integer = "xsd:integer",
+            decimal = "xsd:decimal",
+            float = "xsd:double",
+            boolean = "xsd:boolean",
+            datetime = "xsd:dateTime",
+        }
     },
     fs = {
         -- Base path to write opaque resources.
-        ores_path = ROOT .. "data/ores/",
+        ores_path = plpath.join(ROOT, "data/ores"),
         -- Base path of LSUP store for descriptive resources (RDF).
-        dres_path = ROOT .. "data/dres/",
+        dres_path = plpath.join(ROOT, "data/dres"),
 
         -- How many bytes to read when handling files. Adjust to memory
         -- availability.

+ 2 - 2
config/model/typedef/anything.lua

@@ -47,13 +47,13 @@ return {
         },
         ["dc:created"] = {
             label = "Created On",
-            type = "timestamp",
+            type = "datetime",
             min_cardinality = 1,
             max_cardinality = 1,
         },
         ["dc:modified"] = {
             label = "Last Updated On",
-            type = "timestamp",
+            type = "datetime",
             min_cardinality = 1,
             max_cardinality = 1,
         },

+ 3 - 2
config/model/typedef/file.lua

@@ -48,8 +48,9 @@ return {
         ["premis:hasMessageDigest"] = {
             label = "Checksum",
             description = [[
-                File checksum formatted as: <algorithm>:<hex digest>]],
-            type = "string",
+                File checksum formatted as an URN:
+                `urn:<algorithm>:<hex digest>`]],
+            type = "ext_resource",
             min_cardinality = 1,
         },
     }

+ 3 - 3
scratch.lua

@@ -9,13 +9,13 @@ local val = require "pocket_archive.validator"
 local hgen = require "pocket_archive.html_generator"
 
 
+---[[
 local st = store.new(store.MDB, pkar.store_id, true)  -- delete prev archive
 
----[[
 sip = sub.generate_sip(
     "test/sample_submission/postcard-bag/data/pkar_submission.csv")
 
---sub.deposit(sip)
+sub.deposit(sip)
 --]]
 
---html = hgen.generate_site()
+html = hgen.generate_site()

+ 15 - 4
src/core.lua

@@ -8,9 +8,11 @@ local nsm = require "volksdata.namespace"
 
 local fpath = debug.getinfo(1, "S").source:sub(2)
 local root_path = path.dirname(path.dirname(fpath))
-local config_path = os.getenv("PA_CONFIG_DIR") or (root_path .. "/config")
+-- Resolve the configuration directory: the PKAR_CONFIG_DIR environment
+-- variable wins, otherwise fall back to <root_path>/config.
+-- Path components are passed to join() as separate arguments, through the
+-- `path` alias already used above for dirname() (presumably pl.path).
+local config_path = os.getenv("PKAR_CONFIG_DIR") or
+        path.join(root_path, "config")
 
-local config = dofile(config_path .. "/app.lua")
+local config = dofile(path.join(config_path, "app.lua"))
+-- Computed before the module table is built so that it can be reused there.
+local store_id = "file://" .. (os.getenv("PKAR_BASE") or config.fs.dres_path)
 
 for pfx, ns in pairs(config.namespace) do nsm.add(pfx, ns) end
 
@@ -21,7 +23,7 @@ local M = {
     config = config,
 
     default_title = "Pocket Archive",
-    store_id = "file://" .. (os.getenv("PA_BASE") or config.fs.dres_path),
+    store_id = store_id,
     store = store.new(store.MDB, store_id),
 
     --Logger config.
@@ -44,7 +46,7 @@ local M = {
     TN_P = term.new_iriref_ns("pas:thumbnail"),
     FIRST_P = term.new_iriref_ns("pas:first"),
     NEXT_P = term.new_iriref_ns("pas:next"),
-    PATH_P = term.new_iriref_ns("pas:path"),
+    PATH_P = term.new_iriref_ns("pas:sourcePath"),
     CONTENT_TYPE_P = term.new_iriref_ns("pas:contentType"),
 
     ART_T = term.new_iriref_ns("pas:Artifact"),
@@ -92,4 +94,13 @@ M.gen_pairtree = function (pfx, id_str, ext, no_create)
     return fpath, created
 end
 
+
+--[[
+    Escape strings for use in gsub patterns.
+
+    Every Lua pattern magic character found in `src` is prefixed with `%`.
+    Only the escaped string is returned: the substitution count that gsub
+    also yields is dropped, so that it cannot leak into a following
+    parameter (e.g. `init` of string.find) when this call is the last
+    argument of another call.
+--]]
+M.escape_ptn = function(src)
+    local escaped = src:gsub("[%(%)%.%%%+%-%*%?%[%]%^%$]", "%%%0")
+
+    return escaped
+end
+
+
 return M

+ 29 - 4
src/html_generator.lua

@@ -14,6 +14,9 @@ local logger = pkar.logger
 local model = require "pocket_archive.model"
 local transformers = require "pocket_archive.transformers"
 
+local dbg = require "debugger"
+
+
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
 
@@ -102,16 +105,27 @@ local function generate_dres(s, mconf)
                 local child_s = o
                 logger:debug("local child_s: ", child_s.data)
                 local ll = {}
+
+                -- Fallback labels.
                 local label
                 _, label = next(gr:attr(child_s, pkar.DC_TITLE_P))
+                if label then label = label.data
+                else
+                    _, label = next(gr:attr(child_s, pkar.PATH_P))
+                    if label then label = plpath.basename(label.data)
+                        else label = child_s.data end
+                end
+
                 while child_s do
                     -- Loop through all next nodes for each first child.
+                    require "debugger".assert(get_tn_url(child_s))
                     table.insert(ll, {
                         href = pkar.gen_pairtree(
                                 "/res", child_s.data, ".html", true),
-                        label = (label or NT).data,
+                        label = label,
                         tn = get_tn_url(child_s):gsub(M.media_dir, "/media/tn"),
                     })
+                    logger:debug("Child label for ", child_s.data, ": ", ll[#ll].label or "nil")
                     -- There can only be one "next"
                     _, child_s = next(gr:attr(child_s, pkar.NEXT_P))
                 end
@@ -131,7 +145,11 @@ local function generate_dres(s, mconf)
         end
         ::skip::
     end
-    table.sort(dmd, function(a, b) return (a.label < b.label) end)
+    table.sort(
+        dmd, function(a, b)
+            return ((a.label or a.uri) < (b.label or b.uri))
+        end
+    )
     table.sort(rel)
     table.sort(children)
     logger:debug("Lineage:", pp.write(mconf.lineage))
@@ -256,14 +274,21 @@ local function generate_ores(s, mconf)
 end
 
 
+--[[
+    Build a separate graph for one resource.
+
+    Creates a new graph with `s.data` as its second constructor argument
+    (presumably the graph URI) and calls `gr:copy()` on the module-level
+    graph with the new graph and `s`.
+
+    @param s Term for the resource; its `data` field is read.
+    @return The newly created graph.
+--]]
+M.get_graph = function(s)
+    -- Declared local so that the graph does not leak into the global table.
+    local out_gr = graph.new(nil, s.data)
+    gr:copy(out_gr, s)
+
+    return out_gr
+end
+
+
 M.generate_resource = function(s)
     local res_type
     _, res_type = next(gr:attr(s, pkar.CONTENT_TYPE_P))
     local mconf = model.models[res_type.data]
 
     -- Generate RDF/Turtle doc.
-    local res_gr = graph.new(nil, s.data)
-    gr:copy(res_gr, s)
+    local res_gr = M.get_graph(s)
     logger:debug("Serializing graph: ", s.data)
     local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
     local ofh = assert(io.open(res_path, "w"))

+ 6 - 0
src/model.lua

@@ -2,6 +2,11 @@ local string = string
 local table = table
 local io = io
 
+local pkar = require "pocket_archive"
+
+
+-- Escape magic characters.
+local PAS_NS_PTN = pkar.escape_ptn(pkar.PAS_NS)
 
 local M = {models = {}}
 
@@ -19,6 +24,7 @@ end
 
 
 M.parse_model = function(mod_id)
+    mod_id = mod_id:gsub(PAS_NS_PTN, ""):gsub("par:", "")
     local hierarchy = {}
 
     local function traverse(mod_id)

+ 25 - 21
src/submission.lua

@@ -17,6 +17,7 @@ local transformers = require "pocket_archive.transformers"
 local validator = require "pocket_archive.validator"
 
 local logger = pkar.logger
+local dbg = require "debugger"
 
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
@@ -211,23 +212,26 @@ M.rsrc_to_graph = function(rsrc)
         -- id is the subject, it won't be an attribute.
         if k == "id" then goto skip end
 
-        logger:debug("Adding attribute:", k, pp.write(v))
+        logger:debug(("Adding attribute: %s = %s"):format(k, pp.write(v)))
         local p = term.new_iriref_ns(k)
         local o
-        if type(v) == "table" then
-            for vv in pairs(v) do
-                if ((rmod.properties or NT)[k] or NT).type == "resource" then
-                    o = term.new_iriref_ns(vv)
-                else o = term.new_lit(vv) end
-                table.insert(triples, triple.new(s, p, o))
-            end
-        else
+        local datatype = ((rmod.properties or NT)[k] or NT).type
+        local rdf_type_str = pkar.config.md.datatypes[datatype]
+        local rdf_type
+        if rdf_type_str then
+            rdf_type = term.new_iriref_ns(rdf_type_str).data
+        end
+        -- Force all fields to be multi-valued.
+        if type(v) ~= "table" then v = {[v] = true} end
+        for vv in pairs(v) do
             if k == "pas:contentType" then
-                v = "pas:" .. v
+                vv = "pas:" .. vv
             end
-            if ((rmod.properties or NT)[k] or NT).type == "resource" then
-                o = term.new_iriref_ns(v)
-            else o = term.new_lit(v) end
+            if datatype == "resource" then
+                o = term.new_iriref_ns(vv)
+            elseif datatype == "ext_resource" then
+                o = term.new_iriref(vv)
+            else o = term.new_lit(vv, rdf_type) end
             table.insert(triples, triple.new(s, p, o))
         end
         ::skip::
@@ -237,7 +241,7 @@ M.rsrc_to_graph = function(rsrc)
             triples, triple.new(s, pkar.RDF_TYPE, term.new_iriref_ns(m)))
     end
 
-    local gr = graph.new(nil, rsrc.id, true)
+    local gr = graph.new(nil)
     -- This is a full replacement.
     --require "debugger"()
     logger:info("Removing triples.")
@@ -247,16 +251,15 @@ M.rsrc_to_graph = function(rsrc)
     -- in the previous loop.
     gr:add(triples)
 
-    return gr
+    return gr, s
 end
 
 
-M.store_updates = function(gr)
+M.store_updates = function(tmp_gr, s)
     -- TODO use a transaction when volksdata_lua supports it.
-    logger:debug("Graph: ", tostring(gr))
-    for trp in gr:lookup() do logger:debug(tostring(trp)) end
+    logger:debug("Graph: ", tmp_gr:encode("ttl"))
 
-    local val_report = validator.validate(gr)
+    local val_report = validator.validate(tmp_gr, s)
     if val_report.max_level == "ERROR" then error(
         "Validation raised errors: " .. pp.write(val_report))
     elseif val_report.max_level == "WARN" then logger:warn(
@@ -266,7 +269,7 @@ M.store_updates = function(gr)
 
     local stored_gr = graph.new(pkar.store, term.DEFAULT_CTX)
 
-    return gr:copy(stored_gr)
+    return tmp_gr:copy(stored_gr)
 end
 
 
@@ -303,7 +306,8 @@ M.deposit = function(sip)
                 fsize = fsize + #chunk
             end
             local checksum = hash_it:final(true)
-            rsrc["premis:hasMessageDigest"] = {["blake2:" .. checksum] = true}
+            rsrc["premis:hasMessageDigest"] = {
+                    ["urn:blake2:" .. checksum] = true}
             rsrc["dc:extent"] = fsize
 
             ofh:close()

+ 4 - 3
src/validator.lua

@@ -5,6 +5,7 @@ local pkar = require "pocket_archive"
 local model = require "pocket_archive.model"
 
 local logger = pkar.logger
+local dbg = require "debugger"
 
 
 local E_TYPE = "Type error"
@@ -14,8 +15,8 @@ local E_RANGE = "Range error"
 local M = {}
 
 
-M.validate = function(gr)
-    _, ctype = next(gr:attr(gr:get_uri(), pkar.CONTENT_TYPE_P))
+M.validate = function(gr, s)
+    _, ctype = next(gr:attr(s, pkar.CONTENT_TYPE_P))
     local rmod = model.parse_model(ctype.data)
     if not rmod then error("No type definition for ", ctype.data) end
 
@@ -23,7 +24,7 @@ M.validate = function(gr)
 
     for fname, rules in pairs(rmod.properties or NT) do
         local values
-        values = gr:attr(gr:get_uri(), term.new_iriref_ns(fname))
+        values = gr:attr(s, term.new_iriref_ns(fname))
 
         -- Cardinality
         local card = 0