Ver Fonte

Create implicit artifacts from single-file LL lines.

scossu há 1 semana atrás
pai
commit
fbb51c0667

+ 1 - 0
config/app.lua

@@ -62,5 +62,6 @@ return {
     -- Static site generation settings.
     htmlgen = {
         out_dir = "./out/html",
+        max_homepage_items = 12,
     },
 }

+ 0 - 1
config/model/typedef/anything.lua

@@ -12,7 +12,6 @@ return {
                 [[Path of the resource at deposit time, relative to the SIP
                 root.]],
             type = "string",
-            min_cardinality = 1,
             max_cardinality = 1,
         },
         content_type = {

+ 1 - 0
config/model/typedef/file.lua

@@ -4,6 +4,7 @@ return {
     broader = "anything",
 
     properties = {
+        source_path = { min_cardinality = 1 },
         archive_path = {
             uri = "pas:archivePath",
             label = "Archival path",

+ 7 - 0
config/model/typedef/still_image.lua

@@ -2,4 +2,11 @@ return {
     uri = "pas:StillImage",
     label = "Still Image",
     broader = "artifact",
+
+    --[[ Default file model: when a file is deposited and marked with
+      this content model, the submission process automatically creates a file
+      resource that becomes the only member of this artifact. This field
+      indicates the content model to use for that file resource. If not
+      specified, `file` is used. ]]
+    default_fmodel = "still_image_file",
 }

+ 1 - 0
src/core.lua

@@ -54,6 +54,7 @@ local M = {
     FIRST_P = term.new_iriref_ns("pas:first"),
     NEXT_P = term.new_iriref_ns("pas:next"),
     PATH_P = term.new_iriref_ns("pas:sourcePath"),
+    PREF_REP_P = term.new_iriref_ns("pas:hasPreferredRepresentation"),
     REF_P = term.new_iriref_ns("pas:ref"),
     SUBMITTED_P = term.new_iriref_ns("dc:dateSubmitted"),
     TN_P = term.new_iriref_ns("pas:thumbnail"),

+ 15 - 0
src/generator.lua

@@ -101,6 +101,10 @@ local function get_tn_url(s, ext)  -- TODO caller needs to pass correct ext
     _, ref = next(repo.gr:attr(s, pkar.REF_P))
     if ref then return get_tn_url(ref, ext) end
 
+    local pref_rep
+    _, pref_rep = next(repo.gr:attr(s, pkar.PREF_REP_P))
+    if pref_rep then return get_tn_url(pref_rep, ext) end
+
     -- Recurse through all first children until one with a thumbnail, or a
     -- leaf without children, is found.
     local t
@@ -222,6 +226,12 @@ local function generate_dres(s, mconf)
     logger:debug("Children:", pp.write(children))
     logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))
 
+    local pref_rep, pref_rep_url
+    _, pref_rep = next(repo.gr:attr(s, pkar.PREF_REP_P))
+    if pref_rep then
+        pref_rep_url = pkar.gen_pairtree("/res", pref_rep.data, ".html", true)
+    end
+
     out_html = templates.dres.data({
         --webroot = M.webroot,
         site_title = pkar.config.site.title or pkar.default_title,
@@ -234,6 +244,8 @@ local function generate_dres(s, mconf)
         rel = rel,
         children = children,
         ls_next = ls_next,
+        tn_url = get_tn_url(s),
+        pref_rep = pref_rep_url,
         icon_url = get_icon_url(mconf.lineage),
         breadcrumbs = get_breadcrumbs(mconf),
         rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
@@ -503,7 +515,9 @@ M.generate_homepage = function()
         pkar.RDF_TYPE, triple.POS_P,
         term.new_iriref_ns("pas:Artifact"), triple.POS_O
     )
+    local i = 1
     for _, s in pairs(s_ts) do
+        if i > (pkar.config.htmlgen.max_homepage_items or 10) then break end
         local title, submitted
         _, title = next(repo.gr:attr(s, pkar.DC_TITLE_P))
         _, submitted = next(repo.gr:attr(s, pkar.SUBMITTED_P))
@@ -515,6 +529,7 @@ M.generate_homepage = function()
             tn = get_tn_url(s),
         }
         table.insert(idx_data.objects, obj)
+        i = i + 1
     end
     table.sort(
         idx_data.objects, function(a, b)

+ 2 - 0
src/repo.lua

@@ -2,6 +2,8 @@
 RDF repository services.
 --]]
 
+local pp = require "pl.pretty"
+
 local nsm = require "volksdata.namespace"
 local term = require "volksdata.term"
 local triple = require "volksdata.triple"

+ 72 - 20
src/submission.lua

@@ -1,9 +1,30 @@
+--[[ Deposit module.
+
+This module takes care of the complete deposit process (except for the back
+end storage, which is called here but defined in the repo module).
+
+The deposit process is carried out in several steps:
+
+- SIP generation (`generate_sip()`): scans the laundry list CSV and builds a
+  temporary data structure with the found metadata; generates unique IDs for
+  resources; infers some implicit relationships from the position of the CSV
+  rows and folder layout; adds system-controlled metadata.
+- File staging (`deposit()`): scans through the generated SIP, identifies the
+  files, calculates their checksums, and moves them to temporary storage; adds
+  checksums to the metadata. TODO allow user-provided metadata and validation
+- graph generation: generate an RDF graph for each resource in the SIP.
+- permanent storage: push the RDF graph to permanent store (via functions in
+  the `repo` module), which includes content model validation; if this
+  succeeds, related files are also moved from the staging area to the archival
+  store.
+
+--]]
 local io = io
 
 local csv = require "ftcsv"
 local dir = require "pl.dir"
 local libmagic = require "libmagic"
-local plpath = require "pl.path"
+local path = require "pl.path"
 local pp = require "pl.pretty"
 
 local term = require "volksdata.term"
@@ -15,7 +36,6 @@ local model = require "pocket_archive.model"
 local mc = require "pocket_archive.monocypher"
 local repo = require "pocket_archive.repo"
 local transformers = require "pocket_archive.transformers"
-local validator = require "pocket_archive.validator"
 
 local logger = pkar.logger
 local dbg = require "debugger"
@@ -57,17 +77,27 @@ M.idgen = function(len)
 end
 
 
-M.generate_sip = function(path)
-    local sip = {root_path = path:match("(.*/)")}
+M.generate_sip = function(src_path)
+    local sip = {root_path = src_path:match("(.*/)")}
+    local src_dir = path.dirname(src_path)
     path_to_uri = {}
 
-    local tn_dir = plpath.join(sip.root_path, "proc", "tn")
+    local tn_dir = path.join(sip.root_path, "proc", "tn")
     dir.makepath(tn_dir)
 
     local prev_path
 
     local i = 0
-    for row_n, row in csv.parseLine(path) do
+    for row_n, row in csv.parseLine(src_path) do
+        local has_content
+        for k, v in pairs(row) do
+            -- Change "" to nil.
+            if v == "" then row[k] = nil
+            else has_content = true end
+        end
+        -- Skip empty lines.
+        if not has_content then goto skip end
+
         logger:debug("Row path: ", row.source_path)
         logger:debug("Parsing row:", pp.write(row))
         if #row.source_path > 0 then
@@ -81,7 +111,7 @@ M.generate_sip = function(path)
             -- Add to path to URI map for later referencing.
             path_to_uri[row.source_path] = sip[i].id
             for k, v in pairs(row) do
-                if v == "" then goto cont1 end  -- skip empty strings.
+                if not v then goto cont1 end  -- skip empty strings.
                 if pkar.config.md.single_values[k] then sip[i][k] = v
                 -- Multi-values are ordered in the SIP for further processing.
                 else sip[i][k] = {v} end
@@ -90,9 +120,9 @@ M.generate_sip = function(path)
 
             --[[
             -- Generate thumbnail for files.
-            local rsrc_path = plpath.join(
+            local rsrc_path = path.join(
                     sip.root_path, sip[i].source_path)
-            if plpath.isfile(rsrc_path) then
+            if path.isfile(rsrc_path) then
                 --require "debugger"()
                 sip[i].thumbnail = generate_thumbnail(
                         sip[i], sip.root_path, tn_dir)
@@ -106,7 +136,7 @@ M.generate_sip = function(path)
                 error(("No path information at row %d"):format(i), 2)
             else
                 for k, v in pairs(row) do
-                    if v == "" then goto cont2 end  -- skip empty strings.
+                    if not v then goto cont2 end  -- skip empty strings.
                     if pkar.config.md.single_values[k] then
                         -- It doesn't make much sense to overwrite, maybe throw an error?
                         error(
@@ -122,13 +152,31 @@ M.generate_sip = function(path)
                 row.source_path = prev_path
             end
         end
+        ::skip::
         row_n = row_n + 1
     end
     -- Infer structure from paths and row ordering.
     for i, v in ipairs(sip) do
         local rmod = model.types[v.content_type]
+        dbg.assert(v.source_path)
+        local fpath = path.join(src_dir, v.source_path)
         --dbg.assert(rmod)
         v.has_member = v.has_member or {}
+        -- Create implicit members from single-file artifact.
+        if rmod.types.artifact and path.isfile(fpath) then
+            local file_id = "par:" .. M.idgen()
+            -- Insert file resource and move it into a new sub-folder.
+            table.insert(sip, {
+                content_type = rmod.default_fmodel or "file",
+                id = file_id,
+                label = path.basename(v.source_path),
+                source_path = v.source_path,
+            })
+            sip[i].has_file = file_id
+            sip[i].pref_rep = file_id
+            sip[i].source_path = nil
+            goto skip
+        end
         for j = i + 1, #sip do
             if sip[j].source_path:match(
                     "^" .. pkar.escape_pattern(v.source_path))
@@ -142,6 +190,7 @@ M.generate_sip = function(path)
                 end
             end
         end
+        ::skip::
     end
 
     return sip
@@ -255,15 +304,18 @@ M.deposit = function(sip)
         logger:debug(("Processing resource #%d of %d: %s"):format(
                 i, #sip, rsrc.id))
 
-        local in_path = sip.root_path .. rsrc.source_path
-        local fext = plpath.extension(in_path)
+        local in_path, fext
+        if not rsrc.source_path then goto continue end
+
+        in_path = sip.root_path .. rsrc.source_path
+        fext = path.extension(in_path)
         -- If it's a directory, skip file processing.
-        if not plpath.isfile(in_path) then goto continue end
+        if not path.isfile(in_path) then goto continue end
 
         do
-            local tmp_dir = plpath.join(pkar.config.fs.ores_path, "tmp/")
+            local tmp_dir = path.join(pkar.config.fs.ores_path, "tmp/")
             local file_ext
-            _, file_ext = plpath.splitext(in_path)
+            _, file_ext = path.splitext(in_path)
             local tmp_path = tmp_dir .. rsrc.id .. file_ext
             dir.makepath(tmp_dir)
 
@@ -290,11 +342,11 @@ M.deposit = function(sip)
 
             -- Copy file and calculate checksum.
             local out_dir, out_path
-            out_dir = plpath.join(
+            out_dir = path.join(
                     pkar.config.fs.ores_path,
                     checksum:sub(1, 2),
                     checksum:sub(3, 4))
-            out_path = plpath.join(out_dir, checksum:sub(1,32) .. fext)
+            out_path = path.join(out_dir, checksum:sub(1,32) .. fext)
             dir.makepath(out_dir)
             logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
             dir.movefile(tmp_path, out_path)
@@ -303,8 +355,8 @@ M.deposit = function(sip)
             -- Copy thumbnail if existing.
             if rsrc.thumbnail then
                 src_path = rsrc.thumbnail
-                out_path = plpath.join(
-                        out_dir, plpath.basename(src_path))
+                out_path = path.join(
+                        out_dir, path.basename(src_path))
                 logger:debug(("Moving file %s to %s"):format(src_path, out_path))
                 dir.movefile(src_path, out_path)
                 rsrc.thumbnail = out_path
@@ -320,7 +372,7 @@ M.deposit = function(sip)
     end
 
     -- Remove processing directory.
-    dir.rmtree(plpath.join(sip.root_path, "proc"))
+    dir.rmtree(path.join(sip.root_path, "proc"))
 end
 
 

+ 8 - 1
templates/dres.html

@@ -17,6 +17,13 @@
                 <p><a href="<%= rdf_href %>">Download RDF document</a></p>
                 <p><a href="#">Download Laundry List [TODO]</a></p>
             </section>
+            <section id="res_thumbnail">
+                <% if pref_rep then %>
+                    <a href="<%= pref_rep %>">
+                        <img src="<%= tn_url %>" alt="Link to image file"/>
+                   </a>
+               <% end %>
+            </section>
             <section id="res_lineage">
                 <h2>Classification</h2>
                 <p>
@@ -61,7 +68,7 @@
                         <% else %><code><%= ol.uri %></code><% end %>
                     </dt>
                     <% for _, o in ipairs(ol) do %>
-                        <dd><a href="/res/<%= o.href %>"><%= o.label %></a></dd>
+                        <dd><a href="<%= o.href %>"><%= o.label %></a></dd>
                     <% end %>
                 <% end %>
                 </dl>