Ver código fonte

Almost done with thumbnails.

scossu 1 semana atrás
pai
commit
cc97458585

+ 6 - 0
config/model/typedef/file.lua

@@ -18,6 +18,12 @@ return {
             min_cardinality = 1,
             max_cardinality = 1,
         },
+        ["pas:thumbnail"] = {
+            label = "Thumbnail",
+            type = "string",
+            min_cardinality = 1,
+            max_cardinality = 1,
+        },
         ["dc:format"] = {
             label = "MIME type",
             type = "string",

+ 2 - 2
scratch.lua

@@ -11,8 +11,8 @@ local hgen = require "pocket_archive.html_generator"
 local st = store.new(store.MDB, pkar.store_id, true)  -- delete prev archive
 
 ---[[
-sip = sub.generate_sip_v2(
-    "test/sample_submission/postcard-bag/data/submission-v2.csv")
+sip = sub.generate_sip(
+    "test/sample_submission/postcard-bag/data/pkar_submission.csv")
 sub.deposit(sip)
 --]]
 

+ 78 - 42
src/html_generator.lua

@@ -17,7 +17,23 @@ local transformers = require "pocket_archive.transformers"
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
 
--- Compile all templates once.
+-- Default store graph to search all triples.
+local gr = graph.new(pkar.store, term.DEFAULT_CTX)
+
+-- Some commonly used terms.
+local dc_title_p = term.new_iriref_ns("dc:title")
+local tn_p = term.new_iriref_ns("pas:thumbnail")
+local first_p = term.new_iriref_ns("pas:first")
+local next_p = term.new_iriref_ns("pas:next")
+local path_p = term.new_iriref_ns("pas:path")
+local content_type_p = term.new_iriref_ns("pas:contentType")
+local file_t = term.new_iriref_ns("pas:File")
+
+-- Namespaces.
+local par_ns = nsm.get_ns("par")
+--local pas_ns = namespace.get_ns("pas")
+
+-- HTML templates. Compile them only once.
 -- TODO Add override for user-maintained templates.
 local fh, idx_tpl, dres_tpl, ores_tpl
 fh = datafile.open("templates/index.html")
@@ -39,9 +55,9 @@ fh:close()
 
 -- HTML generator module.
 local M = {
-    res_dir = pkar.config.htmlgen.out_dir .. "/res",
-    asset_dir = pkar.config.htmlgen.out_dir .. "/assets",
-    media_dir = pkar.config.htmlgen.out_dir .. "/media",
+    res_dir = plpath.join(pkar.config.htmlgen.out_dir, "res"),
+    asset_dir = plpath.join(pkar.config.htmlgen.out_dir, "assets"),
+    media_dir = plpath.join(pkar.config.htmlgen.out_dir, "media"),
 }
 
 
@@ -60,9 +76,22 @@ local function get_breadcrumbs(mconf)
 end
 
 
-local function generate_dres(s, mconf)
-    local gr = graph.new(pkar.store, term.DEFAULT_CTX)
+--- Resolve the thumbnail location for a resource.
+--
+-- When the type set of `s` (looked up via pkar.RDF_TYPE) contains pas:File,
+-- the location is built from the subject URI with the `par` namespace
+-- removed, sharded by the first two pairs of characters of the file name.
+-- Otherwise the function follows pas:first links until it reaches a file or
+-- a node without a pas:first value.
+--
+-- NOTE(review): the result is joined onto M.media_dir, so it is a path under
+-- the output directory and does not include a "tn" subdirectory. Confirm
+-- this matches where thumbnails are actually written and how templates
+-- expect to reference them.
+--
+-- @param s Subject term of the resource.
+-- @return The thumbnail path as a string, or nil when no file is reached.
+local function get_tn_url(s)
+    if gr:attr(s, pkar.RDF_TYPE)[file_t] then
+        -- The subject is a file.
+        -- FIXME do not hardcode the extension.
+        -- Declared local so the name does not leak into the global table.
+        local tn_fname = s.data:gsub(par_ns, "") .. ".jpg"
+        return plpath.join(
+                M.media_dir, tn_fname:sub(1, 2), tn_fname:sub(3, 4), tn_fname)
+    end
+
+    -- Recurse through first children until a file, or a leaf without
+    -- children, is found.
+    local first_child = next(gr:attr(s, first_p))
+    if first_child then return get_tn_url(first_child) end
+end
+
 
+local function generate_dres(s, mconf)
     local dmd = {}
     local rel = {}
     local children = {}
@@ -80,25 +109,22 @@ local function generate_dres(s, mconf)
             for o in pairs(ots) do table.insert(dmd[fname], o.data) end
         elseif fname == "pas:first" then
             -- Build a linked list for every first found.
-            local dc_title = term.new_iriref_ns("dc:title")
-            local tn_p = term.new_iriref_ns("pas:thumbnail")
             for o in pairs(ots) do
                 -- Loop through all first children.
-                local node_uri = o
-                logger:debug("local node_uri: ", node_uri.data)
+                local child_s = o
+                logger:debug("local child_s: ", child_s.data)
                 local ll = {}
-                --require "debugger"()
-                while node_uri do
+                require "debugger"()
+                while child_s do
                     -- Loop through all next nodes for each first child.
                     table.insert(ll, {
-                        href = node_uri.data:gsub(
+                        href = child_s.data:gsub(
                                 nsm.get_ns("par"), "/res/") .. ".html",
-                        label = (next(gr:attr(node_uri, dc_title)) or NT).data,
-                        tn = next(gr:attr(node_uri, tn_p)),
+                        label = (next(gr:attr(child_s, dc_title_p)) or NT).data,
+                        tn = get_tn_url(child_s),
                     })
-                    local next_attr = gr:attr(
-                            node_uri, term.new_iriref_ns("pas:next"))
-                    node_uri = next(next_attr)  -- There can only be one "next"
+                    -- There can only be one "next"
+                    child_s = next(gr:attr(child_s, next_p))
                 end
                 table.insert(children, ll)
             end
@@ -111,7 +137,7 @@ local function generate_dres(s, mconf)
             -- TODO differentiate term types
             for o in pairs(ots) do table.insert(attr, o.data) end
             table.sort(attr)
-            if p == term.new_iriref_ns("dc:title") then title = attr[1] end
+            if p == dc_title_p then title = attr[1] end
             table.insert(dmd, attr)
         end
         ::skip::
@@ -136,7 +162,6 @@ local function generate_dres(s, mconf)
         children = children,
         ls_next = ls_next,
         breadcrumbs = get_breadcrumbs(mconf),
-        deliverable = deliverable,
     })
 
     local res_id = s.data:gsub(nsm.get_ns("par"), "")
@@ -150,8 +175,6 @@ end
 
 
 local function generate_ores(s, mconf)
-    local gr = graph.new(pkar.store, term.DEFAULT_CTX)
-
     local techmd = {}
     local rel = {}
     -- Metadata
@@ -184,21 +207,41 @@ local function generate_ores(s, mconf)
     logger:debug("techmd:", pp.write(techmd))
     logger:debug("REL:", pp.write(rel))
 
-    --require "debugger"()
     -- Transform and move media assets.
+    local dest_fname, dest_dir, dest  -- Reused for thumbnail.
     logger:info("Transforming resource file.")
     local res_path = techmd["pas:path"]
     if not res_path then error("No file path for File resource!") end
     local txconf = (mconf.transformers or NT).deliverable or {fn = "copy"}
-    local dest_fname = plpath.basename(res_path[1])
-    if txconf.ext then
-        dest_fname = plpath.splitext(dest_fname) .. txconf.ext
-    end
-    dest = M.media_dir .. "/" .. dest_fname
+    -- Set file name to resource ID + source extension.
+    dest_fname = (
+            s.data:gsub(par_ns, "") ..
+            (txconf.ext or plpath.extension(res_path[1])))
+    dest_dir = plpath.join(
+            M.media_dir, dest_fname:sub(1, 2), dest_fname:sub(3, 4))
+    dir.makepath(dest_dir)
+    dest = plpath.join(dest_dir, dest_fname)
     assert(transformers[txconf.fn](
             res_path[1], dest, table.unpack(txconf or NT)))
-    local deliverable = dest:gsub(pkar.config.htmlgen.out_dir, "..")
-    logger:info("Deliverable: ", dest)
+    local deliverable = dest:gsub(pkar.config.htmlgen.out_dir, "")
+    logger:info("Access file: ", deliverable)
+
+    -- Thumbnail.
+    local tn
+    txconf = (mconf.transformers or NT).thumbnail
+    if txconf then
+        if txconf.ext then
+            dest_fname = plpath.splitext(dest_fname) .. txconf.ext
+        end
+        dest_dir = plpath.join(
+                M.media_dir, "tn", dest_fname:sub(1, 2), dest_fname:sub(3, 4))
+        dir.makepath(dest_dir)
+        dest = plpath.join(dest_dir, dest_fname)
+        assert(transformers[txconf.fn](
+                res_path[1], dest, table.unpack(txconf or NT)))
+        tn = dest:gsub(pkar.config.htmlgen.out_dir, "")
+        logger:info("Thumbnail: ", tn)
+    end
 
     out_html = ores_tpl({
         site_title = pkar.config.site.title or pkar.default_title,
@@ -211,6 +254,7 @@ local function generate_ores(s, mconf)
         ls_next = ls_next,
         breadcrumbs = get_breadcrumbs(mconf),
         deliverable = deliverable,
+        thumbnail = tn,
     })
 
     local res_id = s.data:gsub(nsm.get_ns("par"), "")
@@ -224,12 +268,7 @@ end
 
 
 M.generate_resource = function(s)
-    local gr = graph.new(pkar.store, term.DEFAULT_CTX)
-
-    local res_type
-    local type_attr = gr:attr(
-        s, term.new_iriref_ns("pas:contentType"))
-    res_type = next(type_attr).data
+    local res_type = next(gr:attr(s, content_type_p)).data
     local mconf = model.models[res_type]
 
     if mconf.types["pas:File"] then return generate_ores(s, mconf)
@@ -238,7 +277,6 @@ end
 
 
 M.generate_resources = function()
-    local gr = graph.new(pkar.store, term.DEFAULT_CTX)
     local subjects = gr:unique_terms(triple.POS_S)
 
     -- TODO parallelize
@@ -250,7 +288,6 @@ end
 
 M.generate_idx = function()
     local obj_idx = {}
-    local gr = graph.new(pkar.store, term.DEFAULT_CTX)
     -- Get all subjects of type: Artifact.
     s_ts = gr:term_set(
         pkar.RDF_TYPE, triple.POS_P,
@@ -258,8 +295,7 @@ M.generate_idx = function()
     )
     for s in pairs(s_ts) do
         local s_label = nsm.denormalize_uri(s.data)
-        local titles = gr:attr(s, term.new_iriref_ns("dc:title"))
-        local obj = {title = next(titles)}
+        local obj = {title = next(gr:attr(s, dc_title_p))}
         if obj.title then obj_idx[s_label] = obj end
     end
 
@@ -271,7 +307,7 @@ M.generate_idx = function()
         obj_idx = obj_idx,
     })
 
-    local idx_path = pkar.config.htmlgen.out_dir .. "/index.html"
+    local idx_path = plpath.join(pkar.config.htmlgen.out_dir, "index.html")
     local ofh = assert(io.open(idx_path, "w"))
 
     logger:debug("Writing info at ", idx_path)
@@ -288,7 +324,7 @@ M.generate_site = function()
     dir.rmtree(M.asset_dir)
     dir.makepath(M.asset_dir)
     dir.rmtree(M.media_dir)
-    dir.makepath(M.media_dir)
+    dir.makepath(plpath.join(M.media_dir, "tn"))
 
     assert(M.generate_idx())
     assert(M.generate_resources())

+ 87 - 26
src/submission.lua

@@ -10,9 +10,10 @@ local term = require "volksdata.term"
 local triple = require "volksdata.triple"
 local graph = require "volksdata.graph"
 
-local mc = require "pocket_archive.monocypher"
-local model = require "pocket_archive.model"
 local pkar = require "pocket_archive"
+local model = require "pocket_archive.model"
+local mc = require "pocket_archive.monocypher"
+local transformers = require "pocket_archive.transformers"
 
 
 local logger = pkar.logger
@@ -43,6 +44,33 @@ local function escape_pattern(s)
     return (s:gsub(".", matches))
 end
 
+
+--[[
+    Only generate a thumbnail for pas:File types.
+
+    Non-file resources may be assigned a thumbnail from a contained file
+    or from a stock type icon in the metadata population phase.
+--]]
+--[=[
+local function generate_thumbnail(rsrc, sip_root, tn_dir)
+    local mconf = model.models[rsrc["pas:contentType"]]
+    if not mconf.types["pas:File"] then return end
+
+    local txconf = (mconf.transformers or NT).thumbnail or {fn = "type_icon"}
+    local src = plpath.join(sip_root, rsrc["pas:sourcePath"])
+    local dest_fname = rsrc.id:gsub("^par:", "")
+    local ext = txconf.ext or plpath.extension(src)
+    local dest = plpath.join(tn_dir, dest_fname .. ext)
+    assert(transformers[txconf.fn](
+            src, dest, table.unpack(txconf or NT)))
+    local deliverable = dest:gsub(pkar.config.htmlgen.out_dir, "..")
+    logger:debug("thumbnail: ", dest)
+
+    return dest
+end
+--]=]
+
+
 -- Initialize libmagic database.
 local magic = libmagic.open( libmagic.MIME_TYPE, libmagic.NO_CHECK_COMPRESS )
 assert(magic:load())
@@ -74,13 +102,21 @@ M.generate_sip = function(path)
     local sub_data = assert(csv.open(path, {header = true}))
     local sip = {root_path = path:match("(.*/)")}
 
+    local tn_dir = plpath.join(sip.root_path, "proc", "tn")
+    dir.makepath(tn_dir)
+
     local prev_path
 
-    local i = 1
+    local i = 0
+    local row_n = 2  -- Skipping header row.
     for row in sub_data:lines() do
-        print("Processing row: " .. i)
-        print("Row path: " .. row["pas:sourcePath"])
+        logger:debug("Row path: ", row["pas:sourcePath"])
+        logger:debug("Parsing row:", pp.write(row))
         if row["pas:sourcePath"] ~= "" then
+            i = i + 1
+            logger:info(
+                    ("Processing LL resource #%d at row #%d.")
+                    :format(i, row_n))
             prev_path = row["pas:sourcePath"]
             -- New row.
             local id
@@ -95,43 +131,53 @@ M.generate_sip = function(path)
                 else sip[i][k] = {[v] = true} end  -- Multi-values are a set.
                 ::cont1::
             end
-            i = i + 1
+
+            --[[
+            -- Generate thumbnail for files.
+            local rsrc_path = plpath.join(
+                    sip.root_path, sip[i]["pas:sourcePath"])
+            if plpath.isfile(rsrc_path) then
+                --require "debugger"()
+                sip[i]["pas:thumbnail"] = generate_thumbnail(
+                        sip[i], sip.root_path, tn_dir)
+            end
+            --]]
         else
             -- Continuation of values from a previous row.
-            if i == 1 then
+            if i < 1 then
                 error("First row MUST have a path value.", 2)
             elseif not prev_path then
                 error(("No path information at row %d"):format(i), 2)
             else
-                row["pas:sourcePath"] = prev_path
                 for k, v in pairs(row) do
                     if v == "" then goto cont2 end  -- skip empty strings.
                     if pkar.config.md.single_values[k] then
                         -- It doesn't make much sense to overwrite, maybe throw an error?
-                        sip[i - 1][k] = v
+                        error(
+                            ("On CSV row #%d: field %s is single-valued.")
+                            :format(row_n, k))
                     else
-                        print("Value: " .. v)
-                        print("Inserting at row " .. i -1)
-                        sip[i - 1][k][v] = true
+                        logger:debug("Value: ", v)
+                        logger:debug("Inserting at row ", i - 1)
+                        sip[i][k][v] = true
                     end
                     ::cont2::
                 end
+                row["pas:sourcePath"] = prev_path
             end
         end
+        row_n = row_n + 1
     end
     -- Infer structure from paths and row ordering.
     for i, v in ipairs(sip) do
         for j = i + 1, #sip do
-            --print(string.format("comparing %s : %s", v["pas:sourcePath"], sip[j]["pas:sourcePath"]))
             if not v["pas:next"] and
                     sip[j]["pas:sourcePath"]:match("(.*/)") ==
                             v["pas:sourcePath"]:match("(.*/)") then
-                --print("next match.")
                 v["pas:next"] = sip[j].id
             end
             if not v["pas:first"] and
                     sip[j]["pas:sourcePath"]:match("^" .. escape_pattern(v["pas:sourcePath"])) then
-                --print("First child match.")
                 v["pas:first"] = sip[j].id
             end
         end
@@ -150,8 +196,7 @@ end
 
 M.update_rsrc_md = function(rsrc)
     -- TODO use a transaction when volksdata_lua supports it.
-    print("Updating resource md: ")
-    pp.dump(rsrc)
+    logger:info("Updating resource md: ", pp.write(rsrc))
     rmod = model.parse_model(rsrc["pas:contentType"])
     triples = {}
 
@@ -161,7 +206,7 @@ M.update_rsrc_md = function(rsrc)
         -- id is the subject, it won't be an attribute.
         if k == "id" then goto skip end
 
-        print("Adding attribute:", k, v)
+        logger:debug("Adding attribute:", k, pp.write(v))
         local p = term.new_iriref_ns(k)
         local o
         if type(v) == "table" then
@@ -200,7 +245,8 @@ end
 M.deposit = function(sip)
     for i, rsrc in ipairs(sip) do
         -- TODO Wrap this chunk into a txn. Each row is atomic.
-        print(("Processing resource #%d of %d: %s"):format(i, #sip, rsrc.id))
+        logger:debug(("Processing resource #%d of %d: %s"):format(
+                i, #sip, rsrc.id))
 
         local in_path = sip.root_path .. rsrc["pas:sourcePath"]
         local fext = plpath.extension(in_path)
@@ -208,7 +254,7 @@ M.deposit = function(sip)
         if not plpath.isfile(in_path) then goto continue end
 
         do
-            local tmp_dir = pkar.config.fs.ores_path .. "tmp/"
+            local tmp_dir = plpath.join(pkar.config.fs.ores_path, "tmp/")
             local file_ext
             _, file_ext = plpath.splitext(in_path)
             local tmp_path = tmp_dir .. rsrc.id .. file_ext
@@ -219,7 +265,7 @@ M.deposit = function(sip)
             rsrc["dc:format"] = {[magic:filehandle(ifh)] = true}
             local hash_it = mc.new_blake2b()
             local fsize = 0
-            print(("Hashing %s"):format(in_path))
+            logger:debug("Hashing ", in_path)
             local ofh = assert(io.open(tmp_path, "w"))
             while true do
                 chunk = ifh:read(pkar.config.fs.stream_chunk_size)
@@ -235,15 +281,27 @@ M.deposit = function(sip)
             ofh:close()
             ifh:close()
 
-            local out_dir = ("%s%s/%s/"):format(
+            -- Move the file to a path derived from its checksum.
+            local out_dir, out_path
+            out_dir = plpath.join(
                     pkar.config.fs.ores_path,
-                    checksum:sub(1, 4),
-                    checksum:sub(5, 9))
-            local out_path = out_dir .. checksum:sub(1,32) .. fext
+                    checksum:sub(1, 2),
+                    checksum:sub(3, 4))
+            out_path = plpath.join(out_dir, checksum:sub(1,32) .. fext)
             dir.makepath(out_dir)
-            print(("Moving file %s to %s"):format(tmp_path, out_path))
+            logger:debug(("Moving file %s to %s"):format(tmp_path, out_path))
             dir.movefile(tmp_path, out_path)
             rsrc["pas:path"] = out_path
+
+            -- Move the thumbnail, if one exists.
+            if rsrc["pas:thumbnail"] then
+                src_path = rsrc["pas:thumbnail"] 
+                out_path = plpath.join(
+                        out_dir, plpath.basename(src_path))
+                logger:debug(("Moving file %s to %s"):format(src_path, out_path))
+                dir.movefile(src_path, out_path)
+                rsrc["pas:thumbnail"] = out_path
+            end
         end
 
         ::continue::
@@ -253,6 +311,9 @@ M.deposit = function(sip)
         rsrc["dc:modified"] = tstamp
         M.update_rsrc_md(rsrc)
     end
+
+    -- Remove processing directory.
+    dir.rmtree(plpath.join(sip.root_path, "proc"))
 end
 
 

+ 10 - 0
src/transformers.lua

@@ -1,5 +1,8 @@
 local vips = require "vips"
 
+local pkar = require "pocket_archive"
+local logger = pkar.logger
+
 
 M = {}
 
@@ -16,6 +19,13 @@ M.img_resize = function(src, dest, size)
 end
 
 
+--- Placeholder transformer for type icons.
+-- Not implemented yet: it only logs a TODO notice and reports success.
+-- The `src`, `dest` and `rtype` arguments are currently unused.
+-- @return true, unconditionally.
+function M.type_icon(src, dest, rtype)
+    logger:info("TODO implement type icon")
+    return true
+end
+
+
 -- Straight copy with no transformation.
 M.copy = function(src, dest)
     local ifh = assert(io.open(src, "r"))

+ 7 - 1
templates/dres.html

@@ -61,7 +61,13 @@
                 <% for _, ll in ipairs(children) do %>
                 <li>Sequence (<%= #ll %> items):<ol>
                     <% for i, el in ipairs(ll) do %>
-                        <li><a href="<%= el.href %>"><%= el.label %></a></li>
+                        <li><a href="<%= el.href %>">
+                        <% if el.tn then %>
+                            <img src="<%= el.tn %>" alt="<%= el.label %>" />
+                        <% else %>
+                            <%= el.label %>
+                        <% end %>
+                        </a></li>
                     <%end %>
                     </ol></li>
                 <% end %>

+ 7 - 7
test/sample_submission/postcard-bag/data/pkar_submission.csv

@@ -1,10 +1,10 @@
 "pas:sourcePath","id","pas:contentType","dc:title","dc:alternative","dc:description"
-0001,0001,"Postcard","Example Postcard","This is an alternative label","Note that recto and verso representations have been named front and back, to emphasize that the ordering is not alphabetical."
+0001,00001,"Postcard","Example Postcard","This is an alternative label","Note that recto and verso representations have been named front and back, to emphasize that the ordering is not alphabetical."
 ,,,,"And this is another alternative label","Second description."
 ,,,,"Yet another alt label.",
-"0001/0001-front",0002,"Part","Recto",,
-"0001/0001-front/54321.jpg",0003,"StillImageFile",,,
-"0001/0001-back",0004,"Part","Verso",,
-"0001/0001-back/567890.jpg",0005,"StillImageFile",,,
-0006,0006,"StillImage","Single Image",,"Preparing kebab at Aqil's during curfew."
-0006/0685_04.jpg,0007,"StillImageFile","B/W scan of physical photo",,
+"0001/0001-front",00002,"Part","Recto",,
+"0001/0001-front/54321.jpg",00003,"StillImageFile",,,
+"0001/0001-back",00004,"Part","Verso",,
+"0001/0001-back/567890.jpg",00005,"StillImageFile",,,
+0006,00006,"StillImage","Single Image",,"Preparing kebab at Aqil's during curfew."
+0006/0685_04.jpg,00007,"StillImageFile","B/W scan of physical photo",,