123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481 |
- local csv = require "ftcsv"
- local datafile = require "datafile"
- local dir = require "pl.dir"
- local etlua = require "etlua"
- local json = require "cjson"
- local plpath = require "pl.path"
- local pp = require "pl.pretty"
- local nsm = require "volksdata.namespace"
- local term = require "volksdata.term"
- local triple = require "volksdata.triple"
- local graph = require "volksdata.graph"
- local pkar = require "pocket_archive"
- local logger = pkar.logger
- local model = require "pocket_archive.model"
- local repo = require "pocket_archive.repo"
- local transformers = require "pocket_archive.transformers"
- local dbg = require "debugger"
-- "nil" table - for missing key fallback in chaining.
local NT = {}

-- All resource subjects; lazily populated by generate_resources().
local subjects

local asset_dir = pkar.config.htmlgen.out_dir
local index_path = plpath.join(asset_dir, "js", "fuse_index.json")
local keys_path = plpath.join(asset_dir, "js", "fuse_keys.json")

-- Predicates excluded from the search index.
-- NOTE(review): "pas_first" looks like a typo for "pas:first" (the prefixed
-- form used everywhere else in this file) — confirm before changing, since
-- fixing it alters index contents.
local idx_ignore = {["pas_first"] = true, ["pas:next"] = true,}

-- Collector for all search term keys.
local idx_keys = {}

-- HTML templates. Compile them only once.
-- TODO Add override for user-maintained templates.
local templates = {
    idx = {file = "templates/index.html"},
    dres = {file = "templates/dres.html"},
    ores = {file = "templates/ores.html"},
    head = {file = "templates/head_common.html"},
    header = {file = "templates/header.html"},
}
for _, tpl in pairs(templates) do
    local fh = datafile.open(tpl.file)
    tpl.data = assert(etlua.compile(fh:read("a")))
    fh:close()  -- The handle was previously leaked.
end

-- HTML generator module.
local M = {
    res_dir = plpath.join(pkar.config.htmlgen.out_dir, "res"),
    asset_dir = asset_dir,
    media_dir = plpath.join(pkar.config.htmlgen.out_dir, "media"),
    webroot = "", -- TODO switch depending on local FS or webserver generation.
}
--- Build the breadcrumb trail for a resource page.
-- Walks the model lineage from the top class down to the current class and
-- pairs each class name with its model label.
-- @param mconf Model configuration table (must carry a `lineage` sequence).
-- @return Sequence of {class_name, class_label} pairs.
local function get_breadcrumbs(mconf)
    local crumbs = {}
    for i, cname in ipairs(mconf.lineage) do
        crumbs[i] = {cname, model.models[cname].label}
    end
    return crumbs
end
--- Return the filesystem path of the thumbnail image for a subject.
-- If the subject is a File resource, the pairtree path is derived from its
-- ID. Otherwise, recurse through first children until a file, or a leaf
-- without children, is found.
-- @param s Subject term.
-- @return Thumbnail path string, or nil if no file descendant exists.
local function get_tn_url(s)
    if repo.gr:attr(s, pkar.RDF_TYPE)[pkar.FILE_T.hash] then
        -- The subject is a file.
        -- `local` added: this was an accidental global.
        local tn_fname = (s.data:gsub(pkar.PAR_NS, "") .. ".jpg") -- FIXME do not hardcode.
        return plpath.join(
            M.media_dir, tn_fname:sub(1, 2), tn_fname:sub(3, 4), tn_fname)
    end
    -- Recurse through all first children until one with a thumbnail, or a
    -- leaf without children, is found.
    local _, first_child = next(repo.gr:attr(s, pkar.FIRST_P))
    if first_child then return get_tn_url(first_child) end
end
--- Generate the HTML page for a descriptive (non-file) resource.
-- Gathers descriptive metadata, relationships, child linked lists and
-- breadcrumbs, renders the "dres" template, and writes the result to the
-- resource's pairtree HTML path.
-- @param s Subject term of the resource.
-- @param mconf Model configuration table for the resource's content type.
-- @return true on success (I/O errors raise).
local function generate_dres(s, mconf)
    local dmd = {}
    local rel = {}
    local children = {}
    local title
    -- Sibling URI from "pas:next", if any. `local` added: as an accidental
    -- global it leaked into subsequently generated resources.
    local ls_next
    -- Metadata
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    for p, ots in pairs(attrs) do
        local pname = nsm.denormalize_uri(p.data)
        -- `local` added: was an accidental global.
        local p_label = ((mconf.properties or NT)[pname] or NT).label
        -- RDF types are shown in breadcrumbs.
        if pname == "rdf:type" then goto skip
        elseif ((mconf.properties or NT)[pname] or NT).type == "rel" then
            -- Relationship. Values are appended to the relationship entry:
            -- the original inserted into dmd[pname], which is never
            -- initialized here and raised "bad argument to 'insert'".
            rel[pname] = {label = p_label, uri = pname}
            for _, o in pairs(ots) do table.insert(rel[pname], o.data) end
        elseif pname == "pas:first" then
            -- Build a linked list for every first found.
            for _, o in pairs(ots) do
                -- Loop through all first children.
                local child_s = o
                logger:debug("local child_s: ", child_s.data)
                local ll = {}
                -- Fallback labels: dc:title, then path basename, then URI.
                local label
                _, label = next(repo.gr:attr(child_s, pkar.DC_TITLE_P))
                if label then label = label.data
                else
                    _, label = next(repo.gr:attr(child_s, pkar.PATH_P))
                    if label then label = plpath.basename(label.data)
                    else label = child_s.data end
                end
                while child_s do
                    -- Loop through all next nodes for each first child.
                    -- NOTE(review): get_tn_url may return nil for a branch
                    -- with no file descendant; the gsub would then raise —
                    -- confirm every child chain ends in a file.
                    table.insert(ll, {
                        href = pkar.gen_pairtree(
                            "/res", child_s.data, ".html", true),
                        label = label,
                        tn = get_tn_url(child_s):gsub(M.media_dir, "/media/tn"),
                    })
                    logger:debug(
                        "Child label for ", child_s.data, ": ",
                        ll[#ll].label or "nil")
                    -- There can only be one "next"
                    _, child_s = next(repo.gr:attr(child_s, pkar.NEXT_P))
                end
                table.insert(children, ll)
            end
        elseif pname == "pas:next" then
            -- Sibling.
            for _, o in pairs(ots) do ls_next = o.data; break end
        else
            -- Descriptive metadata.
            local attr = {label = p_label, uri = pname}
            -- TODO differentiate term types
            for _, o in pairs(ots) do table.insert(attr, o.data) end
            table.sort(attr)
            if p == pkar.DC_TITLE_P then title = attr[1] end
            table.insert(dmd, attr)
        end
        ::skip::
    end
    table.sort(
        dmd, function(a, b)
            return ((a.label or a.uri) < (b.label or b.uri))
        end
    )
    table.sort(rel)  -- No-op: rel only has string keys.
    -- NOTE(review): default table.sort compares with `<`, which errors on
    -- table elements; this only works while #children < 2 — confirm intent.
    table.sort(children)
    logger:debug("Lineage:", pp.write(mconf.lineage))
    logger:debug("DMD:", pp.write(dmd))
    logger:debug("REL:", pp.write(rel))
    logger:debug("Children:", pp.write(children))
    logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))
    -- `local` added: was an accidental global.
    local out_html = templates.dres.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title or pkar.default_title,
        title = title or s.data,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        dmd = dmd,
        rel = rel,
        children = children,
        ls_next = ls_next,
        breadcrumbs = get_breadcrumbs(mconf),
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(out_html)
    ofh:close()
    return true
end
--- Generate the HTML page and media assets for a File (object) resource.
-- Gathers technical metadata, transforms the source file into a deliverable
-- and (optionally) a thumbnail, renders the "ores" template, and writes the
-- result to the resource's pairtree HTML path.
-- @param s Subject term of the resource.
-- @param mconf Model configuration table for the resource's content type.
-- @return true on success (I/O and transformer errors raise).
local function generate_ores(s, mconf)
    local techmd = {}
    local rel = {}
    -- Sibling URI from "pas:next", if any. `local` added: as an accidental
    -- global it leaked into subsequently generated resources.
    local ls_next
    -- Metadata
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    for p, ots in pairs(attrs) do
        local pname = nsm.denormalize_uri(p.data)
        -- `local` added: was an accidental global.
        local p_label = ((mconf.properties or NT)[pname] or NT).label
        -- RDF types are shown in breadcrumbs.
        if pname == "rdf:type" then goto skip
        elseif ((mconf.properties or NT)[pname] or NT).type == "rel" then
            -- Relationship. Values are appended to the relationship entry:
            -- the original inserted into techmd[pname], which is never
            -- initialized in this branch and raised "bad argument to
            -- 'insert'".
            rel[pname] = {label = p_label, uri = pname}
            for _, o in pairs(ots) do table.insert(rel[pname], o.data) end
        elseif pname == "pas:next" then
            -- Sibling.
            for _, o in pairs(ots) do ls_next = o.data; break end
        else
            -- Technical/descriptive metadata.
            techmd[pname] = {label = p_label, uri = pname}
            -- TODO differentiate term types
            for _, o in pairs(ots) do table.insert(techmd[pname], o.data) end
            table.sort(techmd[pname])
        end
        ::skip::
    end
    -- No-ops on string-keyed tables; kept for parity with generate_dres.
    table.sort(techmd)
    table.sort(rel)
    logger:debug("Lineage:", pp.write(mconf.lineage))
    logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))
    logger:debug("techmd:", pp.write(techmd))
    logger:debug("REL:", pp.write(rel))
    -- Transform and move media assets.
    local dest_fname, dest_dir, dest -- Reused for thumbnail.
    logger:info("Transforming resource file.")
    local res_path = techmd["pas:path"]
    if not res_path then error("No file path for File resource!") end
    local txconf = (mconf.transformers or NT).deliverable or {fn = "copy"}
    -- Set file name to resource ID + source extension.
    dest_fname = (
        s.data:gsub(pkar.PAR_NS, "") ..
        (txconf.ext or plpath.extension(res_path[1])))
    dest_dir = plpath.join(
        M.media_dir, dest_fname:sub(1, 2), dest_fname:sub(3, 4))
    dir.makepath(dest_dir)
    dest = plpath.join(dest_dir, dest_fname)
    -- Positional entries of txconf are forwarded as transformer args.
    assert(transformers[txconf.fn](
        res_path[1], dest, table.unpack(txconf or NT)))
    local deliverable = dest:gsub(pkar.config.htmlgen.out_dir, "")
    logger:info("Access file: ", deliverable)
    -- Thumbnail.
    local tn
    txconf = (mconf.transformers or NT).thumbnail
    if txconf then
        if txconf.ext then
            dest_fname = plpath.splitext(dest_fname) .. txconf.ext
        end
        dest_dir = plpath.join(
            M.media_dir, "tn", dest_fname:sub(1, 2), dest_fname:sub(3, 4))
        dir.makepath(dest_dir)
        dest = plpath.join(dest_dir, dest_fname)
        assert(transformers[txconf.fn](
            res_path[1], dest, table.unpack(txconf or NT)))
        tn = dest:gsub(M.media_dir, "/media/tn")
        logger:info("Thumbnail: ", tn)
    end
    -- `local` added: was an accidental global.
    local out_html = templates.ores.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title or pkar.default_title,
        -- NOTE(review): assumes "pas:sourcePath" is always present on File
        -- resources — confirm, otherwise this indexing raises.
        pname = plpath.basename(techmd["pas:sourcePath"][1]),
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        techmd = techmd,
        rel = rel,
        ls_next = ls_next,
        breadcrumbs = get_breadcrumbs(mconf),
        deliverable = deliverable,
        thumbnail = tn,
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })
    local out_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(out_path, "w"))
    ofh:write(out_html)
    ofh:close()
    return true
end
--- Build the search-index (Fuse) entry for one resource.
-- Collects all outbound attribute values, skipping linked-list plumbing and
-- resource-typed properties; single-valued attributes are stored as scalars,
-- multi-valued ones as arrays. Also records each indexed predicate in the
-- module-wide key collector.
-- @param s Subject term of the resource.
-- @param mconf Model configuration table for the resource's content type.
-- @return Table representation of the resource for the JSON index.
M.generate_res_idx = function(s, mconf)
    local rrep = {
        id = nsm.denormalize_uri(s.data),
        tn = get_tn_url(s):gsub(M.media_dir, "/media/tn"),
        href = pkar.gen_pairtree("/res", s.data, ".html", true),
    }

    -- Render one object value; URI-valued predicates are shortened to their
    -- prefixed form, everything else is taken verbatim.
    local function format_value(pname, o)
        logger:debug(
            "Adding value to " .. pname .. ": " .. ((o or NT).data or "nil"))
        if pname == "rdf:type" or pname == "pas:contentType" then
            return nsm.denormalize_uri(o.data)
        end
        return o.data
    end

    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    for p, ots in pairs(attrs) do
        local pname = nsm.denormalize_uri(p.data)
        local pconf = (mconf.properties or NT)[pname] or NT
        if not (idx_ignore[pname] or pconf.type == "resource") then
            -- Collect every value, then flatten singletons to a scalar.
            local vals = {}
            for _, o in pairs(ots) do
                vals[#vals + 1] = format_value(pname, o)
            end
            local attr = #vals > 1 and vals or vals[1]
            rrep[pconf.label or pname] = attr -- Add to search index.
            idx_keys[pname] = true -- Add to search keys.
        end
    end
    return rrep
end
--- Serialize a resource's outbound properties as CSV.
-- Each property name becomes a column; multi-valued properties spill into
-- additional rows (first free row per column).
-- @param s Subject term of the resource.
-- @return CSV document string.
M.generate_ll = function(s)
    local res_gr = repo.get_rsrc(s)
    -- `local` added: tdata and pname were accidental globals.
    local tdata = {}
    for p, ots in pairs(res_gr:connections(s, term.LINK_OUTBOUND)) do
        local pname = nsm.denormalize_uri(p.data)
        for _, o in pairs(ots) do
            -- Find the first row where the pname slot has not been occupied.
            for i = 1, math.huge do
                if not (tdata[i] or NT)[pname] then
                    if tdata[i] then tdata[i][pname] = o.data
                    else tdata[i] = {[pname] = o.data} end
                    break
                end
            end
        end
    end
    -- FIXME ftcsv encodes nil values as `"nil"`. See
    -- https://github.com/FourierTransformer/ftcsv/issues/46
    return csv.encode(tdata)
end
--- Generate all output documents for a single resource.
-- Writes the RDF/Turtle serialization, renders the HTML page (object vs.
-- descriptive template chosen by content type), and appends the resource's
-- JSON entry to the search index file.
-- @param s Subject term of the resource.
-- @return s on success (errors raise).
M.generate_resource = function(s)
    local res_type
    _, res_type = next(repo.gr:attr(s, pkar.CONTENT_TYPE_P))
    local mconf = model.models[res_type.data]
    -- Generate RDF/Turtle doc.
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(repo.serialze_rsrc(s, "ttl")) -- [sic] function name as declared in repo.
    ofh:close()
    -- Generate HTML doc.
    if mconf.types["pas:File"] then assert(generate_ores(s, mconf))
    else assert(generate_dres(s, mconf)) end
    -- Generate JSON rep and append to search index.
    -- `local` added: idx_rep and json_rep were accidental globals.
    local idx_rep = M.generate_res_idx(s, mconf)
    local json_rep = " " .. json.encode(idx_rep)
    ofh = assert(io.open(index_path, "a"))
    ofh:write(json_rep)
    ofh:write(",\n") -- Hack together the JSON objects in a list.
    ofh:close()
    return s
end
--- Generate pages, RDF and index entries for every repository subject.
-- Also writes the complete search index (fuse_index.json) and the list of
-- index keys (fuse_keys.json).
-- @return true on success (errors raise).
M.generate_resources = function()
    -- Look up if subjects are already populated.
    subjects = subjects or repo.gr:unique_terms(triple.POS_S)
    -- Initialize the JSON index with an opening bracket.
    local ofh = assert(io.open(index_path, "w"))
    ofh:write("[\n")
    ofh:close()
    -- TODO parallelize
    for _, s in pairs(subjects) do assert(M.generate_resource(s)) end
    -- Close the JSON list after all the resources have been added.
    ofh = assert(io.open(index_path, "a"))
    ofh:write("{}]") -- Add empty object to validate the last comma
    ofh:close()
    -- Write index keys.
    ofh = assert(io.open(keys_path, "w"))
    -- `local` added: was an accidental global.
    local idx_keys_ls = {}
    for k in pairs(idx_keys) do table.insert(idx_keys_ls, k) end
    ofh:write(json.encode(idx_keys_ls))
    ofh:close()
    return true
end
--- Generate the site landing page listing all pas:Artifact resources,
-- sorted by creation date.
-- @return true on success (errors raise).
M.generate_idx = function()
    local obj_idx = {}
    -- Get all subjects of type: Artifact.
    -- `local` added: was an accidental global.
    local s_ts = repo.gr:term_set(
        pkar.RDF_TYPE, triple.POS_P,
        term.new_iriref_ns("pas:Artifact"), triple.POS_O
    )
    for _, s in pairs(s_ts) do
        local title, created
        _, title = next(repo.gr:attr(s, pkar.DC_TITLE_P))
        _, created = next(repo.gr:attr(s, pkar.DC_CREATED_P))
        local obj = {
            href = pkar.gen_pairtree("/res", s.data, ".html", true),
            -- NOTE(review): `title` is the raw term while `created` is
            -- unwrapped via .data — confirm the template expects a term here.
            title = title,
            -- NOTE(review): raises if dc:created is missing — confirm it is
            -- mandatory for Artifacts.
            created = created.data,
            tn = get_tn_url(s):gsub(M.media_dir, "/media/tn"),
        }
        table.insert(obj_idx, obj)
    end
    table.sort(obj_idx, function(a, b) return a.created < b.created end)
    logger:debug(pp.write(obj_idx))
    -- `local` added: was an accidental global.
    local out_html = templates.idx.data({
        webroot = M.webroot,
        title = pkar.config.site.title or pkar.default_title,
        site_title = pkar.config.site.title or pkar.default_title,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        nsm = nsm,
        obj_idx = obj_idx,
    })
    local idx_path = plpath.join(pkar.config.htmlgen.out_dir, "index.html")
    local ofh = assert(io.open(idx_path, "w"))
    logger:debug("Writing info at ", idx_path)
    ofh:write(out_html)
    ofh:close()
    return true
end
--- Rebuild the whole static site from scratch.
-- Clears the output folders, copies static assets, then regenerates every
-- resource page, the search index, and the landing page.
M.generate_site = function()
    -- Reset target folders.
    -- TODO for larger sites, a selective update should be implemented by
    -- comparing RDF resource timestamps with HTML page timestamps. Post-MVP.
    local function clear(path)
        if plpath.isdir(path) then dir.rmtree(path) end
    end
    clear(M.res_dir)
    dir.makepath(M.res_dir)
    --[[
    clear(M.asset_dir)
    dir.makepath(M.asset_dir)
    --]]
    clear(M.media_dir)
    dir.makepath(plpath.join(M.media_dir, "tn"))
    -- Copy static assets.
    dir.clonetree("templates/assets", M.asset_dir, dir.copyfile)
    -- Generate individual resource pages, RDF, and JSON index.
    assert(M.generate_resources())
    -- Generate index page.
    assert(M.generate_idx())
end
- return M
|