-- HTML static-site generator for Pocket Archive.
--
-- Reads RDF resources from the repository graph and writes out, per resource:
-- an HTML page, an RDF/Turtle document, media derivatives (deliverable +
-- thumbnail for File resources), and entries in a Fuse.js search index.

local csv = require "ftcsv"
local datafile = require "datafile"
local dir = require "pl.dir"
local etlua = require "etlua"
local json = require "cjson"
local plpath = require "pl.path"
local pp = require "pl.pretty"

local nsm = require "volksdata.namespace"
local term = require "volksdata.term"
local triple = require "volksdata.triple"
local graph = require "volksdata.graph"

local pkar = require "pocket_archive"
local logger = pkar.logger
local model = require "pocket_archive.model"
local repo = require "pocket_archive.repo"
local transformers = require "pocket_archive.transformers"

local dbg = require "debugger"


-- "nil" table - for missing key fallback in chaining.
local NT = {}

-- All resource subjects (lazily populated in M.generate_resources).
local subjects

local asset_dir = pkar.config.htmlgen.out_dir
local index_path = plpath.join(asset_dir, "js", "fuse_index.json")
local keys_path = plpath.join(asset_dir, "js", "fuse_keys.json")

-- Properties excluded from the search index.
-- FIX: was `["pas_first"]`, which never matched the denormalized property
-- name `pas:first` used everywhere else in this module; assumed typo.
local idx_ignore = {["pas:first"] = true, ["pas:next"] = true,}
-- Collector for all search term keys.
local idx_keys = {}

-- HTML templates. Compile them only once.
-- TODO Add override for user-maintained templates.
local templates = {
    idx = {file = "templates/index.html"},
    dres = {file = "templates/dres.html"},
    ores = {file = "templates/ores.html"},
    head = {file = "templates/head_common.html"},
    header = {file = "templates/header.html"},
}
for _, tpl in pairs(templates) do
    local fh = datafile.open(tpl.file)
    tpl.data = assert(etlua.compile(fh:read("a")))
    fh:close()  -- FIX: handle was previously leaked.
end

-- HTML generator module.
local M = {
    res_dir = plpath.join(pkar.config.htmlgen.out_dir, "res"),
    asset_dir = asset_dir,
    media_dir = plpath.join(pkar.config.htmlgen.out_dir, "media"),
    webroot = "",  -- TODO switch depending on local FS or webserver generation.
}


--- Breadcrumbs, from top class to current class.
-- @tparam table mconf Model configuration with a `lineage` array.
-- @treturn table Array of {class_id, class_label} pairs.
local function get_breadcrumbs(mconf)
    local breadcrumbs = {}
    for i = 1, #mconf.lineage do
        breadcrumbs[i] = {
            mconf.lineage[i],
            model.models[mconf.lineage[i]].label,
        }
    end
    return breadcrumbs
end


--- Thumbnail URL for a subject.
-- If the subject is a File, derive the path from its ID; otherwise recurse
-- through first children until one with a thumbnail, or a leaf without
-- children, is found. Returns nil when no File is reachable.
local function get_tn_url(s)
    if repo.gr:attr(s, pkar.RDF_TYPE)[pkar.FILE_T.hash] then
        -- The subject is a file.
        -- FIXME do not hardcode the extension.
        -- FIX: tn_fname was an accidental global.
        local tn_fname = s.data:gsub(pkar.PAR_NS, "") .. ".jpg"
        return plpath.join(
            M.media_dir, tn_fname:sub(1, 2), tn_fname:sub(3, 4), tn_fname)
    end
    local _, first_child = next(repo.gr:attr(s, pkar.FIRST_P))
    if first_child then return get_tn_url(first_child) end
end


--- Generate the HTML page for a descriptive (non-File) resource.
-- @tparam term s Resource subject.
-- @tparam table mconf Model configuration for the resource's content type.
-- @treturn bool true on success; raises on I/O errors.
local function generate_dres(s, mconf)
    local dmd = {}       -- Descriptive metadata: sorted array of attr tables.
    local rel = {}       -- Relationships, keyed by property name.
    local children = {}  -- One linked list per `pas:first` value.
    local title
    -- FIX: ls_next was an accidental global; a resource without a
    -- `pas:next` would inherit the previous resource's sibling link.
    local ls_next

    -- Metadata.
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    for p, ots in pairs(attrs) do
        local pname = nsm.denormalize_uri(p.data)
        -- FIX: p_label was an accidental global.
        local p_label = ((mconf.properties or NT)[pname] or NT).label
        -- RDF types are shown in breadcrumbs.
        if pname == "rdf:type" then goto skip
        elseif ((mconf.properties or NT)[pname] or NT).type == "rel" then
            -- Relationship.
            rel[pname] = {label = p_label, uri = pname}
            for _, o in pairs(ots) do
                -- FIX: was `table.insert(dmd[pname], ...)` — dmd[pname] is
                -- always nil here, so any rel property raised an error.
                table.insert(rel[pname], o.data)
            end
        elseif pname == "pas:first" then
            -- Build a linked list for every first found.
            for _, o in pairs(ots) do
                -- Loop through all first children.
                local child_s = o
                logger:debug("local child_s: ", child_s.data)
                local ll = {}
                -- Fallback labels: dc:title, then file path basename, then
                -- the raw subject URI.
                local label
                _, label = next(repo.gr:attr(child_s, pkar.DC_TITLE_P))
                if label then label = label.data
                else
                    _, label = next(repo.gr:attr(child_s, pkar.PATH_P))
                    if label then label = plpath.basename(label.data)
                    else label = child_s.data end
                end
                while child_s do
                    -- Loop through all next nodes for each first child.
                    -- NOTE(review): get_tn_url() may return nil for a chain
                    -- with no File leaf, which would raise on :gsub() below
                    -- (a commented-out debug assert hinted at this) —
                    -- confirm every child chain terminates in a File.
                    table.insert(ll, {
                        href = pkar.gen_pairtree(
                            "/res", child_s.data, ".html", true),
                        label = label,
                        tn = get_tn_url(child_s):gsub(
                            M.media_dir, "/media/tn"),
                    })
                    logger:debug(
                        "Child label for ", child_s.data, ": ",
                        ll[#ll].label or "nil")
                    -- There can only be one "next".
                    _, child_s = next(repo.gr:attr(child_s, pkar.NEXT_P))
                end
                table.insert(children, ll)
            end
        elseif pname == "pas:next" then
            -- Sibling.
            for _, o in pairs(ots) do
                ls_next = o.data
                break
            end
        else
            -- Descriptive metadata.
            local attr = {label = p_label, uri = pname}
            -- TODO differentiate term types
            for _, o in pairs(ots) do table.insert(attr, o.data) end
            table.sort(attr)
            if p == pkar.DC_TITLE_P then title = attr[1] end
            table.insert(dmd, attr)
        end
        ::skip::
    end
    table.sort(dmd, function(a, b)
        return (a.label or a.uri) < (b.label or b.uri)
    end)
    -- FIX: removed `table.sort(rel)` (no-op: rel is hash-keyed, #rel == 0)
    -- and `table.sort(children)` (default `<` comparator raises "attempt to
    -- compare two table values" whenever more than one list is present).

    logger:debug("Lineage:", pp.write(mconf.lineage))
    logger:debug("DMD:", pp.write(dmd))
    logger:debug("REL:", pp.write(rel))
    logger:debug("Children:", pp.write(children))
    logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))

    -- FIX: out_html was an accidental global.
    local out_html = templates.dres.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title or pkar.default_title,
        title = title or s.data,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        dmd = dmd,
        rel = rel,
        children = children,
        ls_next = ls_next,
        breadcrumbs = get_breadcrumbs(mconf),
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })

    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(out_html)
    ofh:close()

    return true
end


--- Generate the HTML page and media derivatives for a File resource.
-- Runs the configured "deliverable" and (optional) "thumbnail" transformers
-- on the source file, then renders the ores template.
-- @treturn bool true on success; raises on I/O or transformer errors.
local function generate_ores(s, mconf)
    local techmd = {}  -- Technical metadata, keyed by property name.
    local rel = {}     -- Relationships, keyed by property name.
    -- FIX: ls_next was an accidental global (same leak as in generate_dres).
    local ls_next

    -- Metadata.
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    for p, ots in pairs(attrs) do
        local pname = nsm.denormalize_uri(p.data)
        -- FIX: p_label was an accidental global.
        local p_label = ((mconf.properties or NT)[pname] or NT).label
        -- RDF types are shown in breadcrumbs.
        if pname == "rdf:type" then goto skip
        elseif ((mconf.properties or NT)[pname] or NT).type == "rel" then
            -- Relationship.
            rel[pname] = {label = p_label, uri = pname}
            for _, o in pairs(ots) do
                -- FIX: was `table.insert(techmd[pname], ...)` — always nil
                -- in this branch, so any rel property raised an error.
                table.insert(rel[pname], o.data)
            end
        elseif pname == "pas:next" then
            -- Sibling.
            for _, o in pairs(ots) do
                ls_next = o.data
                break
            end
        else
            -- Technical metadata.
            techmd[pname] = {label = p_label, uri = pname}
            -- TODO differentiate term types
            for _, o in pairs(ots) do table.insert(techmd[pname], o.data) end
            table.sort(techmd[pname])
        end
        ::skip::
    end
    -- FIX: removed `table.sort(techmd)` / `table.sort(rel)`: both are no-ops
    -- on hash-keyed tables (#t == 0).

    logger:debug("Lineage:", pp.write(mconf.lineage))
    logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))
    logger:debug("techmd:", pp.write(techmd))
    logger:debug("REL:", pp.write(rel))

    -- Transform and move media assets.
    local dest_fname, dest_dir, dest -- Reused for thumbnail.
    logger:info("Transforming resource file.")
    local res_path = techmd["pas:path"]
    if not res_path then error("No file path for File resource!") end
    local txconf = (mconf.transformers or NT).deliverable or {fn = "copy"}
    -- Set file name to resource ID + source extension.
    dest_fname = (
        s.data:gsub(pkar.PAR_NS, "")
        .. (txconf.ext or plpath.extension(res_path[1])))
    -- Pairtree layout: first two ID characters, then the next two.
    dest_dir = plpath.join(
        M.media_dir, dest_fname:sub(1, 2), dest_fname:sub(3, 4))
    dir.makepath(dest_dir)
    dest = plpath.join(dest_dir, dest_fname)
    assert(transformers[txconf.fn](
        res_path[1], dest, table.unpack(txconf or NT)))
    local deliverable = dest:gsub(pkar.config.htmlgen.out_dir, "")
    logger:info("Access file: ", deliverable)

    -- Thumbnail.
    local tn
    txconf = (mconf.transformers or NT).thumbnail
    if txconf then
        if txconf.ext then
            -- Only the base name (first return value of splitext) is kept;
            -- the configured extension replaces the source one.
            dest_fname = plpath.splitext(dest_fname) .. txconf.ext
        end
        dest_dir = plpath.join(
            M.media_dir, "tn", dest_fname:sub(1, 2), dest_fname:sub(3, 4))
        dir.makepath(dest_dir)
        dest = plpath.join(dest_dir, dest_fname)
        assert(transformers[txconf.fn](
            res_path[1], dest, table.unpack(txconf or NT)))
        tn = dest:gsub(M.media_dir, "/media/tn")
        logger:info("Thumbnail: ", tn)
    end

    -- FIX: out_html was an accidental global.
    local out_html = templates.ores.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title or pkar.default_title,
        pname = plpath.basename(techmd["pas:sourcePath"][1]),
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        techmd = techmd,
        rel = rel,
        ls_next = ls_next,
        breadcrumbs = get_breadcrumbs(mconf),
        deliverable = deliverable,
        thumbnail = tn,
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })

    local html_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(html_path, "w"))
    ofh:write(out_html)
    ofh:close()

    return true
end


--- Build the search-index (Fuse.js) representation of one resource.
-- Single-valued properties map to a scalar, multi-valued ones to an array.
-- Also records every indexed property name in the module-level `idx_keys`.
M.generate_res_idx = function(s, mconf)
    local rrep = {
        id = nsm.denormalize_uri(s.data),
        tn = get_tn_url(s):gsub(M.media_dir, "/media/tn"),
        href = pkar.gen_pairtree("/res", s.data, ".html", true),
    }
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)

    -- Render one object value: URIs are shortened for type-like properties.
    local function format_value(pname, o)
        logger:debug(
            "Adding value to " .. pname .. ": " .. ((o or NT).data or "nil"))
        local v
        if pname == "rdf:type" or pname == "pas:contentType" then
            v = nsm.denormalize_uri(o.data)
        else
            v = o.data
        end
        return v
    end

    for p, ots in pairs(attrs) do
        local pname = nsm.denormalize_uri(p.data)
        local pconf = (mconf.properties or NT)[pname] or NT
        if idx_ignore[pname] or pconf.type == "resource" then goto skip end
        local attr
        local o
        -- Quick check if it's multi-valued: a second key after the first.
        if next(ots, next(ots)) then
            attr = {}
            for _, ov in pairs(ots) do
                table.insert(attr, format_value(pname, ov))
            end
        else
            _, o = next(ots)
            attr = format_value(pname, o)
        end
        rrep[pconf.label or pname] = attr  -- Add to search index.
        idx_keys[pname] = true  -- Add to search keys.
        ::skip::
    end
    return rrep
end


--- Serialize a resource's attributes as CSV (one column per property,
-- multiple values spread over successive rows).
M.generate_ll = function(s)
    local res_gr = repo.get_rsrc(s)
    -- FIX: tdata and pname were accidental globals.
    local tdata = {}
    for p, ots in pairs(res_gr:connections(s, term.LINK_OUTBOUND)) do
        local pname = nsm.denormalize_uri(p.data)
        for _, o in pairs(ots) do
            -- Find the first row where the pname slot has not been occupied
            -- (was an unbounded `for i = 1, math.huge` loop).
            local i = 1
            while (tdata[i] or NT)[pname] do i = i + 1 end
            if tdata[i] then tdata[i][pname] = o.data
            else tdata[i] = {[pname] = o.data} end
        end
    end
    -- FIXME ftcsv encodes nil values as `"nil"`. See
    -- https://github.com/FourierTransformer/ftcsv/issues/46
    return csv.encode(tdata)
end


--- Generate all artifacts for a single resource: Turtle doc, HTML page
-- (ores for Files, dres otherwise), and a search index entry.
-- @treturn term The subject, for chaining/assertion by the caller.
M.generate_resource = function(s)
    local _, res_type = next(repo.gr:attr(s, pkar.CONTENT_TYPE_P))
    local mconf = model.models[res_type.data]

    -- Generate RDF/Turtle doc.
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(repo.serialze_rsrc(s, "ttl"))  -- [sic] project API name.
    ofh:close()

    -- Generate HTML doc.
    if mconf.types["pas:File"] then assert(generate_ores(s, mconf))
    else assert(generate_dres(s, mconf)) end

    -- Generate JSON rep and append to search index.
    -- FIX: idx_rep and json_rep were accidental globals.
    local idx_rep = M.generate_res_idx(s, mconf)
    local json_rep = " " .. json.encode(idx_rep)
    ofh = assert(io.open(index_path, "a"))
    ofh:write(json_rep)
    ofh:write(",\n") -- Hack together the JSON objects in a list.
    ofh:close()

    return s
end


--- Generate pages and index entries for every subject in the repository,
-- then write the search index keys file.
M.generate_resources = function()
    -- Look up if subjects are already populated.
    subjects = subjects or repo.gr:unique_terms(triple.POS_S)

    -- Initialize the JSON template with an opening brace.
    local ofh = assert(io.open(index_path, "w"))
    ofh:write("[\n")
    ofh:close()

    -- TODO parallelize
    for _, s in pairs(subjects) do assert(M.generate_resource(s)) end

    -- Close the open list brace in the JSON template after all the resources
    -- have been added.
    ofh = assert(io.open(index_path, "a"))
    ofh:write("{}]") -- Add empty object to validate the last comma.
    ofh:close()

    -- Write index keys.
    ofh = assert(io.open(keys_path, "w"))
    -- FIX: idx_keys_ls was an accidental global.
    local idx_keys_ls = {}
    for k in pairs(idx_keys) do table.insert(idx_keys_ls, k) end
    ofh:write(json.encode(idx_keys_ls))
    ofh:close()

    return true
end


--- Generate the site index page, listing all pas:Artifact resources
-- sorted by creation date.
M.generate_idx = function()
    local obj_idx = {}
    -- Get all subjects of type pas:Artifact.
    -- FIX: s_ts was an accidental global.
    local s_ts = repo.gr:term_set(
        pkar.RDF_TYPE, triple.POS_P,
        term.new_iriref_ns("pas:Artifact"), triple.POS_O
    )
    for _, s in pairs(s_ts) do
        local title, created
        _, title = next(repo.gr:attr(s, pkar.DC_TITLE_P))
        _, created = next(repo.gr:attr(s, pkar.DC_CREATED_P))
        local obj = {
            href = pkar.gen_pairtree("/res", s.data, ".html", true),
            title = title,
            -- NOTE(review): raises if an Artifact lacks dc:created —
            -- confirm the property is mandatory for this type.
            created = created.data,
            tn = get_tn_url(s):gsub(M.media_dir, "/media/tn"),
        }
        table.insert(obj_idx, obj)
    end
    table.sort(obj_idx, function(a, b) return a.created < b.created end)
    logger:debug(pp.write(obj_idx))

    -- FIX: out_html was an accidental global.
    local out_html = templates.idx.data({
        webroot = M.webroot,
        title = pkar.config.site.title or pkar.default_title,
        site_title = pkar.config.site.title or pkar.default_title,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        nsm = nsm,
        obj_idx = obj_idx,
    })

    local idx_path = plpath.join(pkar.config.htmlgen.out_dir, "index.html")
    local ofh = assert(io.open(idx_path, "w"))
    logger:debug("Writing info at ", idx_path)
    ofh:write(out_html)
    ofh:close()

    return true
end


--- Generate the whole static site: reset output folders, copy assets,
-- generate all resource pages and the index page.
M.generate_site = function()
    -- Reset target folders.
    -- TODO for larger sites, a selective update should be implemented by
    -- comparing RDF resource timestamps with HTML page timestamps. Post-MVP.
    if plpath.isdir(M.res_dir) then dir.rmtree(M.res_dir) end
    dir.makepath(M.res_dir)
    --[[
    if plpath.isdir(M.asset_dir) then dir.rmtree(M.asset_dir) end
    dir.makepath(M.asset_dir)
    --]]
    if plpath.isdir(M.media_dir) then dir.rmtree(M.media_dir) end
    dir.makepath(plpath.join(M.media_dir, "tn"))

    -- Copy static assets.
    dir.clonetree("templates/assets", M.asset_dir, dir.copyfile)

    -- Generate individual resource pages, RDF, and JSON index.
    assert(M.generate_resources())
    -- Generate index page.
    assert(M.generate_idx())
end

return M