local csv = require "ftcsv"
local datafile = require "datafile"
local dir = require "pl.dir"
local etlua = require "etlua"
local json = require "cjson"
local markdown = require "markdown"
local path = require "pl.path"
local pp = require "pl.pretty"

local nsm = require "volksdata.namespace"
local term = require "volksdata.term"
local triple = require "volksdata.triple"
local graph = require "volksdata.graph"

local pkar = require "pocket_archive"
local logger = pkar.logger
local model = require "pocket_archive.model"
local repo = require "pocket_archive.repo"
local get_single_v = repo.get_single_v
local transformers = require "pocket_archive.transformers"

local dbg = require "debugger"


-- "nil" table - for missing key fallback in chaining.
local NT = {}

-- Extension for type-based icon files.
local ICON_EXT = ".svg"

local asset_dir = pkar.config.htmlgen.out_dir
-- Search index (Fuse) output locations.
local index_path = path.join(asset_dir, "js", "fuse_index.json")
local keys_path = path.join(asset_dir, "js", "fuse_keys.json")

-- Linked-list bookkeeping properties, never added to the search index.
local idx_ignore = {first = true, next = true}
-- Collector for all search term keys. (Re)initialized by M.generate_site.
local idx_keys

-- HTML templates. Compile them only once.
-- TODO Add override for user-maintained templates.
local templates = {
    idx = {file = "index.html"},
    coll = {file = "coll.html"},
    dres = {file = "dres.html"},
    ores = {file = "ores.html"},
    head = {file = "head_common.html"},
    header = {file = "header.html"},
}
for _, tpl in pairs(templates) do
    local fh = datafile.open(path.join("templates", tpl.file))
    tpl.data = assert(etlua.compile(fh:read("a")))
end

-- HTML generator module.
local M = {
    res_dir = path.join(pkar.config.htmlgen.out_dir, "res"),
    asset_dir = asset_dir,
    icon_dir = path.join(asset_dir, "icons"),
    media_dir = path.join(pkar.config.htmlgen.out_dir, "media"),
    -- TODO switch depending on local FS or webserver generation.
    webroot = "",
}

-- Web (out_dir-relative) counterparts of the media paths.
local MEDIA_WEB_PATH = M.media_dir:gsub(pkar.config.htmlgen.out_dir, "")
local TN_FS_PATH = path.join(M.media_dir, "thumbnail")
local TN_WEB_PATH = TN_FS_PATH:gsub(pkar.config.htmlgen.out_dir, "")


-- Get model configuration from subject URI.
local function get_mconf(s)
    local ctype = get_single_v(s, "content_type")
    return model.types[model.uri_to_id[nsm.denormalize_uri(ctype.data)]]
end


-- Breadcrumbs, from top class to current class, as {id, label} pairs.
-- Also verify if it's a File subclass.
local function get_breadcrumbs(mconf)
    local breadcrumbs = {}
    for i = 1, #mconf.lineage do
        breadcrumbs[i] = {
            mconf.lineage[i],
            model.types[mconf.lineage[i]].label,
        }
    end
    return breadcrumbs
end


--[[ Infer thumbnail web path from the resource subject URI.

If the resource is not a file and as such does not have a thumbnail of its
own, traverse the list of first children and use the first one found with a
thumbnail.

@param[in] s Subject (resource) URI.
@param[in] ext Optional extension to add, including the extension separator
  (`.`). If not provided, `.jpg` is used.
--]]
local function get_tn_url(s, ext)
    -- TODO caller needs to pass correct ext
    if repo.gr:contains(triple.new(s, pkar.RDF_TYPE, model.id_to_uri.file)) then
        -- The subject is a file.
        return pkar.gen_pairtree(TN_WEB_PATH, s.data, ext or ".jpg", true)
    end
    -- If it's a brick, look for its ref.
    local ref = get_single_v(s, "ref")
    if ref then return get_tn_url(ref, ext) end
    -- Look for preferred rep first.
    local pref_rep = get_single_v(s, "pref_rep")
    if pref_rep then return get_tn_url(pref_rep, ext) end
    -- Recurse through all first children until one with a thumbnail, or a
    -- leaf without children, is found.
    -- FIX: a second, unreachable `pref_rep` lookup (it could only run after
    -- the branch above had already returned) was removed; go straight to the
    -- reference of the first child.
    local t = get_single_v(s, "first")
    if t then t = get_single_v(t, "ref") end
    if t then return get_tn_url(t, ext) end
end


--[[ Find closest available type icon to the given type.
--]]
local function get_icon_url(lineage)
    -- Walk the lineage from the most to the least specific type and return
    -- the web path of the first icon file found, or nil if none exists.
    for i = #lineage, 1, -1 do
        if path.isfile(path.join(M.icon_dir, lineage[i] .. ICON_EXT)) then
            return M.webroot .. "/icons/" .. lineage[i] .. ICON_EXT
        end
    end
end


--[[ Generate the HTML page for a collection resource.

@param[in] s Subject (collection) URI term.
@param[in] mconf Model configuration for the subject's content type.

@return true on success; raises on template or file I/O errors.
--]]
local function generate_coll(s, mconf)
    local pref_rep = get_single_v(s, "pref_rep")
    local members = {}
    local child_s = get_single_v(s, "first")
    --[[ FIXME this should check for the ref attribute of the proxy.
    if not repo.gr:contains(triple.new(
        s, model.id_to_uri.has_member, first
    )) then
        error(("first child %s is not a member of %s!")
                :format(first.data, s.data)
        )
    end
    --]]
    -- First child is the alternative pref representation.
    if not pref_rep then pref_rep = child_s end
    -- FIX: `pref_rep_file` was an accidental global.
    local pref_rep_url, pref_rep_file
    if pref_rep then
        pref_rep_url = pkar.gen_pairtree("/res", pref_rep.data, ".html", true)
        -- Collection page uses full size image, shrunk to size if necessary.
        -- FIX: guard against a nil thumbnail URL before calling :gsub().
        local tn_url = get_tn_url(pref_rep)
        if tn_url then
            pref_rep_file = tn_url:gsub(TN_WEB_PATH, MEDIA_WEB_PATH)
        end
    end

    -- Walk the proxy linked list and collect member entries.
    local child_ref, child_label, child_mconf
    while child_s do
        child_ref = get_single_v(child_s, "ref")
        -- Skip relationship with long description doc.
        if repo.gr:contains(
            triple.new(s, model.id_to_uri.long_description, child_ref)
        ) then goto skip end
        child_mconf = get_mconf(child_ref)
        --if not child_ref then child_ref = child_s end
        child_label = get_single_v(child_s, "label")
        if not child_label then
            -- Fall back to the source file name for files, or to the
            -- target resource's own label otherwise.
            if child_mconf.types.file then
                child_label = path.basename(
                        get_single_v(child_ref, "source_path").data)
            else
                child_label = get_single_v(child_ref, "label")
            end
        end
        -- FIX: guard against a completely missing label term.
        if child_label and child_label.data then
            child_label = child_label.data
        end
        table.insert(members, {
            icon = get_icon_url(child_mconf.lineage),
            tn = get_tn_url(child_s),
            href = pkar.gen_pairtree("/res", child_ref.data, ".html", true),
            label = child_label,
            ctype_label = child_mconf.label,
        })
        ::skip::
        child_s = get_single_v(child_s, "next")
    end

    local title = get_single_v(s, "label")
    local description = get_single_v(s, "description")
    -- Long description: an external Markdown document rendered as page body.
    local body_rel = get_single_v(s, "long_description")
    local body
    if body_rel then
        local body_res_path = get_single_v(body_rel, "archive_path").data
        local bfh = assert(io.open(body_res_path, "r"))
        body = markdown(bfh:read("a"))
        bfh:close()
    end

    -- FIX: `out_html` was an accidental global; nil-safe title/description.
    local out_html = templates.coll.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title,
        title = (title or NT).data,
        description = (description or NT).data,
        body = body,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        members = members,
        tn_url = get_tn_url(s),
        pref_rep = {
            url = pref_rep_url,
            file = pref_rep_file,
        },
        icon_url = get_icon_url(mconf.lineage),
        --breadcrumbs = get_breadcrumbs(mconf),
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(out_html)
    ofh:close()

    return true
end


--[[ Generate the HTML page for a descriptive (non-file) resource.

@param[in] s Subject URI term.
@param[in] mconf Model configuration for the subject's content type.

@return true on success; raises on template or file I/O errors.
--]]
local function generate_dres(s, mconf)
    local dmd = {}       -- Descriptive metadata attributes.
    local rel = {}       -- Relationships to other resources, keyed by pname.
    local children = {}  -- One linked list per "first" child found.
    local title
    -- FIX: `ls_next` was an accidental global that leaked a stale "next
    -- sibling" link into resources generated later without one.
    local ls_next
    -- Metadata
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    local pref_rep =
            get_single_v(s, "pref_rep") or get_single_v(s, "has_member")
    for p, ots_it in attrs:iter() do
        local pname = model.uri_to_id[nsm.denormalize_uri(p.data)] or p.data
        logger:debug("DRES pname: " .. pname)
        local pconf = ((mconf.properties or NT)[pname] or NT)
        -- RDF types are shown in breadcrumbs.
        if pname == pkar.RDF_TYPE.data then goto skip end
        if pname == "first" then
            -- Build a linked list for every first found.
            for o in ots_it do
                -- Loop through all first children.
                local child_s = o
                if not pref_rep then pref_rep = child_s end
                logger:debug("local child_s: ", child_s.data)
                local ll = {}
                -- Fallback labels.
                local label
                local ref = get_single_v(child_s, "ref")
                if ref then
                    label = (get_single_v(ref, "label") or NT).data
                else
                    label = (get_single_v(child_s, "label") or NT).data
                end
                if not label then
                    label = (get_single_v(child_s, "source_path") or NT).data
                    if label then
                        label = path.basename(label)
                    else
                        label = child_s.data
                    end
                end
                while child_s do
                    -- Loop through all next nodes for each first child.
                    local ref = get_single_v(child_s, "ref")
                    table.insert(ll, {
                        href = pkar.gen_pairtree(
                                "/res", ref.data, ".html", true),
                        label = label,
                        tn = get_tn_url(ref),
                    })
                    child_s = get_single_v(child_s, "next")
                end
                table.insert(children, ll)
            end
        elseif pname == "next" then
            -- Sibling: only the first value is used.
            for o in ots_it do
                ls_next = o.data
                break
            end
        elseif pconf.type == "resource" then
            -- Relationship.
            rel[pname] = {
                label = pconf.label,
                description = pconf.description,
                uri = pconf.uri,
            }
            for o in ots_it do
                table.insert(rel[pname], {
                    href = pkar.gen_pairtree("/res", o.data, ".html", true),
                    label = nsm.denormalize_uri(o.data),
                })
            end
        else
            -- Descriptive metadata.
            local attr = {
                label = pconf.label or pname,
                description = pconf.description,
                uri = pconf.uri,
            }
            -- TODO differentiate term types
            for o in ots_it do table.insert(attr, o.data) end
            table.sort(attr)
            -- Use the (first) rdfs-like label value as the page title.
            if p == model.id_to_uri.label then title = attr[1] end
            table.insert(dmd, attr)
        end
        ::skip::
    end
    -- Sort metadata by label, falling back to URI.
    table.sort(
        dmd,
        function(a, b) return ((a.label or a.uri) < (b.label or b.uri)) end
    )
    -- `rel` only has string keys, so this sort is a no-op; kept for parity.
    table.sort(rel)
    -- FIX: removed `table.sort(children)` — it sorted an array of tables
    -- with the default `<` comparator, which raises "attempt to compare two
    -- table values" as soon as a resource has two or more linked lists.
    logger:debug("Lineage:", pp.write(mconf.lineage))
    logger:debug("DMD:", pp.write(dmd))
    logger:debug("REL:", pp.write(rel))
    logger:debug("Children:", pp.write(children))
    logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))

    -- FIX: `pref_rep_file` was an accidental global.
    local pref_rep_url, pref_rep_file
    if pref_rep then
        pref_rep_url = pkar.gen_pairtree("/res", pref_rep.data, ".html", true)
        local tn_url = get_tn_url(pref_rep)
        if tn_url then
            pref_rep_file = tn_url:gsub(TN_WEB_PATH, MEDIA_WEB_PATH)
        end
    end
    -- FIX: `out_html` was an accidental global.
    local out_html = templates.dres.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title,
        title = title or s.data,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        dmd = dmd,
        rel = rel,
        children = children,
        ls_next = ls_next,
        tn_url = get_tn_url(s),
        pref_rep = {
            url = pref_rep_url,
            file = pref_rep_file,
        },
        icon_url = get_icon_url(mconf.lineage),
        breadcrumbs = get_breadcrumbs(mconf),
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(out_html)
    ofh:close()

    return true
end


--[[ Generate the HTML page for a file (opaque) resource.

Also transforms the source file into its presentation copy and optional
thumbnail under the media directory.

@param[in] s Subject URI term.
@param[in] mconf Model configuration for the subject's content type.

@return true on success; raises on transformer or file I/O errors.
--]]
local function generate_ores(s, mconf)
    local techmd = {}  -- Technical metadata, keyed by pname.
    local rel = {}     -- Relationships to other resources, keyed by pname.
    -- FIX: `ls_next` was an accidental global leaking across calls.
    local ls_next
    -- Metadata
    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    for p, ots_it in attrs:iter() do
        local pname = model.uri_to_id[nsm.denormalize_uri(p.data)] or p.data
        local pconf = ((mconf.properties or NT)[pname] or NT)
        -- RDF types are shown in breadcrumbs.
        if pname == pkar.RDF_TYPE.data then goto skip end
        if pname == "next" then
            -- Sibling: only the first value is used.
            for o in ots_it do
                ls_next = o.data
                break
            end
        elseif pconf.type == "resource" then
            -- Relationship.
            rel[pname] = {
                label = pconf.label,
                description = pconf.description,
                uri = pconf.uri,
            }
            for o in ots_it do
                table.insert(rel[pname], {
                    href = pkar.gen_pairtree("/res", o.data, ".html", true),
                    label = nsm.denormalize_uri(o.data),
                })
            end
        else
            -- Descriptive metadata.
            techmd[pname] = {
                label = pconf.label,
                description = pconf.description,
                uri = pconf.uri,
            }
            -- TODO differentiate term types
            for o in ots_it do table.insert(techmd[pname], o.data) end
            table.sort(techmd[pname])
        end
        ::skip::
    end
    -- Both tables only have string keys: no-op sorts, kept for parity.
    table.sort(techmd)
    table.sort(rel)
    logger:debug("Lineage:", pp.write(mconf.lineage))
    logger:debug("Breadcrumbs:", pp.write(get_breadcrumbs(mconf)))
    logger:debug("techmd:", pp.write(techmd))
    logger:debug("REL:", pp.write(rel))

    -- Transform and move media assets.
    local dest_fname, dest_dir, dest -- Reused for thumbnail.
    logger:debug("Transforming resource file.")
    -- FIX: renamed from `res_path`, which was shadowed by the HTML output
    -- path below.
    local src_paths = techmd.archive_path
    if not src_paths then error("No file path for File resource!") end
    local txconf = (mconf.gen or NT).transformers or {}
    local pres_conf = txconf.pres or {fn = "copy"}
    -- Set file name to resource ID + source extension.
    dest_fname = (
            s.data:gsub(nsm.get_ns("par"), "")
            .. (pres_conf.ext or path.extension(src_paths[1])))
    dest_dir = path.join(
            M.media_dir, dest_fname:sub(1, 2), dest_fname:sub(3, 4))
    dest = path.join(dest_dir, dest_fname)
    -- `pres_conf` is never nil here (defaulted above).
    assert(transformers[pres_conf.fn](
            src_paths[1], dest, table.unpack(pres_conf)))
    local pres = dest:gsub(pkar.config.htmlgen.out_dir, "")
    logger:debug("Presentation file: ", pres)

    -- Thumbnail.
    local tn
    if txconf.thumbnail then
        if txconf.thumbnail.ext then
            dest_fname = path.splitext(dest_fname) .. txconf.thumbnail.ext
        end
        dest = pkar.gen_pairtree(TN_FS_PATH, dest_fname)
        assert(transformers[txconf.thumbnail.fn](
                src_paths[1], dest, table.unpack(txconf.thumbnail)))
        -- FIX: the original `dest:gsub(M.media_dir, TN_WEB_PATH)` duplicated
        -- the "thumbnail" path segment (TN_WEB_PATH already ends with it);
        -- strip the out_dir prefix instead, matching get_tn_url's URLs.
        tn = dest:gsub(pkar.config.htmlgen.out_dir, "")
        logger:debug("Thumbnail: ", tn)
    end

    -- FIX: `out_html` was an accidental global.
    local out_html = templates.ores.data({
        --webroot = M.webroot,
        site_title = pkar.config.site.title,
        fname = path.basename(techmd.source_path[1]),
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        mconf = mconf,
        uri = s,
        techmd = techmd,
        rel = rel,
        ls_next = ls_next,
        icon_url = get_icon_url(mconf.lineage),
        breadcrumbs = get_breadcrumbs(mconf),
        pres = pres,
        thumbnail = tn,
        rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
    })
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
    local ofh = assert(io.open(res_path, "w"))
    ofh:write(out_html)
    ofh:close()

    return true
end


--[[ Build the search-index (Fuse) representation of a resource.

@param[in] s Subject URI term.
@param[in] mconf Model configuration for the subject's content type.

@return table Flat representation, later JSON-encoded into the index.
--]]
M.generate_search_idx = function(s, mconf)
    local rrep = {
        id = s.data:gsub("^.*/", ""),
        tn = get_tn_url(s),
        href = pkar.gen_pairtree("/res", s.data, ".html", true),
        content_type = mconf.id,
        type = mconf.lineage,
        icon = get_icon_url(mconf.lineage),
    }

    -- Convert one object term into its indexed value.
    local function format_value(pname, o)
        logger:debug(
                "Adding value to " .. pname .. ": "
                .. ((o or NT).data or "nil"))
        if pname == "type" or pname == "content_type" then
            -- FIX: the original returned `model.uri_to_id[p]` where `p` was
            -- out of scope (always nil, so the value was dropped); map the
            -- object URI to its model ID instead.
            return model.uri_to_id[nsm.denormalize_uri(o.data)]
        end
        return o.data
    end

    local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
    local fpath
    for p, ots_it, ots_size in attrs:iter() do
        local pname
        if p == model.id_to_uri.content_type then goto skip end
        if p == model.id_to_uri.source_path then
            -- Index the base file name of file resources.
            if mconf.types.file then
                fpath = ots_it()
                rrep.fname = path.basename(fpath.data)
            end
            goto skip
        end
        pname = model.uri_to_id[nsm.denormalize_uri(p.data)]
        if not pname then goto skip end
        local pconf = (mconf.properties or NT)[pname] or NT
        -- TODO dereference & index resource values.
        if idx_ignore[pname] or pconf.type == "resource" then goto skip end
        local attr
        -- Quick check if it's multi-valued
        if ots_size > 1 then
            attr = {}
            for o in ots_it do
                table.insert(attr, format_value(pname, o))
            end
        else
            attr = format_value(pname, ots_it())
        end
        rrep[pname] = attr      -- Add to search index.
        idx_keys[pname] = true  -- Add to search keys.
        ::skip::
    end

    return rrep
end


--[[ Generate a CSV linked-list representation of a resource.

@param[in] s Subject URI term.

@return string CSV document.
--]]
M.generate_ll = function(s)
    local res_gr = repo.get_rsrc(s)
    local mconf = get_mconf(s)
    local tdata = {
        {
            id = path.basename(s.data),
            content_type = mconf.id,
        },
    }
    -- NOTE(review): elsewhere connections() results are traversed with
    -- `:iter()`; confirm `pairs()` yields the same (p, iterator) pairs here.
    for p, ots_it in pairs(res_gr:connections(s, term.LINK_OUTBOUND)) do
        local pname = model.uri_to_id[nsm.denormalize_uri(p.data)]
        --if p == pkar.RDF_TYPE then goto skip_p end
        if not pname then goto skip_p end
        if pname == "content_type" then goto skip_p end
        for o in ots_it do
            -- Compact resource references to their bare IDs.
            if (mconf.properties[pname] or {}).type == "resource" then
                o = {data = o.data:gsub(nsm.get_ns("par"), "")}
            end
            -- Find a row where the pname slot has not been occupied.
            for i = 1, math.huge do
                if (tdata[i] or NT)[pname] then goto continue
                else
                    if tdata[i] then tdata[i][pname] = o.data
                    else tdata[i] = {[pname] = o.data} end
                    break
                end
                ::continue::
            end
        end
        ::skip_p::
    end
    -- FIXME ftcsv encodes nil values as `"nil"`. See
    -- https://github.com/FourierTransformer/ftcsv/issues/46
    return csv.encode(tdata, {encodeNilAs = ""})
end


--[[ Generate all artifacts for one resource.

Writes the RDF/Turtle document, the HTML page (dispatched by content type),
and appends the resource's JSON representation to the search index file.

@param[in] s Subject URI term.

@return The subject term on success.
--]]
M.generate_resource = function(s)
    local res_type = get_single_v(s, "content_type")
    local mconf = model.from_uri(res_type)

    -- Generate RDF/Turtle doc.
    local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
    dir.makepath(path.dirname(res_path))
    local ofh = assert(io.open(res_path, "w"))
    for chunk in repo.serialize_rsrc(s, "ttl") do ofh:write(chunk) end
    ofh:close()

    -- Generate HTML doc.
    if mconf.types.collection then
        assert(generate_coll(s, mconf))
    elseif mconf.types.file then
        assert(generate_ores(s, mconf))
    else
        assert(generate_dres(s, mconf))
    end

    -- Generate JSON rep and append to search index.
    -- FIX: `idx_rep` and `json_rep` were accidental globals.
    local idx_rep = M.generate_search_idx(s, mconf)
    local json_rep = " " .. json.encode(idx_rep)
    ofh = assert(io.open(index_path, "a"))
    ofh:write(json_rep)
    ofh:write(",\n")  -- Hack together the JSON objects in a list.
    ofh:close()

    return s
end


--[[ Generate all resources and the search index files.

@param[in] coll_id Currently unused; see TODO below.

@return true on success.
--]]
M.generate_resources = function(coll_id)
    -- TODO It's more complicated than this. Each member in the collection
    -- must be scanned recursively for outbound links and visited links must
    -- be noted down to avoid loops.
    --[[
    if coll_id then
        subject_ts = repo.gr:term_set(
                term.new_iriref_ns("par:" .. coll_id), triple.POS_S,
                model.id_to_uri.has_member, triple.POS_P
        )
    else subjects_ts = repo.gr:unique_terms(triple.POS_S) end
    --]]
    -- FIX: `subjects_ts` was an accidental global.
    local subjects_ts = repo.gr:unique_terms(triple.POS_S)

    -- Initialize the JSON template with an opening brace.
    local ofh = assert(io.open(index_path, "w"))
    ofh:write("[\n")
    ofh:close()

    -- TODO parallelize
    for s in subjects_ts:iter() do assert(M.generate_resource(s)) end

    -- Close the open list brace in the JSON template after all the resources
    -- have been added.
    ofh = assert(io.open(index_path, "a"))
    ofh:write(" {}\n]")  -- Add empty object to validate the last comma
    ofh:close()

    -- Write index keys.
    ofh = assert(io.open(keys_path, "w"))
    -- FIX: `idx_keys_ls` was an accidental global.
    local idx_keys_ls = {}
    for k in pairs(idx_keys) do table.insert(idx_keys_ls, k) end
    ofh:write(json.encode(idx_keys_ls))
    ofh:close()

    return true
end


--[[ Generate the site home page.

Lists the most recent Artifacts (capped by htmlgen.max_homepage_items,
default 10) and all Collections, each sorted by submission date.

@return true on success.
--]]
M.generate_homepage = function()
    local idx_data = {objects = {}, collections = {}}
    -- Get all subjects of type: Artifact.
    local s_ts = repo.gr:term_set(
            pkar.RDF_TYPE, triple.POS_P,
            term.new_iriref_ns("pas:Artifact"), triple.POS_O
    )
    local i = 1
    for s in s_ts:iter() do
        if i > (pkar.config.htmlgen.max_homepage_items or 10) then break end
        table.insert(idx_data.objects, {
            href = pkar.gen_pairtree("/res", s.data, ".html", true),
            title = get_single_v(s, "label"),
            -- NOTE(review): assumes every Artifact has a `submitted` value;
            -- a missing one raises here. Confirm against the model.
            submitted = get_single_v(s, "submitted").data,
            tn = get_tn_url(s),
        })
        i = i + 1
    end
    table.sort(
            idx_data.objects,
            function(a, b) return a.submitted < b.submitted end
    )

    -- Get all subjects of type: Collection.
    s_ts = repo.gr:term_set(
            pkar.RDF_TYPE, triple.POS_P,
            term.new_iriref_ns("pas:Collection"), triple.POS_O
    )
    for s in s_ts:iter() do
        table.insert(idx_data.collections, {
            href = pkar.gen_pairtree("/res", s.data, ".html", true),
            title = get_single_v(s, "label"),
            submitted = get_single_v(s, "submitted").data,
            tn = get_tn_url(s),
        })
    end
    table.sort(
            idx_data.collections,
            function(a, b) return a.submitted < b.submitted end
    )
    logger:debug(pp.write(idx_data))

    -- FIX: `out_html` was an accidental global.
    local out_html = templates.idx.data({
        webroot = M.webroot,
        site_title = pkar.config.site.title,
        head_tpl = templates.head.data,
        header_tpl = templates.header.data,
        nsm = nsm,
        idx_data = idx_data,
    })
    local idx_path = path.join(pkar.config.htmlgen.out_dir, "index.html")
    local ofh = assert(io.open(idx_path, "w"))
    logger:debug("Writing info at ", idx_path)
    ofh:write(out_html)
    ofh:close()

    return true
end


--[[ Remove the existing site output and recreate the asset directory.
--]]
M.reset_site = function()
    -- Reset target folders.
    -- TODO for larger sites, a selective update should be implemented by
    -- comparing RDF resource timestamps with HTML page timestamps. Post-MVP.
    if path.isdir(pkar.config.htmlgen.out_dir) then
        logger:warn("Removing existing web site.")
        dir.rmtree(pkar.config.htmlgen.out_dir)
    end
    -- Recreate asset dir.
    if path.isdir(M.asset_dir) then
        logger:warn("Removing existing web assets.")
        dir.rmtree(M.asset_dir)
    end
    dir.makepath(M.asset_dir)
    -- Copy static assets.
    logger:info(
            "Copying templates dir " .. datafile.path("templates/assets")
            .. " to " .. M.asset_dir)
    assert(dir.clonetree(
            datafile.path("templates/assets"), M.asset_dir, dir.copyfile))
end


--[[ Generate the whole site.

@param[in] keep If truthy, do not wipe the existing output first.
--]]
M.generate_site = function(keep)
    if not keep then M.reset_site() end
    -- Clear local search index keys; these four are always indexed.
    idx_keys = {
        id = true,
        content_type = true,
        type = true,
        fname = true,
    }
    -- Generate individual resource pages, RDF, and JSON index.
    assert(M.generate_resources())
    -- Generate index page.
    assert(M.generate_homepage())
end

return M