generator.lua 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. local csv = require "ftcsv"
  2. local datafile = require "datafile"
  3. local dir = require "pl.dir"
  4. local etlua = require "etlua"
  5. local json = require "cjson"
  6. local plpath = require "pl.path"
  7. local pp = require "pl.pretty"
  8. local nsm = require "volksdata.namespace"
  9. local term = require "volksdata.term"
  10. local triple = require "volksdata.triple"
  11. local graph = require "volksdata.graph"
  12. local pkar = require "pocket_archive"
  13. local logger = pkar.logger
  14. local model = require "pocket_archive.model"
  15. local repo = require "pocket_archive.repo"
  16. local transformers = require "pocket_archive.transformers"
  17. local dbg = require "debugger"
  -- "nil" table: an empty stand-in used as a fallback when chaining lookups
  -- through keys that may be missing, e.g. `(t.a or NT).b`.
  local NT = {}

  -- All resource subjects. Filled in lazily by M.generate_resources().
  local subjects

  local asset_dir = pkar.config.htmlgen.out_dir
  local index_path = plpath.join(asset_dir, "js", "fuse_index.json")
  local keys_path = plpath.join(asset_dir, "js", "fuse_keys.json")

  -- Predicates that are left out of the search index. The keys are matched
  -- against denormalized predicate names in M.generate_res_idx(), so they
  -- must use the prefixed form ("pas:first"), exactly as the rest of this
  -- file spells them.
  local idx_ignore = {["pas:first"] = true, ["pas:next"] = true}

  -- Collector for all search term keys.
  local idx_keys = {}
  -- HTML templates. Each one is read and compiled exactly once, when this
  -- module is loaded; the compiled renderer is stored in the `data` field.
  -- TODO Add override for user-maintained templates.
  local templates = {
      idx = {file = "templates/index.html"},
      dres = {file = "templates/dres.html"},
      ores = {file = "templates/ores.html"},
      head = {file = "templates/head_common.html"},
      header = {file = "templates/header.html"},
  }
  for _, tpl in pairs(templates) do
      -- Fail with the opener's own message if the template cannot be found,
      -- instead of a later "attempt to index a nil value".
      local fh = assert(datafile.open(tpl.file))
      local tpl_src = fh:read("a")
      -- Release the handle right away; only the source text is needed.
      fh:close()
      tpl.data = assert(etlua.compile(tpl_src))
  end
  -- HTML generator module. Holds the output locations used by every
  -- generator function below.
  local M = {}
  -- TODO switch depending on local FS or webserver generation.
  M.webroot = ""
  -- Static assets live directly in the configured output directory.
  M.asset_dir = asset_dir
  -- Per-resource pages and RDF documents.
  M.res_dir = plpath.join(pkar.config.htmlgen.out_dir, "res")
  -- Deliverable media files and thumbnails.
  M.media_dir = plpath.join(pkar.config.htmlgen.out_dir, "media")
  --- Build the breadcrumb trail for a content model.
  -- Walks `mconf.lineage` in order and pairs every entry with the label of
  -- the model registered under that name in `model.models`.
  -- @param mconf Model configuration table with a `lineage` sequence.
  -- @return Sequence of `{model_name, model_label}` pairs, in lineage order.
  local function get_breadcrumbs(mconf)
      local trail = {}
      local depth = #mconf.lineage
      for level = 1, depth do
          local model_name = mconf.lineage[level]
          trail[level] = {model_name, model.models[model_name].label}
      end

      return trail
  end
  --- Resolve the thumbnail path for a resource.
  -- If the subject carries the file RDF type, the path is derived from its
  -- ID. Otherwise the function follows the subject's first child,
  -- recursively, until a file is reached.
  -- @param s Subject term of the resource.
  -- @return Path under `M.media_dir`, or nothing when the chain of first
  --   children ends without reaching a file. Callers must handle nil.
  local function get_tn_url(s)
      if repo.gr:attr(s, pkar.RDF_TYPE)[pkar.FILE_T.hash] then
          -- The subject is a file.
          -- Escape the namespace so that punctuation in it is matched
          -- literally rather than interpreted as Lua pattern syntax.
          local ns_pat = pkar.PAR_NS:gsub("%p", "%%%0")
          -- FIXME do not hardcode the thumbnail extension.
          local tn_fname = s.data:gsub(ns_pat, "") .. ".jpg"

          return plpath.join(
              M.media_dir, tn_fname:sub(1, 2), tn_fname:sub(3, 4), tn_fname)
      end

      -- Recurse through all first children until one with a thumbnail, or a
      -- leaf without children, is found.
      local _, first_child = next(repo.gr:attr(s, pkar.FIRST_P))
      if first_child then return get_tn_url(first_child) end
  end
  --- Generate the HTML page for a descriptive (non-file) resource.
  -- Collects the resource's outbound properties into descriptive metadata,
  -- relationships, child lists and the next sibling, renders them with the
  -- `dres` template and writes the result under `M.res_dir`.
  -- @param s Subject term of the resource.
  -- @param mconf Model configuration for the resource's content type.
  -- @return true on success; raises on render or I/O failure.
  local function generate_dres(s, mconf)
      local dmd = {}
      local rel = {}
      local children = {}
      local title
      -- Must be local: as a global it would carry the previous resource's
      -- sibling over to resources that have none.
      local ls_next
      local props = mconf.properties or NT
      -- Media directory escaped for literal matching in gsub.
      local media_pat = M.media_dir:gsub("%p", "%%%0")

      -- Display label for a child: its title, else the base name of its
      -- path, else its URI.
      local function child_label(child)
          local _, lbl = next(repo.gr:attr(child, pkar.DC_TITLE_P))
          if lbl then return lbl.data end
          _, lbl = next(repo.gr:attr(child, pkar.PATH_P))
          if lbl then return plpath.basename(lbl.data) end
          return child.data
      end

      -- Metadata
      local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
      for p, ots in pairs(attrs) do
          local pname = nsm.denormalize_uri(p.data)
          local pconf = props[pname] or NT
          local p_label = pconf.label

          if pname == "rdf:type" then
              -- RDF types are shown in breadcrumbs; nothing to collect.
          elseif pconf.type == "rel" then
              -- Relationship. Values are stored next to the label and URI,
              -- in the same shape used for descriptive metadata.
              local targets = {label = p_label, uri = pname}
              for _, o in pairs(ots) do table.insert(targets, o.data) end
              rel[pname] = targets
          elseif pname == "pas:first" then
              -- Build a linked list for every first found.
              for _, o in pairs(ots) do
                  -- Loop through all first children.
                  local child_s = o
                  logger:debug("local child_s: ", child_s.data)
                  local ll = {}
                  while child_s do
                      -- Loop through all next nodes for each first child.
                      local tn_url = get_tn_url(child_s)
                      table.insert(ll, {
                          href = pkar.gen_pairtree(
                              "/res", child_s.data, ".html", true),
                          -- Each node gets its own label.
                          label = child_label(child_s),
                          -- A child without any thumbnail yields no `tn`.
                          tn = tn_url and tn_url:gsub(media_pat, "/media/tn"),
                      })
                      logger:debug(
                          "Child label for ", child_s.data, ": ",
                          ll[#ll].label or "nil")
                      -- There can only be one "next"
                      local _, next_s = next(
                          repo.gr:attr(child_s, pkar.NEXT_P))
                      child_s = next_s
                  end
                  table.insert(children, ll)
              end
          elseif pname == "pas:next" then
              -- Sibling. Only the first value found is used.
              for _, o in pairs(ots) do ls_next = o.data break end
          else
              -- Descriptive metadata.
              local attr = {label = p_label, uri = pname}
              -- TODO differentiate term types
              for _, o in pairs(ots) do table.insert(attr, o.data) end
              table.sort(attr)
              if p == pkar.DC_TITLE_P then title = attr[1] end
              table.insert(dmd, attr)
          end
      end

      table.sort(
          dmd, function(a, b)
              return ((a.label or a.uri) < (b.label or b.uri))
          end
      )
      -- `rel` is keyed by predicate name, so there is nothing to sort.
      -- Child lists are tables and need an explicit ordering; every list
      -- holds at least its first child, so order by that child's label.
      table.sort(children, function(a, b) return a[1].label < b[1].label end)

      local breadcrumbs = get_breadcrumbs(mconf)
      logger:debug("Lineage:", pp.write(mconf.lineage))
      logger:debug("DMD:", pp.write(dmd))
      logger:debug("REL:", pp.write(rel))
      logger:debug("Children:", pp.write(children))
      logger:debug("Breadcrumbs:", pp.write(breadcrumbs))

      local out_html = assert(templates.dres.data({
          --webroot = M.webroot,
          site_title = pkar.config.site.title or pkar.default_title,
          title = title or s.data,
          head_tpl = templates.head.data,
          header_tpl = templates.header.data,
          mconf = mconf,
          uri = s,
          dmd = dmd,
          rel = rel,
          children = children,
          ls_next = ls_next,
          breadcrumbs = breadcrumbs,
          rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
      }))

      local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
      local ofh = assert(io.open(res_path, "w"))
      ofh:write(out_html)
      ofh:close()

      return true
  end
  --- Generate the HTML page and media derivatives for a file resource.
  -- Collects technical metadata and relationships, runs the configured
  -- transformers to produce the deliverable and (optionally) a thumbnail
  -- under `M.media_dir`, renders the `ores` template and writes the page
  -- under `M.res_dir`.
  -- @param s Subject term of the resource.
  -- @param mconf Model configuration for the resource's content type.
  -- @return true on success; raises when the resource has no "pas:path",
  --   when a transformer is unknown or fails, or on render/I/O failure.
  local function generate_ores(s, mconf)
      local techmd = {}
      local rel = {}
      -- Must be local: as a global it would carry the previous resource's
      -- sibling over to resources that have none.
      local ls_next
      local props = mconf.properties or NT

      -- Escape a string so gsub matches it literally.
      local function literal(str)
          return (str:gsub("%p", "%%%0"))
      end

      -- Look up a transformer by its configured name and run it.
      local function transform(txconf, src, dest)
          local tx_fn = transformers[txconf.fn]
          if not tx_fn then
              error("Unknown transformer: " .. tostring(txconf.fn))
          end
          assert(tx_fn(src, dest, table.unpack(txconf)))
      end

      -- Metadata
      local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)
      for p, ots in pairs(attrs) do
          local pname = nsm.denormalize_uri(p.data)
          local pconf = props[pname] or NT
          local p_label = pconf.label

          if pname == "rdf:type" then
              -- RDF types are shown in breadcrumbs; nothing to collect.
          elseif pconf.type == "rel" then
              -- Relationship. Values go into `rel`, in the same shape used
              -- for technical metadata.
              local targets = {label = p_label, uri = pname}
              for _, o in pairs(ots) do table.insert(targets, o.data) end
              rel[pname] = targets
          elseif pname == "pas:next" then
              -- Sibling. Only the first value found is used.
              for _, o in pairs(ots) do ls_next = o.data break end
          else
              -- Technical metadata, keyed by predicate name.
              local attr = {label = p_label, uri = pname}
              -- TODO differentiate term types
              for _, o in pairs(ots) do table.insert(attr, o.data) end
              table.sort(attr)
              techmd[pname] = attr
          end
      end
      -- `techmd` and `rel` are keyed by predicate name; nothing to sort.

      logger:debug("Lineage:", pp.write(mconf.lineage))
      local breadcrumbs = get_breadcrumbs(mconf)
      logger:debug("Breadcrumbs:", pp.write(breadcrumbs))
      logger:debug("techmd:", pp.write(techmd))
      logger:debug("REL:", pp.write(rel))

      -- Transform and move media assets.
      logger:info("Transforming resource file.")
      local res_path = techmd["pas:path"]
      if not res_path then error("No file path for File resource!") end
      local src_path = res_path[1]

      local txconf = (mconf.transformers or NT).deliverable or {fn = "copy"}
      -- Set file name to resource ID + source extension.
      local dest_fname = (
          s.data:gsub(literal(pkar.PAR_NS), "") ..
          (txconf.ext or plpath.extension(src_path)))
      local dest_dir = plpath.join(
          M.media_dir, dest_fname:sub(1, 2), dest_fname:sub(3, 4))
      dir.makepath(dest_dir)
      local dest = plpath.join(dest_dir, dest_fname)
      transform(txconf, src_path, dest)
      local deliverable = dest:gsub(literal(pkar.config.htmlgen.out_dir), "")
      logger:info("Access file: ", deliverable)

      -- Thumbnail.
      local tn
      txconf = (mconf.transformers or NT).thumbnail
      if txconf then
          if txconf.ext then
              dest_fname = plpath.splitext(dest_fname) .. txconf.ext
          end
          dest_dir = plpath.join(
              M.media_dir, "tn", dest_fname:sub(1, 2), dest_fname:sub(3, 4))
          dir.makepath(dest_dir)
          dest = plpath.join(dest_dir, dest_fname)
          transform(txconf, src_path, dest)
          -- `dest` already contains the "tn" segment, so only the media
          -- root is swapped; replacing it with "/media/tn" would yield
          -- "/media/tn/tn/...".
          tn = dest:gsub(literal(M.media_dir), "/media")
          logger:info("Thumbnail: ", tn)
      end

      local out_html = assert(templates.ores.data({
          --webroot = M.webroot,
          site_title = pkar.config.site.title or pkar.default_title,
          -- Fall back to the resource path if no source path is recorded.
          pname = plpath.basename((techmd["pas:sourcePath"] or res_path)[1]),
          head_tpl = templates.head.data,
          header_tpl = templates.header.data,
          mconf = mconf,
          uri = s,
          techmd = techmd,
          rel = rel,
          ls_next = ls_next,
          breadcrumbs = breadcrumbs,
          deliverable = deliverable,
          thumbnail = tn,
          rdf_href = pkar.gen_pairtree("/res", s.data, ".ttl", true),
      }))

      local html_path = pkar.gen_pairtree(M.res_dir, s.data, ".html")
      local ofh = assert(io.open(html_path, "w"))
      ofh:write(out_html)
      ofh:close()

      return true
  end
  --- Build the search-index representation of a resource.
  -- Also records every indexed predicate name in the module-level
  -- `idx_keys` collector.
  -- @param s Subject term of the resource.
  -- @param mconf Model configuration for the resource's content type.
  -- @return Table with `id`, `href`, optionally `tn`, and one entry per
  --   indexed property (a single value, or a list when multi-valued).
  M.generate_res_idx = function(s, mconf)
      -- Media directory escaped for literal matching in gsub.
      local media_pat = M.media_dir:gsub("%p", "%%%0")
      -- A resource may have no thumbnail at all; then `tn` is left out.
      local tn_url = get_tn_url(s)
      local rrep = {
          id = nsm.denormalize_uri(s.data),
          tn = tn_url and tn_url:gsub(media_pat, "/media/tn"),
          href = pkar.gen_pairtree("/res", s.data, ".html", true),
      }

      local attrs = repo.gr:connections(s, term.LINK_OUTBOUND)

      -- Turn an object term into its indexed value: type-like properties
      -- are shortened to prefixed names, everything else is kept verbatim.
      local function format_value(pname, o)
          logger:debug(
              "Adding value to " .. pname .. ": " .. ((o or NT).data or "nil"))
          if pname == "rdf:type" or pname == "pas:contentType" then
              return nsm.denormalize_uri(o.data)
          end
          return o.data
      end

      for p, ots in pairs(attrs) do
          local pname = nsm.denormalize_uri(p.data)
          local pconf = (mconf.properties or NT)[pname] or NT
          -- NOTE(review): other functions in this file test for the
          -- property type "rel"; confirm that "resource" is the intended
          -- value here.
          local skip = idx_ignore[pname] or pconf.type == "resource"
          local first_k, first_o = next(ots)
          -- An empty object set has nothing to index.
          if not skip and first_k ~= nil then
              local attr
              if next(ots, first_k) ~= nil then
                  -- Multi-valued.
                  attr = {}
                  for _, o in pairs(ots) do
                      table.insert(attr, format_value(pname, o))
                  end
              else
                  attr = format_value(pname, first_o)
              end
              -- NOTE(review): the index entry is keyed by the label when
              -- one is configured, while the search key recorded below is
              -- always the predicate name — confirm that the consumer of
              -- the two JSON files expects this.
              rrep[pconf.label or pname] = attr  -- Add to search index.
              idx_keys[pname] = true  -- Add to search keys.
          end
      end

      return rrep
  end
  --- Serialize a resource's outbound properties as CSV.
  -- Each predicate becomes a column. Multiple values of one predicate are
  -- spread over successive rows, each taking the first row whose slot for
  -- that column is still free.
  -- @param s Subject term of the resource.
  -- @return The value returned by `csv.encode` for the assembled rows.
  M.generate_ll = function(s)
      local res_gr = repo.get_rsrc(s)
      local tdata = {}

      for p, ots in pairs(res_gr:connections(s, term.LINK_OUTBOUND)) do
          local pname = nsm.denormalize_uri(p.data)
          -- Rows below `row` are already known to be occupied for this
          -- column, so the search resumes where the previous value landed
          -- instead of restarting from the top.
          local row = 1
          for _, o in pairs(ots) do
              -- Find a row where the pname slot has not been occupied.
              while (tdata[row] or NT)[pname] do row = row + 1 end
              tdata[row] = tdata[row] or {}
              tdata[row][pname] = o.data
          end
      end

      -- FIXME ftcsv encodes nil values as `"nil"`. See
      -- https://github.com/FourierTransformer/ftcsv/issues/46
      return csv.encode(tdata)
  end
  --- Generate every output for one resource.
  -- Writes the Turtle document and the HTML page under `M.res_dir`, then
  -- appends the resource's JSON representation to the search index file.
  -- @param s Subject term of the resource.
  -- @return The subject `s`; raises if the resource has no content type,
  --   no model is registered for it, or any generation step fails.
  M.generate_resource = function(s)
      local _, res_type = next(repo.gr:attr(s, pkar.CONTENT_TYPE_P))
      if not res_type then
          error("No content type found for resource: " .. tostring(s.data))
      end
      local mconf = model.models[res_type.data]
      if not mconf then
          error("No model found for content type: " .. tostring(res_type.data))
      end

      -- Generate RDF/Turtle doc.
      local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
      local ofh = assert(io.open(res_path, "w"))
      ofh:write(repo.serialze_rsrc(s, "ttl"))
      ofh:close()

      -- Generate HTML doc.
      if mconf.types["pas:File"] then assert(generate_ores(s, mconf))
      else assert(generate_dres(s, mconf)) end

      -- Generate JSON rep and append to search index.
      local idx_rep = M.generate_res_idx(s, mconf)
      local json_rep = " " .. json.encode(idx_rep)
      ofh = assert(io.open(index_path, "a"))
      ofh:write(json_rep)
      ofh:write(",\n")  -- Hack together the JSON objects in a list.
      ofh:close()

      return s
  end
  --- Generate outputs for all resources, plus the search index files.
  -- The index file is assembled incrementally: an opening bracket is
  -- written first, M.generate_resource() appends one object and a comma
  -- per resource, and the list is closed at the end.
  -- @return true on success; raises on any generation or I/O failure.
  M.generate_resources = function()
      -- Look up if subjects are already populated.
      subjects = subjects or repo.gr:unique_terms(triple.POS_S)

      -- Initialize the JSON list with an opening bracket.
      local ofh = assert(io.open(index_path, "w"))
      ofh:write("[\n")
      ofh:close()

      -- TODO parallelize
      for _, s in pairs(subjects) do assert(M.generate_resource(s)) end

      -- Close the open list bracket after all the resources have been
      -- added.
      ofh = assert(io.open(index_path, "a"))
      ofh:write("{}]")  -- Add empty object to validate the last comma
      ofh:close()

      -- Write index keys, sorted so that repeated runs over the same data
      -- produce the same file.
      local idx_keys_ls = {}
      for k in pairs(idx_keys) do table.insert(idx_keys_ls, k) end
      table.sort(idx_keys_ls)
      ofh = assert(io.open(keys_path, "w"))
      ofh:write(json.encode(idx_keys_ls))
      ofh:close()

      return true
  end
  --- Generate the site's index page.
  -- Lists every subject typed as "pas:Artifact", ordered by creation
  -- value, renders the `idx` template and writes `index.html` into the
  -- configured output directory.
  -- @return true on success; raises on render or I/O failure.
  M.generate_idx = function()
      local obj_idx = {}
      -- Media directory escaped for literal matching in gsub.
      local media_pat = M.media_dir:gsub("%p", "%%%0")

      -- Get all subjects of type: Artifact.
      local s_ts = repo.gr:term_set(
          pkar.RDF_TYPE, triple.POS_P,
          term.new_iriref_ns("pas:Artifact"), triple.POS_O
      )
      for _, s in pairs(s_ts) do
          local _, title = next(repo.gr:attr(s, pkar.DC_TITLE_P))
          local _, created = next(repo.gr:attr(s, pkar.DC_CREATED_P))
          local tn_url = get_tn_url(s)
          table.insert(obj_idx, {
              href = pkar.gen_pairtree("/res", s.data, ".html", true),
              -- NOTE(review): this passes the title term itself, not its
              -- `data`; presumably the template unwraps it — confirm.
              title = title,
              -- Artifacts without a creation value sort first instead of
              -- aborting the whole index.
              created = created and created.data or "",
              -- An artifact without any thumbnail yields no `tn`.
              tn = tn_url and tn_url:gsub(media_pat, "/media/tn"),
          })
      end
      table.sort(obj_idx, function(a, b) return a.created < b.created end)
      logger:debug(pp.write(obj_idx))

      local out_html = assert(templates.idx.data({
          webroot = M.webroot,
          title = pkar.config.site.title or pkar.default_title,
          site_title = pkar.config.site.title or pkar.default_title,
          head_tpl = templates.head.data,
          header_tpl = templates.header.data,
          nsm = nsm,
          obj_idx = obj_idx,
      }))

      local idx_path = plpath.join(pkar.config.htmlgen.out_dir, "index.html")
      local ofh = assert(io.open(idx_path, "w"))
      logger:debug("Writing info at ", idx_path)
      ofh:write(out_html)
      ofh:close()

      return true
  end
  --- Regenerate the whole static site from scratch.
  -- Wipes and recreates the resource and media folders, copies the static
  -- assets, then builds all resource outputs and finally the index page.
  M.generate_site = function()
      -- Remove `old_dir` if it exists, then create `new_dir`.
      local function reset(old_dir, new_dir)
          if plpath.isdir(old_dir) then dir.rmtree(old_dir) end
          dir.makepath(new_dir)
      end

      -- Reset target folders.
      -- TODO for larger sites, a selective update should be implemented by
      -- comparing RDF resource timestamps with HTML page timestamps. Post-MVP.
      reset(M.res_dir, M.res_dir)
      -- Resetting the asset folder is currently disabled:
      -- if plpath.isdir(M.asset_dir) then dir.rmtree(M.asset_dir) end
      -- dir.makepath(M.asset_dir)
      reset(M.media_dir, plpath.join(M.media_dir, "tn"))

      -- Copy static assets.
      dir.clonetree("templates/assets", M.asset_dir, dir.copyfile)

      -- Generate individual resource pages, RDF, and JSON index.
      assert(M.generate_resources())

      -- Generate index page.
      assert(M.generate_idx())
  end
  414. return M