Przeglądaj źródła

Initial search implementation (index + JS, no UI).

scossu 16 godzin temu
rodzic
commit
d85a5338c4

+ 6 - 2
README.md

@@ -161,10 +161,14 @@ Simple road map for a rough prototype:
   - ✓ Index
   - ✓ Resource
   - ✓ Static assets
-  - ✓ Transformers
 - ⚒ Non-HTML generators
   - ✓ RDF (turtle)
   - ⎊ LL
+  - ✓ Transformers
+  - ⚒ JS search engine index
 - ⎊ Front end
-  - ⎊ JS search engine
+  -  JS search engine
   - ⎊ Styling
+      - ⎊ Default type icons
+- ⎊ QA
+  - ⎊ >100 resource data set

+ 1 - 1
config/model/typedef/artifact.lua

@@ -12,7 +12,7 @@ return {
         },
         ["pas:hasFile"] = {
             label = "Has file",
-            type = "rel",
+            type = "resource",
             range = {["pas:File"] = true},
         },
     },

+ 2 - 2
config/model/typedef/postcard.lua

@@ -5,12 +5,12 @@ return {
     properties = {
         ["pas:recto"] = {
             label = "Recto",
-            type = "rel",
+            type = "resource",
             range = {["pas:Part"] = true},
         },
         ["pas:verso"] = {
             label = "Verso",
-            type = "rel",
+            type = "resource",
             range = {["pas:Part"] = true},
         },
     }

+ 1 - 0
pocket_archive-scm-1.rockspec

@@ -23,6 +23,7 @@ dependencies = {
    "csv",
    "datafile",
    "etlua",
+   "lua-cjson",
    "lua-vips",
    "penlight",
    "sllog",

+ 96 - 5
src/html_generator.lua

@@ -1,6 +1,7 @@
 local datafile = require "datafile"
 local dir = require "pl.dir"
 local etlua = require "etlua"
+local json = require "cjson"
 local plpath = require "pl.path"
 local pp = require "pl.pretty"
 
@@ -23,6 +24,16 @@ local NT = {}
 -- Default store graph to search all triples.
 local gr
 
+-- All resource subjects.
+local subjects
+
+local asset_dir = pkar.config.htmlgen.out_dir
+local index_path = plpath.join(asset_dir, "js", "fuse_index.json")
+local keys_path = plpath.join(asset_dir, "js", "fuse_keys.json")
+-- Properties that are never added to the search index.
+local idx_ignore = {["pas:first"] = true, ["pas:next"] = true,}
+-- Collector for all search term keys.
+local idx_keys = {}
+
 -- HTML templates. Compile them only once.
 -- TODO Add override for user-maintained templates.
 local fh, idx_tpl, dres_tpl, ores_tpl
@@ -46,7 +57,7 @@ fh:close()
 -- HTML generator module.
 local M = {
     res_dir = plpath.join(pkar.config.htmlgen.out_dir, "res"),
-    asset_dir = plpath.join(pkar.config.htmlgen.out_dir, "assets"),
+    asset_dir = asset_dir,
     media_dir = plpath.join(pkar.config.htmlgen.out_dir, "media"),
 }
 
@@ -282,6 +293,46 @@ M.get_graph = function(s)
 end
 
 
+--- Build the search index entry for one resource.
+-- Properties listed in `idx_ignore`, or configured with type "resource",
+-- are left out. As a side effect, the name of every indexed field is
+-- recorded in the module-level `idx_keys` collector.
+-- @param s Subject term of the resource.
+-- @param mconf Model configuration table for the resource.
+-- @return Table holding the resource `id` plus one field per property.
+M.generate_res_idx = function(s, mconf)
+    local rrep = {id = nsm.denormalize_uri(s.data)}
+    local attrs = gr:connections(s, term.LINK_OUTBOUND)
+
+    local function format_value(fname, o)
+        logger:debug("Adding value to " .. fname .. ": " .. ((o or NT).data or "nil"))
+        -- These two properties get their value passed through
+        -- nsm.denormalize_uri; everything else is indexed as is.
+        if fname == "rdf:type" or fname == "pas:contentType" then
+            return nsm.denormalize_uri(o.data)
+        end
+        return o.data
+    end
+
+    for p, ots in pairs(attrs) do
+        local fname = nsm.denormalize_uri(p.data)
+        local pconf = (mconf.properties or NT)[fname] or NT
+        if idx_ignore[fname] or pconf.type == "resource" then goto skip end
+
+        local attr
+        -- Keep the first entry in locals so that no global `_` is written.
+        local first_k, first_o = next(ots)
+        if next(ots, first_k) then
+            -- Multi-valued: collect all values in a list.
+            attr = {}
+            for _, o in pairs(ots) do
+                table.insert(attr, format_value(fname, o))
+            end
+        else
+            attr = format_value(fname, first_o)
+        end
+
+        -- The search keys must name the same fields as the index entries,
+        -- so use one key for both.
+        local key = pconf.label or fname
+        rrep[key] = attr  -- Add to search index.
+        idx_keys[key] = true  -- Add to search keys.
+        ::skip::
+    end
+    return rrep
+end
+
+
 M.generate_resource = function(s)
     local res_type
     _, res_type = next(gr:attr(s, pkar.CONTENT_TYPE_P))
@@ -296,17 +347,46 @@ M.generate_resource = function(s)
     ofh:close()
 
     -- Generate HTML doc.
-    if mconf.types["pas:File"] then return generate_ores(s, mconf)
-    else return generate_dres(s, mconf) end
+    if mconf.types["pas:File"] then assert(generate_ores(s, mconf))
+    else assert(generate_dres(s, mconf)) end
+
+    -- Generate JSON rep and append to search index.
+    local idx_rep = M.generate_res_idx(s, mconf)
+    local json_rep = "  " .. json.encode(idx_rep)
+    ofh = assert(io.open(index_path, "a"))
+    ofh:write(json_rep)
+    ofh:write(",\n")  -- Hack together the JSON objects in a list.
+    ofh:close()
+
+    return s
 end
 
 
 M.generate_resources = function()
-    local subjects = gr:unique_terms(triple.POS_S)
+    -- Look up if subjects are already populated.
+    subjects = subjects or gr:unique_terms(triple.POS_S)
+
+    -- Initialize the JSON template with an opening brace.
+    local ofh = assert(io.open(index_path, "w"))
+    ofh:write("[\n")
+    ofh:close()
 
     -- TODO parallelize
     for _, s in pairs(subjects) do assert(M.generate_resource(s)) end
 
+    -- Close the open list brace in the JSON template after all the resources
+    -- have been added.
+    ofh = assert(io.open(index_path, "a"))
+    ofh:write("{}]")  -- Add empty object to validate the last comma
+    ofh:close()
+
+    -- Write index keys.
+    ofh = assert(io.open(keys_path, "w"))
+    idx_keys_ls = {}
+    for k in pairs(idx_keys) do table.insert(idx_keys_ls, k) end
+    ofh:write(json.encode(idx_keys_ls))
+    ofh:close()
+
     return true
 end
 
@@ -353,18 +433,29 @@ end
 
 
 M.generate_site = function()
+    -- Reset target folders (resource pages and media; assets are kept).
+    -- TODO for larger sites, a selective update should be implemented by
+    -- comparing RDF resource timestamps with HTML page timestamps. Post-MVP.
     if plpath.isdir(M.res_dir) then dir.rmtree(M.res_dir) end
     dir.makepath(M.res_dir)
+    --[[ NOTE(review): disabled; M.asset_dir is the output root. Confirm intent.
     if plpath.isdir(M.asset_dir) then dir.rmtree(M.asset_dir) end
     dir.makepath(M.asset_dir)
+    --]]
     if plpath.isdir(M.media_dir) then dir.rmtree(M.media_dir) end
     dir.makepath(plpath.join(M.media_dir, "tn"))
 
+    -- Set module-level graph handle used by the generators below.
     gr = graph.new(pkar.store, term.DEFAULT_CTX)
 
+    -- Copy static assets from templates/assets into M.asset_dir.
+    dir.clonetree("templates/assets", M.asset_dir, dir.copyfile)
+
+    -- Generate individual resource pages, RDF, and JSON index.
     assert(M.generate_resources())
+
+    -- Generate index page.
     assert(M.generate_idx())
-    dir.clonetree("templates/assets", plpath.dirname(M.asset_dir), dir.copyfile)
 end
 
 

Plik diff jest za duży
+ 8 - 0
templates/assets/js/fuse.basic.min.js


Plik diff jest za duży
+ 8 - 0
templates/assets/js/fuse.min.js


+ 48 - 0
templates/index.html

@@ -2,6 +2,54 @@
 <html>
     <head>
         <%- header_tpl({site_title = site_title, title = "Index"}) %>
+        <!-- JS client-side search. -->
+        <script src="/js/fuse.min.js"></script>
+        <script type="application/javascript">
+            async function get_json(url) {
+                try {
+                    const response = await fetch(url);
+                    if (!response.ok) {
+                        throw new Error(`Response status: ${response.status}`);
+                    }
+
+                    const json = await response.json();
+                    console.log(json);
+
+                    return json;
+                } catch (error) {
+                    console.error(error.message);
+                }
+            }
+            async function fuse_init() {
+                let [idx_json, keys_json] = await Promise.all([
+                    get_json('/js/fuse_index.json'),
+                    get_json('/js/fuse_keys.json')
+                ]);
+
+                const fuseOptions = {
+                    // isCaseSensitive: false,
+                    // includeScore: false,
+                    // ignoreDiacritics: false,
+                    // shouldSort: true,
+                    // includeMatches: false,
+                    // findAllMatches: false,
+                    // minMatchCharLength: 1,
+                    // location: 0,
+                    // threshold: 0.6,
+                    // distance: 100,
+                    // useExtendedSearch: false,
+                    // ignoreLocation: false,
+                    // ignoreFieldNorm: false,
+                    // fieldNormWeight: 1,
+                    keys: keys_json,
+                };
+                return new Fuse(idx_json, fuseOptions);
+            }
+
+            let fuse;
+            (async () => {fuse = await fuse_init()})();
+        </script>
+
     </head>
     <body>
         <header>

Niektóre pliki nie zostały wyświetlone z powodu dużej ilości zmienionych plików