Selaa lähdekoodia

Dump archive RDF; fix volksdata binding errors.

scossu 5 päivää sitten
vanhempi
commit
6db9b4a36f
7 muutettua tiedostoa jossa 73 lisäystä ja 27 poistoa
  1. 8 6
      doc/roadmap.md
  2. 4 4
      src/core.lua
  3. 15 8
      src/generator.lua
  4. 22 5
      src/repo.lua
  5. 1 1
      src/submission.lua
  6. 1 1
      src/transformers.lua
  7. 22 2
      src/util/pkar.lua

+ 8 - 6
doc/roadmap.md

@@ -54,17 +54,21 @@ usage and opportunities for expanding adoption in relevant areas.
   - ✓ Watch local folder and trigger deposit
   - ✓ Option to regenerate site after deposit
   - ✓ Option to clean up sources & LL on success
--  Proper collection handling
+-  Proper collection handling
   - ✓ Dedicated template
   - ✓ Link to markdown doc for presentation page
-  - ❏ Handle artifacts as members
+  - ✓ Handle artifacts as members
+- ⚒ Preservation
+  - ✓ Dump archive RDF
+  - ❏ Backup full environment (including config)
+  - ❏ Restore whole archive from RDF & data folder
 - ❏ Content model
   - ❏ Multilingual support
   - ❏ Local overrides
   - ❏ Relationships inference
   - ❏ Markdown support for individual fields
--  Generator
-  -  Generate site for one collection only
+-  Generator
+  -  Generate site for one collection only
   - ❏ htmlgen option for local file or webserver URL generation
   - ❏ Generate LL (multi)
   - ❏ Generate RDF (multi)
@@ -72,8 +76,6 @@ usage and opportunities for expanding adoption in relevant areas.
   - ❏ Category browsing
   - ❏ Improve search indexing
   - ❏ Enhanced styling and access
-- ❏ CLI
-  - ❏ Dump & restore (whole archive & individual resources)
 - ❏ Testing
     - ❏ Unit tests
     - ❏ >100 resource data set

+ 4 - 4
src/core.lua

@@ -39,10 +39,10 @@ local M = {
       {"warn",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"info",  "%T PKAR %-5L ",          "%n", io.stderr},
       {"debug", "%T PKAR %-5L%f (%S) ",   "%n", io.stderr},
-      timefn=(socket or {}).gettime,  -- use socket.gettime if available
-      report="debug",   -- to which level should internal log events be passed?
-      hookrequire=true, -- also report calls to require()
-      level="debug",    -- output levels up to and including "dbg"
+      timefn=(require "socket" or {}).gettime,
+      report="debug",
+      hookrequire=true,
+      level="debug",
     },
 
     -- Commonly used terms.

+ 15 - 8
src/generator.lua

@@ -25,9 +25,6 @@ local dbg = require "debugger"
 -- "nil" table - for missing key fallback in chaining.
 local NT = {}
 
--- All resource subjects.
-local subjects
-
 -- Extension for type-based icon files.
 local ICON_EXT = ".svg"
 
@@ -584,7 +581,7 @@ M.generate_resource = function(s)
     local res_path = pkar.gen_pairtree(M.res_dir, s.data, ".ttl")
     dir.makepath(path.dirname(res_path))
     local ofh = assert(io.open(res_path, "w"))
-    ofh:write(repo.serialize_rsrc(s, "ttl"))
+    for chunk in repo.serialize_rsrc(s, "ttl") do ofh:write(chunk) end
     ofh:close()
 
     -- Generate HTML doc.
@@ -604,9 +601,19 @@ M.generate_resource = function(s)
 end
 
 
-M.generate_resources = function()
-    -- Look up if subjects are already populated.
-    subjects = subjects or repo.gr:unique_terms(triple.POS_S)
+M.generate_resources = function(coll_id)
+    -- TODO It's more complicated than this. Each member in the collection
+    -- must be scanned recursively for outbound links and visited links must
+    -- be noted down to avoid loops.
+    --[[
+    if coll_id then
+        subject_ts = repo.gr:term_set(
+            term.new_iriref_ns("par:" .. coll_id), triple.POS_S,
+            model.id_to_uri.has_member, triple.POS_P
+        )
+    else subjects_ts = repo.gr:unique_terms(triple.POS_S) end
+    --]]
+    subjects_ts = repo.gr:unique_terms(triple.POS_S)
 
     -- Initialize the JSON template with an opening brace.
     local ofh = assert(io.open(index_path, "w"))
@@ -614,7 +621,7 @@ M.generate_resources = function()
     ofh:close()
 
     -- TODO parallelize
-    for s in subjects:iter() do assert(M.generate_resource(s)) end
+    for s in subjects_ts:iter() do assert(M.generate_resource(s)) end
 
     -- Close the open list brace in the JSON template after all the resources
     -- have been added.

+ 22 - 5
src/repo.lua

@@ -48,15 +48,32 @@ end
 
 M.store_updates = function(tmp_gr, s)
     -- TODO use a transaction when volksdata_lua supports it.
-    logger:debug("Graph: ", tmp_gr:encode("ttl"))
-
-    local stored_gr = graph.new(pkar.store, term.DEFAULT_CTX)
+    logger:debug("Graph: ")
+    for chunk in tmp_gr:encode("ttl") do logger:debug(chunk) end
 
     logger:debug("Removing stored triples.")
-    stored_gr:remove(s)
+    M.gr:remove(s)
 
     logger:info("Storing triples.")
-    return tmp_gr:copy(stored_gr)
+    return tmp_gr:copy(M.gr)
+end
+
+
+--[[ Dump the whole archive RDF to a file stream.
+
+    The result is a software-agnostic RDF representation of the metadata
+    (Turtle by default; GZip compression is not applied yet). The ores data
+    folder can be backed up via OS-level file operations.
+
+    The restore() function, combined with a copy of
+    the ores folder, shall create a fully functional repo.
+
+    TODO configuration backup is not yet implemented and should be included for
+    a completely self-sufficient backup.
+]]
+M.dump = function(fpath, codec)
+    local fh = assert(io.open(fpath, "wb"))
+    for chunk in M.gr:encode(codec or "ttl") do fh:write(chunk) end
 end
 
 

+ 1 - 1
src/submission.lua

@@ -190,7 +190,7 @@ end
 --]]
 local function rsrc_to_graph(rsrc)
     local rmod = model.types[rsrc.content_type]
-    logger:info("Updating resource md: ", pp.write(rsrc))
+    logger:debug("Updating resource md: ", pp.write(rsrc))
 
     local s = term.new_iriref_ns(rsrc.id)
     local gr = graph.new(nil)

+ 1 - 1
src/transformers.lua

@@ -11,7 +11,7 @@ M = {}
 
 -- Resize an image to a maximum size on either dimension.
 M.img_resize = function(src, dest, size)
-    print(("Resizing image %s with size %d to %s"):format(src, size, dest))
+    logger:debug(("Resizing image %s with size %d to %s"):format(src, size, dest))
     -- TODO Make streaming if possible.
     dir.makepath(path.dirname(dest))
     local img = vips.Image.thumbnail(src, size)

+ 22 - 2
src/util/pkar.lua

@@ -63,7 +63,7 @@ gen_site = cli.command {
     function(args) gen.generate_site() end
 }
 
-gen_rdf = cli.command {
+dump_res = cli.command {
     "Generate an RDF representation of a resource.",
 
     cli.positional "id" {
@@ -93,7 +93,7 @@ gen_rdf = cli.command {
     end,
 }
 
-gen_ll = cli.command {
+dump_ll = cli.command {
     "Generate a laundry list for a stored resource.",
 
     cli.positional "id" {
@@ -118,6 +118,26 @@ gen_ll = cli.command {
     end
 }
 
+dump_archive = cli.command {
+    "Generate a RDF representation of the full archive.",
+
+    cli.positional "path" {
+        "Destination file path.",
+        type = cli.string,
+    },
+    cli.flag "f,format" {
+        "RDF serialization format. One of `ttl` [default], `nt`.",
+        type = cli.string,
+        default = "ttl",
+    },
+
+    function(args)
+        repo.dump(args.path, args.format)
+        print ("File written to ", args.path)
+    end,
+}
+
+
 cli.program {
     "Pocket Archive command line interface.",
 }