Browse Source

Merge branch 'local_config'

Stefano Cossu 4 days ago
parent
commit
5cfd245e61

+ 1 - 0
README.md

@@ -105,6 +105,7 @@ luarocks path >> ~/.bashrc
 cd ../
 git clone https://github.com/scossu/Penlight.git
 cd Penlight
+git checkout clonetree
 luarocks build --local
 cd ../pocket_archive
 ```

+ 2 - 1
config/app.lua

@@ -4,7 +4,8 @@ local plpath = require "pl.path"
 
 local ROOT = os.getenv("PKAR_ROOT")
 if not ROOT then
-    print(
+    io.output(io.stderr)
+    io.write(
         "PKAR_ROOT environment variable is not set.\n" ..
         "Data will be written to a TEMPORARY location that " ..
         "may NOT SURVIVE a reboot!"

+ 0 - 0
config/model/typedef/agent.lua → config/model/schema/agent.lua


+ 33 - 0
config/model/schema/anything.lua

@@ -0,0 +1,33 @@
+-- User-configuration extension of the core `anything` schema.
+-- `core = true` marks this file as an extension of a core type: apart from
+-- that flag, only `properties` is set at the top level.
+return {
+    core = true,
+
+    -- Properties added to the core type by this configuration.
+    properties = {
+        ext_id = {
+            uri = "dc:identifier",
+            label = "External system ID",
+            type = "string",
+        },
+        alt_label = {
+            uri = "dc:alternative",
+            label = "Alternative Label",
+            type = "string",
+        },
+        description = {
+            uri = "dc:description",
+            label = "Description",
+            type = "string",
+        },
+        last_modified_by = {
+            uri = "dc:contributor",
+            label = "Last Updated By",
+            type = "rel",
+            range = {agent = true},
+        },
+        submitted_by = {
+            uri = "dc:creator",
+            label = "Submitted By",
+            type = "rel",
+            range = {agent = true},
+        },
+    },
+}

+ 24 - 0
config/model/schema/artifact.lua

@@ -0,0 +1,24 @@
+-- User-configuration extension of the core `artifact` schema.
+-- `core = true` marks this file as an extension of a core type: apart from
+-- that flag, only `properties` is set at the top level.
+return {
+    core = true,
+
+    -- Properties added to the core type by this configuration.
+    properties = {
+        date = {
+            uri = "pas:date",
+            label = "Date",
+            description = "Date related to the artifact.",
+            type = "string",
+        },
+        -- NOTE(review): the core artifact schema appears to declare a
+        -- `location` property with this same URI — confirm that the overlap
+        -- is intended.
+        location_descr = {
+            uri = "pas:locationDescription",
+            label = "Location description",
+            description = "Verbal description of the artifact's location.",
+            type = "string",
+        },
+        location_url = {
+            uri = "pas:locationUrl",
+            label = "Geolocation URL",
+            description = "Link to a map locating the artifact.",
+            type = "string",
+        },
+    },
+}

+ 14 - 0
config/model/schema/collection.lua

@@ -0,0 +1,14 @@
+-- User-configuration extension of the core `collection` schema.
+-- `core = true` marks this file as an extension of a core type: apart from
+-- that flag, only `properties` is set at the top level.
+return {
+    core = true,
+
+    -- Properties added to the core type by this configuration.
+    properties = {
+        long_description = {
+            uri = "pas:longDescription",
+            label = "Long description",
+            description = "Text document in Markdown format with a detailed \z
+                description of the collection.",
+            type = "resource",
+            range = {file = true},
+        },
+    }
+}

+ 0 - 0
config/model/typedef/document.lua → config/model/schema/document.lua


+ 12 - 0
config/model/schema/file.lua

@@ -0,0 +1,12 @@
+-- User-configuration extension of the core `file` schema.
+-- `core = true` marks this file as an extension of a core type: apart from
+-- that flag, only `properties` is set at the top level.
+return {
+    core = true,
+
+    -- Properties added to the core type by this configuration.
+    properties = {
+        thumbnail = {
+            uri = "pas:thumbnail",
+            label = "Thumbnail",
+            type = "string",
+            max_cardinality = 1,
+        },
+    }
+}

+ 0 - 0
config/model/typedef/part.lua → config/model/schema/part.lua


+ 0 - 0
config/model/typedef/postcard.lua → config/model/schema/postcard.lua


+ 0 - 0
config/model/typedef/proxy.lua → config/model/schema/proxy.lua


+ 0 - 0
config/model/typedef/still_image.lua → config/model/schema/still_image.lua


+ 0 - 0
config/model/typedef/still_image_file.lua → config/model/schema/still_image_file.lua


+ 6 - 34
config/model/typedef/anything.lua → core_schema/anything.lua

@@ -2,7 +2,7 @@ return {
     uri = "pas:Anything",
     label = "Anything",
     description = "Superclass of every resource type in the system.",
-    abstract = "true",
+    abstract = true,
 
     properties = {
         source_path = {
@@ -13,6 +13,7 @@ return {
                 root.",
             type = "string",
             max_cardinality = 1,
+            system = true,
         },
         content_type = {
             uri = "pas:contentType",
@@ -20,6 +21,7 @@ return {
             type = "resource",
             min_cardinality = 1,
             max_cardinality = 1,
+            system = true,
         },
         --[[
         id = {
@@ -37,11 +39,7 @@ return {
                 was created or updated in.",
             type = "resource",
             min_cardinality = 1,
-        },
-        ext_id = {
-            uri = "dc:identifier",
-            label = "External system ID",
-            type = "string",
+            system = true,
         },
         label = {
             uri = "dc:title",
@@ -49,22 +47,13 @@ return {
             type = "string",
             max_cardinality = 1,
         },
-        alt_label = {
-            uri = "dc:alternative",
-            label = "Alternative Label",
-            type = "string",
-        },
-        description = {
-            uri = "dc:description",
-            label = "Description",
-            type = "string",
-        },
         submitted = {
             uri = "dc:dateSubmitted",
             label = "Submitted On",
             type = "datetime",
             min_cardinality = 1,
             max_cardinality = 1,
+            system = true,
         },
         last_modified = {
             uri = "dc:modified",
@@ -72,24 +61,7 @@ return {
             type = "datetime",
             min_cardinality = 1,
             max_cardinality = 1,
-        },
-        submitted_by = {
-            uri = "dc:creator",
-            label = "Submitted By",
-            type = "rel",
-            range = {agent = true},
-        },
-        created_by = {
-            uri = "dc:creator",
-            label = "Created By",
-            type = "rel",
-            range = {agent = true},
-        },
-        last_modified_by = {
-            uri = "dc:contributor",
-            label = "Last Updated By",
-            type = "rel",
-            range = {agent = true},
+            system = true,
         },
     },
 }

+ 4 - 9
config/model/typedef/artifact.lua → core_schema/artifact.lua

@@ -15,6 +15,7 @@ return {
             label = "First child",
             type = "resource",
             range = {brick = true},
+            system = true,
         },
         pref_rep = {
             uri = "pas:hasPreferredRepresentation",
@@ -37,16 +38,10 @@ return {
             description = "Date related to the artifact.",
             type = "string",
         },
-        location_descr = {
+        location = {
             uri = "pas:locationDescription",
-            label = "Location description",
-            description = "Verbal description of the artifact's location.",
-            type = "string",
-        },
-        location_url = {
-            uri = "pas:locationUrl",
-            label = "Geolocation URL",
-            description = "Link to a map locating the artifact.",
+            label = "Location",
+            description = "Generic information about the artifact's location.",
             type = "string",
         },
     },

+ 4 - 0
config/model/typedef/brick.lua → core_schema/brick.lua

@@ -26,12 +26,14 @@ return {
             uri = "pas:first",
             label = "First child",
             type = "resource",
+            system = true,
         },
         next = {
             uri = "pas:next",
             label = "Next sibling",
             type = "resource",
             max_cardinality = 1,
+            system = true,
         },
         ref = {
             uri = "pas:ref",
@@ -39,11 +41,13 @@ return {
             description = "The actual resource the brick refers to.",
             type = "resource",
             max_cardinality = 1,
+            system = true,
         },
         has_member = {
             uri = "pas:hasMember",
             label = "Member",
             type = "resource",
+            system = true,
         },
     }
 }

+ 0 - 11
config/model/typedef/collection.lua → core_schema/collection.lua

@@ -1,7 +1,6 @@
 return {
     uri = "pas:Collection",
     label = "Collection",
-
     broader = "brick",
 
     properties = {
@@ -14,15 +13,5 @@ return {
             type = "resource",
             range = {file = true},
         },
-        long_description = {
-            uri = "pas:longDescription",
-            label = "Long description",
-            description = "Text document in Markdown format with a detailed \z
-                description of the collection.",
-            type = "resource",
-            range = {file = true},
-        },
     }
 }
-
-

+ 0 - 7
config/model/typedef/file.lua → core_schema/file.lua

@@ -18,13 +18,6 @@ return {
             range = {Part = true, File = true},
             max_cardinality = 1,
         },
-        thumbnail = {
-            uri = "pas:thumbnail",
-            label = "Thumbnail",
-            type = "string",
-            --min_cardinality = 1,
-            max_cardinality = 1,
-        },
         format = {
             uri = "dc:format",
             label = "MIME type",

+ 18 - 7
doc/content_model_manual.md

@@ -4,14 +4,25 @@
 
 For a generic introduction to content modeling in Pocket Archive, see
 the [content modeling primer](./content_model_primer.md)
-## Predefined content types
 
-Pocket Archive ships with some predefined content types. For some very simple
-archives, this may be enough to get started with little or no customization.
-For a setup which needs to define more numerous or complex content types in a
-more articulated way, additional types can be defined. Please look at the
-[default model configuration](../config/model/typedef) files that come with
-Pocket Archive. 
+## Core schema and predefined content types
+
+Some content types are considered part of the core functionality of Pocket
+Archive. These are defined by the configurations in `core_schema` which comes
+with the Pocket Archive installation and should not be altered. These core
+types include the foundational types, such as `Anything`, `Artifact`, `File`,
+etc.
+
+The core types are extensible in the user configuration by adding properties to
+them. A configuration that extends a core schema MUST have the `core` attribute
+set to `true` and no other top-level attribute set, except for `properties`.
+
+Pocket Archive ships with a sample configuration including extensions of core
+content types. For some very simple archives, this may be enough to get started
+with little or no customization.  For a setup which needs to define more
+numerous or complex content types in a more articulated way, additional types
+can be defined. Please look at the [default model
+configuration](../config/model/schema) files that come with Pocket Archive. 
 
 Each *type definition* is encoded in a configuration file defining a single
 content category type. One doesn't have to define all possible types in detail.

+ 3 - 3
doc/roadmap.md

@@ -55,7 +55,7 @@ usage and opportunities for expanding adoption in relevant areas.
     - ✓ Option to regenerate site after submission
     - ✓ Option to clean up sources & LL on success
   - ❏ Submission report
-  -  Deleting resources
+  -  Deleting resources
 - ✓ Proper collection handling
   - ✓ Dedicated template
   - ✓ Link to markdown doc for presentation page
@@ -67,8 +67,8 @@ usage and opportunities for expanding adoption in relevant areas.
 - ⚒ Content model
   - ✓ Generate content model documentation (HTML)
   - ❏ Multilingual support
-  -  Content model dump (CLI)
-  -  Local overrides
+  -  Content model dump (CLI)
+  -  Local overrides
 - ⚒ Presentation
   - ⎊ Generate site for one collection only
   - ✓ Generate LL for submission

+ 25 - 0
doc/submission.md

@@ -34,6 +34,8 @@ Archive.](./pkar_res_lifecycle.png)
 8. If the archivist wants to update the archived resources, they can either
    request a full copy of the SIP, or to only update metadata, only the laundry
    list, and re-submit it.
+9. The archivist can remove a resource and, optionally, all its members at any
+   time.
 
 Processing of the SIP (point 4 above) either succeeds or fails as a unit. This
 means that a submission will never perform only a part of the task that it is
@@ -167,6 +169,11 @@ depends on the content model used.
 - `source_path`: mandatory for files, single-valued. It refers to the
   file or folder path relative to the package, using forward slash `/`
   characters to separate folders and subfolders or files.
+- `has_member`: this behaves like all normal properties, but it has a special
+  meaning when deleting resources. If the `--members` option is provided,
+  resources linked via the `has_member` property to the resource being deleted
+  are also deleted, along with their own members, recursively. See the
+  "Deleting resources" section below.
 
 Example of a table representing an artifact with two files:
 
@@ -413,3 +420,21 @@ with tools provided by most spreadsheet applications. A macro (a mini-program
 that runs in an application) for LibreOffice Calc is [provided
 here](../src/util/libreoffice_idgen.bas) to automatically generate 16-character
 IDs for all the cells selected in a table.
+
+## Deleting resources
+
+Although some archivists argue against deleting *anything* from an archive,
+Pocket Archive acknowledges that in real life things may actually need to be
+removed. The cause may be a duplicate, or something that was not supposed to
+be archived, etc. In any case, the resource-conservative alignment of Pocket
+Archive supports deleting resources immediately and irreversibly.
+
+A resource can be deleted via the `pkar remove` CLI method, or by uploading a
+special file to the drop box, named `pkar_remove*` (asterisk means zero or more
+characters—note that the file name does not need an extension). The delete file
+must be a list of archival IDs, in the short **URI** form (`par:<ID>`), one
+per line.
+
+If `pkar_watch`, the process watching the drop box, was started with the `-r`
+option, all members of the resources are recursively deleted (this means also
+members of members).

+ 1 - 1
pocket_archive-scm-1.rockspec

@@ -57,5 +57,5 @@ build = {
         pkar = "src/util/pkar.lua",
         pkar_watch = "src/util/watcher.lua",
     }},
-    copy_directories = {"config", "doc", "templates"},
+    copy_directories = {"config", "core_schema", "doc", "templates"},
 }

+ 39 - 12
src/model.lua

@@ -1,3 +1,4 @@
+local datafile = require "datafile"
 local dir = require "pl.dir"
 local path = require "pl.path"
 
@@ -44,7 +45,12 @@ end
 
 
 -- Parameters that do not get inherited.
-local NO_INHERIT = {abstract = true}
+local NO_INHERIT = {
+    abstract = true,
+    core = true,
+    description = true,
+}
+
 local MODEL_PATH = path.join(pkar.config_path, "model")
 
 local gen_config = dofile(path.join(MODEL_PATH, "generation.lua"))
@@ -62,10 +68,16 @@ end
 local function parse_model(mod_id)
     local hierarchy = {}
 
-    local function traverse(mod_id)
+    local function traverse(mod_id, core)
         logger:debug("traversing: " .. mod_id)
-        local model = dofile(path.join(
-                MODEL_PATH, "typedef", mod_id .. ".lua"))
+        local schema_path = path.join(MODEL_PATH, "schema", mod_id .. ".lua")
+        -- If core is true, or if the schema is not found in the user config,
+        -- look at the core schema.
+        if core or not path.isfile(schema_path) then
+            schema_path = datafile.path(
+                "core_schema/" .. mod_id .. ".lua")
+        end
+        local model = dofile(schema_path)
         -- Merge separate generator config
         model.gen = gen_config[mod_id]
         --model.id = mod_id
@@ -82,15 +94,18 @@ local function parse_model(mod_id)
 
         table.insert(hierarchy, 1, model)
 
-        if model.broader then traverse(model.broader) end
+        if model.broader then traverse(model.broader)
+        elseif model.core then traverse(mod_id, true) end
     end
     traverse(mod_id)
 
     local lineage = {}  -- Ordered lineage of types, from ancestor to leaf.
     local types = {}  -- Set of all types.
     for _, mod in ipairs(hierarchy) do
-        table.insert(lineage, mod.id)
-        types[mod.id] = true
+        if not mod.core then
+            table.insert(lineage, mod.id)
+            types[mod.id] = true
+        end
     end
 
     local function merge(src, dest)
@@ -119,17 +134,29 @@ end
 local function setup_model()
     -- Temp store (set) for property names.
     local all_pnames = {}
+
     -- Collect all type names from config file names.
     for _, fpath in ipairs(dir.getfiles(
-                path.join(MODEL_PATH, "typedef"), "*.lua")) do
+                path.join(MODEL_PATH, "schema"), "*.lua")) do
         local mname = path.basename(fpath):gsub(".lua$", "")
-        local typedef = parse_model(mname)
+        M.types[mname] = true
+    end
+    -- Add core schemata that were not extended by user config.
+    for _, fpath in ipairs(
+        dir.getfiles(datafile.path("core_schema"), "*.lua"
+    )) do
+        local mname = path.basename(fpath):gsub(".lua$", "")
+        M.types[mname] = true
+    end
+
+    for mname in pairs(M.types) do
+        local schema = parse_model(mname)
 
-        -- Store parsed typedef configurations.
-        M.types[mname] = typedef
+        -- Store parsed schema configurations.
+        M.types[mname] = schema
 
         -- Store unique prop names.
-        for pn in pairs(typedef.properties or NT) do
+        for pn in pairs(schema.properties or NT) do
             if not no_ll_pnames[pn] then all_pnames[pn] = true end
         end
     end

+ 37 - 9
src/repo.lua

@@ -65,17 +65,14 @@ M.store_updates = function(tmp_gr, s)
 end
 
 
---[[ Dump the whole archive RDF to a file stream.
+--[[--
+Dump the whole archive RDF to a file stream.
 
-    The result is a software-agnostic RDF representation of the metadata
-    (Turtle) compressed with GZip. The ores data folder can be backed up via
-    OS-level file operations.
+The restore() function, combined with a copy of
+the ores folder, shall create a fully functional repo.
 
-    The restore() function, combined with a copy of
-    the ores folder, shall create a fully functional repo.
-
-    TODO configuration backup is not yet implemented and should be included for
-    a completely self-sufficient backup.
+TODO configuration backup is not yet implemented and should be included for
+a completely self-sufficient backup.
 ]]
 M.dump = function(fpath, codec)
     local fh = assert(io.open(fpath, "wb"))
@@ -83,4 +80,35 @@ M.dump = function(fpath, codec)
 end
 
 
+--[[--
+Remove a single resource, and optionally its members.
+
+Both the triples having the resource as subject (outbound) and those having
+it as object (inbound) are removed. If a resource is not found, no triples are
+removed and the resource is not included in the returned set.
+
+@tparam string id Resource ID in the shortened URI form, `par:<ID>`
+@tparam boolean members Whether to delete all members of this resource (with
+the `has_member` relationship) and, recursively, their members.
+
+@treturn table Set of removed resources. The key is `id` for the requested
+resource, and the member term yielded by the member set iterator for each
+member removed recursively.
+]]
+M.remove = function(id, members)
+    local del_ids = {}
+
+    -- key: entry to flag in del_ids; s: subject term of the resource.
+    local function _remove(key, s)
+        -- Collect the members BEFORE removing the resource's triples, since
+        -- the membership links are among them.
+        local mds
+        if members then
+            mds = M.gr:term_set(
+                s, triple.POS_S,
+                model.id_to_uri.has_member, triple.POS_P
+            )
+        end
+
+        -- Always run both removals: chaining the calls with `or` would skip
+        -- the inbound one whenever outbound triples were found.
+        local ct_out = M.gr:remove(s)  -- Remove outbound
+        local ct_in = M.gr:remove(nil, nil, s)  -- Remove inbound
+        if ct_out > 0 or ct_in > 0 then del_ids[key] = true end
+
+        -- Recurse only after this resource's own links are gone, so that a
+        -- membership cycle finds no members on the second visit and stops.
+        -- NOTE(review): assumes term_set() returns a snapshot that is not
+        -- affected by the removals above — confirm.
+        if mds then
+            for m_uri in mds:iter() do _remove(m_uri, m_uri) end
+        end
+    end
+
+    _remove(id, term.new_iriref_ns(id))
+
+    return del_ids
+end
+
+
 return M

+ 1 - 1
src/submission.lua

@@ -163,7 +163,7 @@ local function generate_sip(ll_path)
     -- Infer structure from paths and row ordering.
     for i, v in ipairs(sip) do
         local rmod = model.types[v.content_type]
-        --dbg.assert(v.source_path)
+        --require "debugger".assert(rmod)
         local fpath = path.join(sip.root_path, v.source_path)
         --dbg.assert(rmod)
         v.has_member = v.has_member or {}

+ 36 - 0
src/util/pkar.lua

@@ -19,6 +19,8 @@ local sub = require "pocket_archive.submission"
 
 cli.locale "en_US"  -- TODO set with multilingual support.
 
+io.output(io.stdout)
+
 init = cli.command {
     "Initialize a new Pocket Archive store.",
 
@@ -61,6 +63,40 @@ deposit = cli.command {
     end
 }
 
+
+remove = cli.command {
+    "Remove a list of resources.",
+
+    cli.positional "path" {
+        "Path of the delete list file or input stream. It must contain \z
+        one ID per line, in the short URI format (`par:<ID>'). If not \z
+        provided or `-', it is set to standard input.",
+        type = cli.string,
+        default = "-",
+    },
+
+    cli.flag "m,members" {
+        "Remove the resource members recursively.",
+        type = cli.boolean,
+        default = false,
+    },
+
+    -- Command body: read IDs line by line, remove each one, report the total.
+    function(args)
+        -- With a nil file name, io.lines() reads from the default input.
+        if args.path == "-" then args.path = nil end
+
+        local removed_ct = 0
+        for line in io.lines(args.path) do
+            if line ~= "" then  -- Blank lines carry no ID.
+                io.write("Deleting: ", line, "\n")
+                -- Tally the entries of the set returned for this ID.
+                for _ in pairs(repo.remove(line, args.members)) do
+                    removed_ct = removed_ct + 1
+                end
+            end
+        end
+
+        io.write("Deleted " .. removed_ct .. " resources.\n")
+    end
+}
+
+
 gen_site = cli.command {
     "Generate a static site from the archive.",
 

+ 23 - 0
src/util/watcher.lua

@@ -54,6 +54,11 @@ cli.program {
         type = cli.boolean,
     },
 
+    cli.flag "r,del-members" {
+        "Remove resource members recursively.",
+        type = cli.boolean,
+    },
+
     cli.flag "c,cleanup" {
         "Remove laundry list and SIP after successful submission.",
         type = cli.boolean,
@@ -117,6 +122,24 @@ cli.program {
                     os.exit(exit_code)
                 end
                 -- Else: main process keeps on running.
+            elseif ev.name:find("pkar_remove.*") then
+                -- Process the remove list in a forked child, so that the main
+                -- process keeps on running.
+                local cpid, err = unistd.fork()
+                if cpid == nil then
+                    logger:error("Failed to fork delete process: " .. err)
+                elseif cpid == 0 then
+                    -- Child process.
+                    logger:info("Detected new remove list file: " .. ev.name)
+                    for id in io.lines(ev.name) do
+                        if #id > 0 then  -- Skip blank lines.
+                            local del_ok, del_err = pcall(
+                                repo.remove,
+                                id, args.del_members
+                            )
+                            if not del_ok then
+                                -- del_err is the error value raised by
+                                -- repo.remove, not an exit status: log it
+                                -- and exit with a failure code.
+                                logger:error(
+                                    "Failed to remove " .. id .. ": "
+                                    .. tostring(del_err))
+                                os.exit(1)
+                            end
+                        end
+                    end
+                    os.exit()
+                end
+                -- Else: main process keeps on running.
             end
         end)