Jelajahi Sumber

Initial submission CSV parser.

scossu 1 bulan lalu
induk
melakukan
7d1ed63595
3 mengubah file dengan 115 tambahan dan 7 penghapusan
  1. 0 1
      data/sample_submission/postcard-bag/data/submission.csv
  2. 3 6
      model_parser.lua
  3. 112 0
      submission.lua

+ 0 - 1
data/sample_submission/postcard-bag/data/submission.csv

@@ -1,4 +1,3 @@
-ref,mdkey,mdvalue
 12345,type,Postcard
 ,prefLabel,Example Postcard
 ,altLabel,This is an alternative label

+ 3 - 6
model_parser.lua

@@ -13,12 +13,9 @@ local MODEL_PATH = "./config/model/"
 
 
 local function camel2snake(src)
-    return string.lower(
-        string.gsub(
-            string.gsub (src, "^pas:", ""),  -- Strip namespace.
-            "([^^])(%u)", "%1_%2"  -- Uppercase (except initial) to underscore.
-        )
-    )
+    -- Convert a camelCase name, optionally prefixed with "pas:", to
+    -- snake_case. Lowercasing has to be the last step: once the capitals
+    -- are gone, %u has nothing left to match and no underscore is inserted.
+    -- Ending the chain on lower() also yields exactly one return value
+    -- (a trailing gsub would append its match count).
+    return src
+        :gsub("^pas:", "")  -- Strip namespace.
+        :gsub("([^^])(%u)", "%1_%2")  -- Uppercase (except initial) to _.
+        :lower()
 end
 
 

+ 112 - 0
submission.lua

@@ -0,0 +1,112 @@
+local io = io
+
+local csv = require "csv"
+
+
+local M = {}  -- Submission module
+
+-- Adapted from lua-núcleo
+--- Escape a string so it can be embedded literally in a Lua pattern.
+-- Every pattern magic character is prefixed with "%", and a NUL byte is
+-- rewritten as the "%z" class; all other characters pass through unchanged.
+-- @param s String to escape.
+-- @return The escaped string (single return value).
+local function escape_pattern(s)
+    -- NUL is the only entry that is not simply "%" followed by itself.
+    local replacements = { ["\0"] = "%z" }
+    for ch in ("^$()%.[]*+-?"):gmatch(".") do
+        replacements[ch] = "%" .. ch
+    end
+
+    -- Assigning to a single local discards gsub's substitution count.
+    local escaped = s:gsub(".", replacements)
+    return escaped
+end
+
+
+--- Parse a submission CSV file into an ordered list of resource metadata.
+--
+-- Each row is read as (reference, property key, value). An empty reference
+-- or key cell inherits the last non-empty one from a previous row. Rows
+-- without a value are skipped, except a row holding only a reference, which
+-- reserves that resource's position in the output.
+--
+-- @param path Path of the CSV file, passed to csv.open.
+-- @return A list of tables ordered by the row in which each reference first
+--   appears. Each table has `path` (the reference), one list of values per
+--   property key, and optionally `next` / `firstChild` holding the path of
+--   a later entry.
+-- Raises an error if the file cannot be opened, or if a row needs an
+-- inherited reference or key before one has been set.
+M.deposit = function(path)
+    local sub_data = assert(csv.open(path))
+    local md = {}
+    local prev_ref, prev_k
+
+    -- Collate metadata.
+    local row_no = 1
+    for row in sub_data:lines() do
+        -- Keep the cells local; a bare assignment would create globals.
+        local ref, k, v = table.unpack(row)
+        -- nil-out empty cells (they come through as "")
+        if ref == "" then ref = nil end
+        if k == "" then k = nil end
+        if v == "" then v = nil end
+
+        print("Parsing row:", ref, k, v)
+        -- v can be a legit false value.
+        if ref and not k and v == nil then
+            -- Reference-only row: a placeholder for ordering purposes. It
+            -- needs path and _sort like any other entry, otherwise the sort
+            -- and the path matching below fail on nil fields.
+            md[ref] = md[ref] or {path = ref, _sort = row_no}
+        elseif v ~= nil then
+            -- If ref or k are missing, reuse the previous one.
+            if ref then
+                prev_ref = ref
+            else
+                if not prev_ref then
+                    -- If column 1 is empty, it must have been set in a
+                    -- previous row.
+                    error(string.format(
+                        "Reference in column 1, row %d not found!", row_no), 2)
+                end
+                ref = prev_ref
+            end
+
+            if k then
+                prev_k = k
+            else
+                if not prev_k then
+                    -- If column 2 is empty, it must have been set in a
+                    -- previous row.
+                    error(string.format(
+                        "Property key in column 2, row %d not found!", row_no),
+                        2)
+                end
+                k = prev_k
+            end
+
+            local entry = md[ref]
+            if not entry then
+                entry = {path = ref, _sort = row_no}
+                md[ref] = entry
+            end
+            entry[k] = entry[k] or {}
+            table.insert(entry[k], v)
+        end
+        -- Any other row without a value is ignored.
+
+        row_no = row_no + 1
+    end
+
+    -- Move md to an ordered list.
+    local mdlist = {}
+    for _, entry in pairs(md) do table.insert(mdlist, entry) end
+
+    table.sort(mdlist, function (a, b) return (a._sort < b._sort) end)
+
+    -- Infer structure from paths and row ordering.
+    for idx, entry in ipairs(mdlist) do
+        for j = idx + 1, #mdlist do
+            local other = mdlist[j]
+            print(string.format("comparing %s : %s", entry.path, other.path))
+            -- Both paths share the same portion up to the last "/" (or
+            -- neither contains a "/"): the first such later entry is `next`.
+            if not entry["next"] and
+                    other.path:match("(.*/)") == entry.path:match("(.*/)") then
+                print("next match.")
+                entry["next"] = other.path
+            end
+            -- NOTE(review): this is a plain prefix test, so a later entry
+            -- whose path merely starts with the same characters (e.g. "a1"
+            -- and "a10") also matches -- confirm whether a "/" separator
+            -- should be required.
+            if not entry.firstChild and
+                    other.path:match("^" .. escape_pattern(entry.path)) then
+                print("First child match.")
+                entry.firstChild = other.path
+            end
+        end
+        -- The sort key is internal and is not exposed in the result.
+        entry._sort = nil
+    end
+
+    return mdlist
+end
+
+return M