Forráskód Böngészése

Complete DB init function; fix some hidden YAML syntax errors.

scossu 10 hónapja
szülő
commit
44fa98a24b

+ 70 - 34
scriptshifter/tables/__init__.py

@@ -6,6 +6,7 @@ from functools import cache
 from importlib import import_module
 from json import dumps
 from os import R_OK, access, environ, makedirs, path, unlink
+from shutil import move
 
 from yaml import load
 try:
@@ -26,6 +27,9 @@ runtime.
 """
 
 
+TMP_DB_PATH = path.join(
+        path.dirname(DB_PATH), "~tmp." + path.basename(DB_PATH))
+
 DEFAULT_TABLE_DIR = path.join(path.dirname(path.realpath(__file__)), "data")
 # Can be overridden for tests.
 TABLE_DIR = environ.get("TXL_CONFIG_TABLE_DIR", DEFAULT_TABLE_DIR)
@@ -144,17 +148,17 @@ def init_db():
     This must be done only once at bootstrap. To update individual tables,
     see populate_table(), which this function calls iteratively.
     """
-    # Remove preexisting DB and create parent diretories if necessary.
-    makedirs(path.dirname(DB_PATH), exist_ok=True)
-    if path.isfile(DB_PATH):
-        unlink(DB_PATH)
+    # Create parent directories if necessary.
+    # The DB is built at a temporary path; an existing DB is replaced ONLY
+    # after the whole initialization succeeds (see the move at the end).
+    makedirs(path.dirname(TMP_DB_PATH), exist_ok=True)
 
-    conn = sqlite3.connect(DB_PATH)
+    conn = sqlite3.connect(TMP_DB_PATH)
 
     # Initialize schema.
     with open(path.join(path.dirname(DEFAULT_TABLE_DIR), "init.sql")) as fh:
         with conn:
-            conn.execute(fh.read())
+            conn.executescript(fh.read())
 
     # Populate tables.
     try:
@@ -163,15 +167,22 @@ def init_db():
                 res = conn.execute(
                     """INSERT INTO tbl_language (
                         name, label, marc_code, description
-                    ) (?, ?, ?, ?)""",
+                    ) VALUES (?, ?, ?, ?)""",
                     (
                         tname, tdata.get("name"), tdata.get("marc_code"),
                         tdata.get("description"),
                     )
                 )
                 populate_table(conn, res.lastrowid, tname)
+
+        # If the DB already exists, it will be overwritten ONLY on success at
+        # this point.
+        move(TMP_DB_PATH, DB_PATH)
     finally:
         conn.close()
+        if path.isfile(TMP_DB_PATH):
+            # Remove the leftover temp file from a failed operation.
+            unlink(TMP_DB_PATH)
 
 
 def populate_table(conn, tid, tname):
@@ -187,50 +198,75 @@ def populate_table(conn, tid, tname):
             (flags, tid))
 
     for t_dir in (FEAT_S2R, FEAT_R2S):
-        sec = "script_to_roman" if t_dir == FEAT_S2R else "roman_to_script"
+        # BEGIN per-section loop.
+
+        sec_name = (
+                "script_to_roman" if t_dir == FEAT_S2R else "roman_to_script")
+        sec = data.get(sec_name)
+        if not sec:
+            continue
 
         # Transliteration map.
-        for k, v in data.get(sec, {}).get("map", {}).items():
+        for k, v in sec.get("map", {}):
             conn.execute(
                     """INSERT INTO tbl_trans_map (
                         lang_id, dir, src, dest
-                    ) VALUES (? ? ? ?)""",
+                    ) VALUES (?, ?, ?, ?)""",
                     (tid, t_dir, k, v))
 
         # hooks.
-        for k, v in data.get(sec, {}).get("hooks", {}).items():
+        for k, v in sec.get("hooks", {}).items():
             for i, hook_data in enumerate(v, start=1):
                 conn.execute(
                         """INSERT INTO tbl_hook (
-                            lang_id, dir, name, order, fn, signature
-                        ) VALUES (? ? ? ?)""",
-                        (tid, t_dir, k, i, hook_data[0], dumps(hook_data[1:])))
-
-    # Ignore rules for R2S only.
-    for row in data.get("roman_to_script", {}).get("ignore", []):
-        if isinstance(row, dict):
-            if "re" in row:
-                flags = FEAT_RE
-                rule = row["re"]
+                            lang_id, dir, name, sort, fn, signature
+                        ) VALUES (?, ?, ?, ?, ?, ?)""",
+                        (
+                            tid, t_dir, k, i,
+                            hook_data[0].__name__, dumps(hook_data[1:])))
+
+        # Ignore rules (R2S only).
+        for row in sec.get("ignore", []):
+            if isinstance(row, dict):
+                if "re" in row:
+                    flags = FEAT_RE
+                    rule = row["re"]
             else:
                 flags = 0
                 rule = row
 
-        conn.execute(
-                """INSERT INTO tbl_ignore (
-                    lang_id, rule, features
-                ) VALUES (? ? ?)""",
-                (tid, rule, flags))
+            conn.execute(
+                    """INSERT INTO tbl_ignore (
+                        lang_id, rule, features
+                    ) VALUES (?, ?, ?)""",
+                    (tid, rule, flags))
 
-    # Double caps (S2R).
-    for rule in data.get("roman_to_script", {}).get("double_cap", []):
-        conn.execute(
-                """INSERT INTO tbl_double_cap (
-                    lang_id, rule
-                ) VALUES (? ?)""",
-                (tid, rule))
+        # Double caps (S2R only).
+        for rule in sec.get("double_cap", []):
+            conn.execute(
+                    """INSERT INTO tbl_double_cap (
+                        lang_id, rule
+                    ) VALUES (?, ?)""",
+                    (tid, rule))
+
+        # Normalize (S2R only).
+        for src, dest in sec.get("normalize", {}).items():
+            conn.execute(
+                    """INSERT INTO tbl_normalize (lang_id, src, dest)
+                    VALUES (?, ?, ?)""",
+                    (tid, src, dest))
 
-    # Normalize (S2R).
+        # END per-section loop.
+
+    # UI options
+    for opt in data.get("options", []):
+        conn.execute(
+                """INSERT INTO tbl_option (
+                    lang_id, name, label, description, dtype, default_v
+                ) VALUES (?, ?, ?, ?, ?, ?)""",
+                (
+                    tid, opt["id"], opt["label"], opt["description"],
+                    opt["type"], opt["default"]))
 
 
 @cache

+ 2 - 2
scriptshifter/tables/data/asian_cyrillic.yml

@@ -391,8 +391,8 @@ roman_to_script:
 script_to_roman:
   map:
     
-    "\u00AB": """
-    "\u00BB": """
+    "\u00AB": "\""
+    "\u00BB": "\""
     "\u2116": "No\u0332"
     "\u0400": "E\u0300"
     "\u0401": "E\u0308"

+ 1 - 1
scriptshifter/tables/data/bashkir_cyrillic.yml

@@ -23,7 +23,7 @@ roman_to_script:
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
     "TH": "\u04AA"
-    "Th": "\u04AA"s
+    "Th": "\u04AA"
     "th": "\u04AB"
     "J": "\u04B8"
     "j": "\u04B9"

+ 2 - 2
scriptshifter/tables/data/index.yml

@@ -47,7 +47,7 @@ bengali:
 bulgarian:
   marc_code: bul
   name: Bulgarian
-buriat:
+buriat_cyrillic:
   marc_code: bua
   name: Buriat (Cyrillic)
 burmese:
@@ -108,7 +108,7 @@ kannada:
 kara-kalpak_cyrillic:
   marc_code: kaa
   name: Kara-Kalpak (Cyrillic)
-karachai-balkar_cyrillic:
+karachay-balkar_cyrillic:
   marc_code: krc
   name: Karachay-Balkar  (Cyrillic)
 karelian_cyrillic:

+ 2 - 2
scriptshifter/tables/data/kara-kalpak_cyrillic.yml

@@ -27,11 +27,11 @@ roman_to_script:
 script_to_roman:
   map:
     "\u040E": "W"
-    "\u045E"" "w"
+    "\u045E": "w"
     "\u0492": "Gh"
     "\u0493": "gh"
     "\u049A": "Q"
-    "\u-49B": "q"
+    "\u049B": "q"
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
     "\u04AE": "U\u0307"

+ 4 - 4
scriptshifter/tables/data/komi_cyrillic.yml

@@ -5,10 +5,10 @@ general:
 
 roman_to_script:
   map:
-    "D\u0320Z\u0320\H\u\0320": "\u0496"
-    "D\u0320Z\u0320\h\u\0320": "\u0496"
-    "D\u0320z\u0320\h\u\0320": "\u0496"
-    "d\u0320z\u0320\h\u\0320": "\u0497"
+    "D\u0320Z\u0320H\u0320": "\u0496"
+    "D\u0320Z\u0320h\u0320": "\u0496"
+    "D\u0320z\u0320h\u0320": "\u0496"
+    "d\u0320z\u0320h\u0320": "\u0497"
     "D\uFE20Z\uFE21": "\u0506"
     "D\uFE20z\uFE21": "\u0506"
     "d\uFE20z\uFE21": "\u0507"

+ 2 - 2
scriptshifter/tables/data/mongolian_mongol_bichig.yml

@@ -6,12 +6,12 @@ general:
 roman_to_script:
 
   map:
-    "\u002Daca": "\u202F\u1820\u1834\u1820
+    "\u002Daca": "\u202F\u1820\u1834\u1820"
     "\u002DA": "\u180E\u1820"
     "\u002Da": "\u180E\u1820"
     "A": "\u1820"
     "a": "\u1820"
-    "\u002Dece": "\u202F\u1821\u1834\u1821
+    "\u002Dece": "\u202F\u1821\u1834\u1821"
     "\u002DE": "\u180E\u1821"
     "\u002De": "\u180E\u1821"
     "\u002D": "\u202F"

+ 1 - 1
scriptshifter/tables/data/yiddish.yml

@@ -4,7 +4,7 @@ general:
 options:
   - id: loshn_koydesh
     label: Loshn Koydesh
-    description: [TODO]
+    description: "[TODO]"
     type: boolean
     default: false
 

+ 21 - 19
scriptshifter/tables/init.sql

@@ -7,8 +7,8 @@ CREATE TABLE tbl_language (
     id INTEGER PRIMARY KEY,
     name TEXT UNIQUE,
     label TEXT,
-    marc_code TEXT NULL,
-    description TEXT NULL,
+    marc_code TEXT,
+    description TEXT,
     features TINYINT DEFAULT 0
 );
 
@@ -21,12 +21,12 @@ CREATE TABLE tbl_trans_map (
     id INTEGER PRIMARY KEY,
     lang_id INTEGER NOT NULL,
     dir TINYINT NOT NULL DEFAULT 0,  /* 1 = S2R; 2 = R2S */
-    src TEXT NOT NULL UNIQUE,
+    src TEXT NOT NULL,
     dest TEXT,
 
-    FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
 );
-CREATE_INDEX trans_lookup ON tbl_trans_map (lang_id, dir, src);
+CREATE UNIQUE INDEX idx_trans_lookup ON tbl_trans_map (lang_id, dir, src);
 
 /*
  * Processing hooks.
@@ -36,15 +36,15 @@ CREATE TABLE tbl_hook (
     lang_id INTEGER NOT NULL,
     dir TINYINT NOT NULL DEFAULT 0,  /* 1 = S2R; 2 = R2S */
     name TEXT NOT NULL,  /* Hook name. */
-    order INT NOT NULL,  /* Function sorting order within the hook. */
+    sort INT NOT NULL,  /* Function sorting order within the hook. */
     fn TEXT NOT NULL,   /* Function name. */
     signature TEXT,     /* Arguments as JSON blob. */
 
-    FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
 );
-CREATE INDEX hook_lookup ON tbl_hook (lang_id, dir);
-CREATE INDEX hookname_lookup ON tbl_hook (name);
-CREATE INDEX hook_order ON tbl_hook (order ASC);
+CREATE INDEX idx_hook_lookup ON tbl_hook (lang_id, dir);
+CREATE INDEX idx_hookname_lookup ON tbl_hook (name);
+CREATE INDEX idx_hook_sort ON tbl_hook (sort ASC);
 
 /*
  * Ignore lists for R2S.
@@ -55,7 +55,7 @@ CREATE TABLE tbl_ignore (
     rule TEXT NOT NULL,
     features TINYINT,  /* 1 = case insensitive; 2 = regular expression. */
 
-    FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
 );
 
 /*
@@ -66,19 +66,19 @@ CREATE TABLE tbl_double_cap (
     lang_id INTEGER NOT NULL,
     rule TEXT NOT NULL,
 
-    FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
 );
 
 /*
  * Normalization rules.
  */
-CREATE TABLE tbl_norm (
+CREATE TABLE tbl_normalize (
     id INTEGER PRIMARY KEY,
     lang_id INTEGER NOT NULL,
     src TEXT NOT NULL,
     dest TEXT NOT NULL,
 
-    FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
 );
 
 /*
@@ -87,12 +87,14 @@ CREATE TABLE tbl_norm (
 CREATE TABLE tbl_option (
     id INTEGER PRIMARY KEY,
     lang_id INTEGER NOT NULL,
-    name TEXT UNIQUE,
-    description TEXT NULL,
-    type TEXT,
-    default TEXT NULL,
+    name TEXT NOT NULL,
+    label TEXT NOT NULL,
+    description TEXT,
+    dtype TEXT,
+    default_v TEXT,
 
-    FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
 );
+CREATE UNIQUE INDEX idx_option_lookup ON tbl_option (lang_id, name);