Przeglądaj źródła

WIP build and populate tables.

scossu 10 miesięcy temu
rodzic
commit
ebf53e6b54
2 zmienionych plików z 115 dodań i 12 usunięć
  1. 80 8
      scriptshifter/tables/__init__.py
  2. 35 4
      scriptshifter/tables/init.sql

+ 80 - 8
scriptshifter/tables/__init__.py

@@ -4,6 +4,7 @@ import sqlite3
 
 from functools import cache
 from importlib import import_module
+from json import dumps
 from os import R_OK, access, environ, makedirs, path, unlink
 
 from yaml import load
@@ -19,8 +20,9 @@ from scriptshifter.exceptions import BREAK, ConfigError
 __doc__ = """
 Transliteration tables.
 
-These tables contain all transliteration information, grouped by script and
-language (or language and script? TBD)
+These tables contain all transliteration information. The static YML files are
+transformed and loaded into a database, which is the effective data source at
+runtime.
 """
 
 
@@ -55,9 +57,10 @@ BOW = 1 << 1
 EOW = 1 << 0
 
 # Feature flags used in database tables.
-FEAT_S2R = 1 << 0       # Has S2R
-FEAT_R2S = 1 << 1       # Has R2S
+FEAT_S2R = 1 << 0       # Table has a script_to_roman section.
+FEAT_R2S = 1 << 1       # Table has a roman_to_script section.
 FEAT_CASEI = 1 << 2     # Case-insensitive script.
+FEAT_RE = 1 << 3        # Ignore rule is a regular expression.
 
 logger = logging.getLogger(__name__)
 
@@ -150,15 +153,84 @@ def init_db():
 
     # Initialize schema.
     with open(path.join(path.dirname(DEFAULT_TABLE_DIR), "init.sql")) as fh:
-        conn.execute(fh.read())
+        with conn:
+            conn.execute(fh.read())
 
     # Populate tables.
-    for tname in list_tables().keys():
-        populate_table(tname)
+    try:
+        with conn:
+            for tname, tdata in list_tables().items():
+                res = conn.execute(
+                    """INSERT INTO tbl_language (
+                        name, label, marc_code, description
+                    ) (?, ?, ?, ?)""",
+                    (
+                        tname, tdata.get("name"), tdata.get("marc_code"),
+                        tdata.get("description"),
+                    )
+                )
+                populate_table(conn, res.lastrowid, tname)
+    finally:
+        conn.close()
 
 
-def populate_table(tname):
+def populate_table(conn, tid, tname):
+    """
+    Populate the database with the contents of one transliteration table.
+
+    The table is loaded with ``load_table`` and its feature flags,
+    transliteration maps, hooks, ignore rules and double capitals are
+    written to the database. All statements are executed on the provided
+    connection and nothing is committed here: transaction handling is left
+    to the caller.
+
+    :param conn: Open database connection used to execute all statements.
+    :param tid: ID of the ``tbl_language`` row that the data belong to.
+    :param tname: Name of the table, as accepted by ``load_table``.
+    """
     data = load_table(tname)
+    flags = 0
+    if "script_to_roman" in data:
+        flags |= FEAT_S2R
+    if "roman_to_script" in data:
+        flags |= FEAT_R2S
+
+    conn.execute(
+            "UPDATE tbl_language SET features = ? WHERE id = ?",
+            (flags, tid))
+
+    for t_dir in (FEAT_S2R, FEAT_R2S):
+        sec = "script_to_roman" if t_dir == FEAT_S2R else "roman_to_script"
+        # A section may be missing, or present but empty.
+        sec_data = data.get(sec) or {}
+
+        # Transliteration map.
+        # NOTE(review): assumes load_table returns the map as a mapping;
+        # confirm against load_table.
+        for k, v in sec_data.get("map", {}).items():
+            conn.execute(
+                    """INSERT INTO tbl_trans_map (
+                        lang_id, dir, src, dest
+                    ) VALUES (?, ?, ?, ?)""",
+                    (tid, t_dir, k, v))
+
+        # Hooks. Functions within a hook are numbered from 1 in the order
+        # in which they are listed.
+        for k, v in sec_data.get("hooks", {}).items():
+            for i, hook_data in enumerate(v, start=1):
+                # "order" is a reserved word and must be quoted. The first
+                # item is stored as the function, the rest as a JSON array.
+                conn.execute(
+                        """INSERT INTO tbl_hook (
+                            lang_id, dir, name, "order", fn, signature
+                        ) VALUES (?, ?, ?, ?, ?, ?)""",
+                        (tid, t_dir, k, i, hook_data[0], dumps(hook_data[1:])))
+
+    # Ignore rules for R2S only.
+    for row in (data.get("roman_to_script") or {}).get("ignore", []):
+        # A rule is either a mapping with a "re" key (regular expression) or
+        # a plain value. Flags are tracked per rule, separately from the
+        # language feature flags set above.
+        if isinstance(row, dict) and "re" in row:
+            rule_flags = FEAT_RE
+            rule = row["re"]
+        else:
+            rule_flags = 0
+            rule = row
+
+        conn.execute(
+                """INSERT INTO tbl_ignore (
+                    lang_id, rule, features
+                ) VALUES (?, ?, ?)""",
+                (tid, rule, rule_flags))
+
+    # Double caps (S2R).
+    for rule in (data.get("script_to_roman") or {}).get("double_cap", []):
+        conn.execute(
+                """INSERT INTO tbl_double_cap (
+                    lang_id, rule
+                ) VALUES (?, ?)""",
+                (tid, rule))
+
+    # TODO: Normalize (S2R).
 
 
 @cache

+ 35 - 4
scriptshifter/tables/init.sql

@@ -5,9 +5,10 @@
  */
 CREATE TABLE tbl_language (
     id INTEGER PRIMARY KEY,
-    name VARCHAR UNIQUE,
-    label VARCHAR,
-    description VARCHAR NULL,
+    name TEXT UNIQUE,  /* Table name (key of the table listing). */
+    label TEXT,  /* "name" key of the table metadata. */
+    marc_code TEXT NULL,  /* "marc_code" key of the table metadata. */
+    description TEXT NULL,  /* "description" key of the table metadata. */
     features TINYINT DEFAULT 0
 );
 
@@ -25,6 +26,7 @@ CREATE TABLE tbl_trans_map (
 
     FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
 );
+/* Lookup of source tokens by language and direction. */
+CREATE INDEX trans_lookup ON tbl_trans_map (lang_id, dir, src);
 
 /*
  * Processing hooks.
@@ -33,13 +35,16 @@ CREATE TABLE tbl_hook (
     id INTEGER PRIMARY KEY,
     lang_id INTEGER NOT NULL,
     dir TINYINT NOT NULL DEFAULT 0,  /* 1 = S2R; 2 = R2S */
-    hook TEXT NOT NULL,  /* Hook name. */
+    name TEXT NOT NULL,  /* Hook name. */
     order INT NOT NULL,  /* Function sorting order within the hook. */
     fn TEXT NOT NULL,   /* Function name. */
     signature TEXT,     /* Arguments as JSON blob. */
 
     FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
 );
+CREATE INDEX hook_lookup ON tbl_hook (lang_id, dir);
+CREATE INDEX hookname_lookup ON tbl_hook (name);
+/* ORDER is a reserved word and must be quoted when used as an identifier. */
+CREATE INDEX hook_order ON tbl_hook ("order" ASC);
 
 /*
  * Ignore lists for R2S.
@@ -53,6 +58,17 @@ CREATE TABLE tbl_ignore (
     FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
 );
 
+/*
+ * Double capitals. One row per double_cap entry of a language table.
+ */
+CREATE TABLE tbl_double_cap (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    rule TEXT NOT NULL,
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+
 /*
  * Normalization rules.
  */
@@ -65,3 +81,18 @@ CREATE TABLE tbl_norm (
     FOREIGN KEY (lang_id) REFERENCES tbl_language.id ON DELETE CASCADE
 );
 
+/*
+ * Input options.
+ *
+ * NOTE(review): name is unique across all languages, not per language;
+ * confirm whether UNIQUE (lang_id, name) is what is intended.
+ */
+CREATE TABLE tbl_option (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    name TEXT UNIQUE,
+    description TEXT NULL,
+    type TEXT,
+    "default" TEXT NULL,  /* Quoted: DEFAULT is a reserved word. */
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+
+