Jelajahi Sumber

Merge pull request #130 from lcnetdev/test

DB back end.
Stefano Cossu 8 bulan lalu
induk
melakukan
f5b1e37594

+ 1 - 0
.gitignore

@@ -137,5 +137,6 @@ tags.temp
 
 # Local
 ext/arabic_rom/data
+scriptshifter/data/*.db
 !.keep
 VERSION

+ 2 - 1
Dockerfile

@@ -3,8 +3,9 @@ ARG WORKROOT "/usr/local/scriptshifter/src"
 
 # Copy core application files.
 WORKDIR ${WORKROOT}
-COPY entrypoint.sh uwsgi.ini wsgi.py VERSION ./
+COPY VERSION entrypoint.sh sscli uwsgi.ini wsgi.py ./
 COPY scriptshifter ./scriptshifter/
+COPY tests ./tests/
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt
 

+ 37 - 2
README.md

@@ -18,6 +18,21 @@ Currently, the following environment variables are defined:
 - `TXL_DICTA_EP`: Endpoint for the Dicta Hebrew transliteration service. This
   is mandatory for using the Hebrew module.
 
+## Initial setup
+
+In order to run Scriptshifter, a local SQLite database must be created. The
+simplest way to do that is via command-line:
+
+```bash
+./sscli admin init-db
+```
+
+This step is already included in the `entrypoint.sh` script that gets executed
+by Docker, so no additional action is necessary.
+
+Note that the DB must be recreated every time any of the configuration tables
+in `scriptshifter/tables/data` changes.
+
 ## Local development server
 
 For local development, it is easiest to run Flask without the WSGI wrapper,
@@ -73,11 +88,12 @@ string in a production environment.
 
 `TXL_LOGLEVEL`: Logging level. Use Python notation. The default is `WARN`.
 
-`TXL_SMTP_HOST`: SMTP host to send feedback messages through. Defaults to
-`localhost`.
+`TXL_SMTP_HOST`: SMTP host to send feedback messages through. If not defined,
+the feedback form will not be shown in the UI.
 
 `TXL_SMTP_PORT`: Port of the SMTP server. Defaults to `1025`.
 
+
 ## Web UI
 
 `/` renders a simple HTML form to test the transliteration service.
@@ -88,6 +104,25 @@ the drop-down automatically. The value must be one of the keys found in
 `/languages`.
 
 
+## Command-line interface
+
+Various Scriptshifter commands can be accessed via the shell command `sscli`.
+At the moment only a few essential admin and testing tools are available. More
+commands can be made available on an as-needed basis.
+
+Help menu:
+
+```
+/path/to/sscli --help
+```
+
+Section help:
+
+```
+/path/to/sscli admin --help
+```
+
+
 ## Contributing
 
 See the [contributing guide](./doc/contributing.md).

+ 2 - 0
entrypoint.sh

@@ -12,6 +12,8 @@ fi
 host=${TXL_WEBAPP_HOST:-"0.0.0.0"}
 port=${TXL_WEBAPP_PORT:-"8000"}
 
+./sscli admin init-db
+
 if [ "${FLASK_ENV}" == "development" ]; then
     exec flask run -h $host -p $port
 else

+ 8 - 0
scriptshifter/__init__.py

@@ -9,6 +9,14 @@ env = load_dotenv()
 
 APP_ROOT = path.dirname(path.realpath(__file__))
 
+"""
+SQLite database path.
+
+This DB stores all the runtime transliteration data.
+"""
+DB_PATH = environ.get(
+        "DB_PATH", path.join(APP_ROOT, "data", "scriptshifter.db"))
+
 """
 SMTP server for sending email. For a dummy server that just echoes the
 messages, run: `python -m smtpd -n -c DebuggingServer localhost:1025`

+ 0 - 0
scriptshifter/data/.keep


+ 2 - 2
scriptshifter/hooks/greek/__init__.py

@@ -6,9 +6,9 @@ from logging import getLogger
 from scriptshifter.exceptions import CONT
 
 
-# Suffixed by ʹ
 # Indices are positions in the numeric string from the right
 DIGITS = {
+    # Suffixed by ʹ (U+0374)
     1: {  # Units
         "α": 1,
         "β": 2,
@@ -45,7 +45,7 @@ DIGITS = {
         "ω": 8,
         "ϡ": 9,
     },
-    # Prefixed by ͵
+    # Prefixed by ͵ (U+0375)
     4: {
         "α": 1,
         "β": 2,

+ 4 - 12
scriptshifter/rest_api.py

@@ -1,7 +1,6 @@
 import logging
 
 from base64 import b64encode
-from copy import deepcopy
 from email.message import EmailMessage
 from json import dumps
 from os import environ, urandom
@@ -15,7 +14,7 @@ from scriptshifter import (
         GIT_COMMIT, GIT_TAG,
         SMTP_HOST, SMTP_PORT)
 from scriptshifter.exceptions import ApiError
-from scriptshifter.tables import list_tables, load_table
+from scriptshifter.tables import list_tables, get_language
 from scriptshifter.trans import transliterate
 
 
@@ -89,16 +88,9 @@ def list_languages():
 @app.route("/table/<lang>")
 def dump_table(lang):
     """
-    Dump parsed transliteration table for a language.
+    Dump a language configuration from the DB.
     """
-    tbl = deepcopy(load_table(lang))
-    for sec_name in ("roman_to_script", "script_to_roman"):
-        if sec_name in tbl:
-            for hname, fn_defs in tbl[sec_name].get("hooks", {}).items():
-                tbl[sec_name]["hooks"][hname] = [
-                        (fn.__name__, kw) for (fn, kw) in fn_defs]
-
-    return jsonify(tbl)
+    return get_language(lang)
 
 
 @app.route("/options/<lang>", methods=["GET"])
@@ -106,7 +98,7 @@ def get_options(lang):
     """
     Get extra options for a table.
     """
-    tbl = load_table(lang)
+    tbl = get_language(lang)
 
     return jsonify(tbl.get("options", []))
 

+ 357 - 8
scriptshifter/tables/__init__.py

@@ -1,9 +1,13 @@
 import logging
 import re
+import sqlite3
 
+from collections import defaultdict
 from functools import cache
 from importlib import import_module
-from os import environ, path, access, R_OK
+from json import dumps as jdumps, loads as jloads
+from os import R_OK, access, environ, makedirs, path, unlink
+from shutil import move
 
 from yaml import load
 try:
@@ -11,17 +15,22 @@ try:
 except ImportError:
     from yaml import Loader
 
+from scriptshifter import DB_PATH
 from scriptshifter.exceptions import BREAK, ConfigError
 
 
 __doc__ = """
 Transliteration tables.
 
-These tables contain all transliteration information, grouped by script and
-language (or language and script? TBD)
+These tables contain all transliteration information. The static YML files are
+transformed and loaded into a database, which is the effective data source at
+runtime.
 """
 
 
+TMP_DB_PATH = path.join(
+        path.dirname(DB_PATH), "~tmp." + path.basename(DB_PATH))
+
 DEFAULT_TABLE_DIR = path.join(path.dirname(path.realpath(__file__)), "data")
 # Can be overridden for tests.
 TABLE_DIR = environ.get("TXL_CONFIG_TABLE_DIR", DEFAULT_TABLE_DIR)
@@ -52,6 +61,11 @@ TOKEN_WB_MARKER = "%"
 BOW = 1 << 1
 EOW = 1 << 0
 
+# Feature flags used in database tables.
+FEAT_S2R = 1 << 0       # Has S2R.
+FEAT_R2S = 1 << 1       # Has R2S.
+FEAT_CASEI = 1 << 2     # Case-insensitive script.
+FEAT_RE = 1 << 3        # Regular expression.
 
 logger = logging.getLogger(__name__)
 
@@ -123,6 +137,158 @@ class Token(str):
         return hash(self.content)
 
 
def init_db():
    """
    Populate the database with language data.

    This operation removes any preexisting database.

    All tables in the index file (`./data/index.yml`) will be parsed
    (including inheritance rules) and loaded into the designated DB.

    This must be done only once at bootstrap. To update individual tables,
    see populate_table(), which this function calls iteratively.

    Raises:
        OSError: on filesystem errors creating or moving the DB file.
        sqlite3.Error: on any DB error while populating.
    """
    # Build the new DB at a temporary path so that a preexisting DB is only
    # replaced on success. Create parent directories if necessary.
    makedirs(path.dirname(TMP_DB_PATH), exist_ok=True)
    if path.isfile(TMP_DB_PATH):
        # Remove previous temp file (possibly from a failed attempt).
        unlink(TMP_DB_PATH)

    conn = sqlite3.connect(TMP_DB_PATH)

    # All fallible work happens inside the try so the connection is always
    # closed and a partial temp file never survives a failure.
    try:
        # Initialize schema.
        with open(path.join(
                path.dirname(DEFAULT_TABLE_DIR), "init.sql")) as fh:
            with conn:
                conn.executescript(fh.read())

        # Populate tables.
        with open(path.join(TABLE_DIR, "index.yml")) as fh:
            tlist = load(fh, Loader=Loader)

        with conn:
            for tname, tdata in tlist.items():
                res = conn.execute(
                    """INSERT INTO tbl_language (
                        name, label, marc_code, description
                    ) VALUES (?, ?, ?, ?)""",
                    (
                        tname, tdata.get("name"), tdata.get("marc_code"),
                        tdata.get("description"),
                    )
                )
                populate_table(conn, res.lastrowid, tname)

        # A preexisting DB is overwritten ONLY on success, at this point.
        move(TMP_DB_PATH, DB_PATH)
    finally:
        conn.close()
        if path.isfile(TMP_DB_PATH):
            # Remove leftover temp file from a failed operation.
            unlink(TMP_DB_PATH)
+
+
def get_connection():
    """
    Open a connection to the default Scriptshifter DB.

    The caller is responsible for closing the returned connection, or may
    use it as a context manager.
    """
    conn = sqlite3.connect(DB_PATH)

    return conn
+
+
def populate_table(conn, tid, tname):
    """
    Parse a language table from its YML source and store it in the DB.

    Args:
        conn: open DB connection; the caller manages the transaction.
        tid (int): ID of the language row in `tbl_language`.
        tname (str): language name as listed in the index file.
    """
    data = load_table(tname)

    # Direction features supported by this language.
    feat_flags = 0
    if "script_to_roman" in data:
        feat_flags |= FEAT_S2R
    if "roman_to_script" in data:
        feat_flags |= FEAT_R2S

    conn.execute(
            "UPDATE tbl_language SET features = ? WHERE id = ?",
            (feat_flags, tid))

    for t_dir in (FEAT_S2R, FEAT_R2S):
        # BEGIN per-section loop.

        sec_name = (
                "script_to_roman" if t_dir == FEAT_S2R else "roman_to_script")
        sec = data.get(sec_name)
        if not sec:
            continue

        # Transliteration map. Smaller `sort` values have higher priority.
        for sort, (k, v) in enumerate(sec.get("map", {}), start=1):
            conn.execute(
                    """INSERT INTO tbl_trans_map (
                        lang_id, dir, src, dest, sort
                    ) VALUES (?, ?, ?, ?, ?)""",
                    (tid, t_dir, k, v, sort))

        # Hooks. Multiple functions may be grouped under the same hook name;
        # their order within the hook is preserved via `sort`.
        for k, v in sec.get("hooks", {}).items():
            for i, hook_data in enumerate(v, start=1):
                conn.execute(
                        """INSERT INTO tbl_hook (
                            lang_id, dir, name, sort, module, fn, kwargs
                        ) VALUES (?, ?, ?, ?, ?, ?, ?)""",
                        (
                            tid, t_dir, k, i, hook_data[0],
                            hook_data[1].__name__, jdumps(hook_data[2])))

        # Ignore rules (R2S only).
        # FIX: the original clobbered the `flags` name used for the language
        # features above, and a dict rule without a `re` key silently reused
        # stale (or unbound) `rule`/`flags` values from a previous iteration.
        for row in sec.get("ignore", []):
            if isinstance(row, dict):
                if "re" not in row:
                    raise ConfigError(
                            f"Unsupported ignore rule in {tname}: {row}")
                rule_flags = FEAT_RE
                rule = row["re"]
            else:
                rule_flags = 0
                rule = row

            conn.execute(
                    """INSERT INTO tbl_ignore (
                        lang_id, rule, features
                    ) VALUES (?, ?, ?)""",
                    (tid, rule, rule_flags))

        # Double caps (S2R only).
        for rule in sec.get("double_cap", []):
            conn.execute(
                    """INSERT INTO tbl_double_cap (
                        lang_id, rule
                    ) VALUES (?, ?)""",
                    (tid, rule))

        # Normalization rules (S2R only).
        for src, dest in sec.get("normalize", {}).items():
            conn.execute(
                    """INSERT INTO tbl_normalize (lang_id, src, dest)
                    VALUES (?, ?, ?)""",
                    (tid, src, dest))

        # END per-section loop.

    # UI options.
    for opt in data.get("options", []):
        conn.execute(
                """INSERT INTO tbl_option (
                    lang_id, name, label, description, dtype,
                    options, default_v
                ) VALUES (?, ?, ?, ?, ?, ?, ?)""",
                (
                    tid, opt["id"], opt["label"], opt["description"],
                    opt["type"], jdumps(opt.get("options")),
                    opt["default"]))
+
+
 @cache
 def list_tables():
     """
@@ -131,16 +297,29 @@ def list_tables():
     Note that this may not correspond to all the table files in the data
     folder, but only those exposed in the index.
     """
-    with open(path.join(TABLE_DIR, "index.yml")) as fh:
-        tdata = load(fh, Loader=Loader)
+    conn = get_connection()
+
+    with conn:
+        data = conn.execute(
+                """SELECT name, label, features, marc_code, description
+                FROM tbl_language""")
+        tdata = {
+            row[0]: {
+                "label": row[1],
+                "has_s2r": bool(row[2] & FEAT_S2R),
+                "has_r2s": bool(row[2] & FEAT_R2S),
+                "case_sensitive": not (row[2] & FEAT_CASEI),
+                "marc_code": row[3],
+                "description": row[4],
+            } for row in data
+        }
 
     return tdata
 
 
-@cache
 def load_table(tname):
     """
-    Load one transliteration table and possible parents.
+    Parse one transliteration table and possible parents from YML files.
 
     The table file is parsed into an in-memory configuration that contains
     the language & script metadata and parsing rules.
@@ -304,6 +483,176 @@ def load_hook_fn(cname, sec):
                     f"Hook function {fnname} defined in {cname} configuration "
                     f"not found in module {HOOK_PKG_PATH}.{modname}!"
                 )
-            hook_fn[cfg_hook].append((fn, fn_kwargs))
+            hook_fn[cfg_hook].append((modname, fn, fn_kwargs))
 
     return hook_fn
+
+
@cache
def get_language(lang):
    """
    Get all language configuration data from the DB.

    Results are cached after the first call for each language.

    Args:
        lang (str): language name as stored in `tbl_language`.

    Return:
        dict: language metadata, maps, hooks, ignore rules and options.
    """
    conn = get_connection()

    # FIX: close the connection in a `finally` block so it is not leaked
    # when any of the queries below raises.
    try:
        with conn:
            general = get_lang_general(conn, lang)
            lang_id = general["id"]
            data = general["data"]

            # Normalization rules.
            norm_data = get_lang_normalize(conn, lang_id)
            if len(norm_data):
                data["normalize"] = norm_data

            # Script to Roman map and hooks.
            if data["has_s2r"]:
                data["script_to_roman"] = {}
                s2r_map = tuple(get_lang_map(conn, lang_id, FEAT_S2R))
                if len(s2r_map):
                    data["script_to_roman"]["map"] = s2r_map

                s2r_hooks = get_lang_hooks(conn, lang_id, FEAT_S2R)
                if len(s2r_hooks):
                    data["script_to_roman"]["hooks"] = s2r_hooks

            # Roman to script map, ignore list, and hooks.
            if data["has_r2s"]:
                data["roman_to_script"] = {}
                r2s_map = tuple(get_lang_map(conn, lang_id, FEAT_R2S))
                if len(r2s_map):
                    data["roman_to_script"]["map"] = r2s_map

                r2s_ignore = get_lang_ignore(conn, lang_id)
                if len(r2s_ignore):
                    data["roman_to_script"]["ignore"] = r2s_ignore

                r2s_hooks = get_lang_hooks(conn, lang_id, FEAT_R2S)
                if len(r2s_hooks):
                    data["roman_to_script"]["hooks"] = r2s_hooks

            # UI options.
            opt_data = get_lang_options(conn, lang_id)
            if len(opt_data):
                data["options"] = opt_data

            double_cap = get_lang_dcap(conn, lang_id)
            if len(double_cap):
                data["double_cap"] = double_cap
    finally:
        conn.close()

    return data
+
+
def get_lang_general(conn, lang):
    """
    Language general attributes.

    Args:
        conn: open DB connection.
        lang (str): language name.

    Return:
        dict: with keys `id` (language row ID) and `data` (metadata dict).

    Raises:
        ValueError: if the language is not in the DB.
    """
    lang_q = conn.execute(
            """SELECT id, name, label, features, marc_code, description
            FROM tbl_language WHERE name = ?""", (lang,))
    lang_data = lang_q.fetchone()

    # FIX: a missing language used to surface as an opaque TypeError on
    # subscripting None. Fail with a clear message instead.
    if lang_data is None:
        raise ValueError(f"Language not found: {lang}")

    return {
        "id": lang_data[0],
        "data": {
            "name": lang_data[1],
            "label": lang_data[2],
            "has_s2r": bool(lang_data[3] & FEAT_S2R),
            "has_r2s": bool(lang_data[3] & FEAT_R2S),
            "case_sensitive": not (lang_data[3] & FEAT_CASEI),
            "marc_code": lang_data[4],
            "description": lang_data[5],
        },
    }
+
+
def get_lang_normalize(conn, lang_id):
    """ Normalization rules for a language, as a source → dest mapping. """
    rules = {}
    qry = conn.execute(
            """SELECT src, dest FROM tbl_normalize
            WHERE lang_id = ?""",
            (lang_id,))
    for src, dest in qry:
        rules[src] = dest

    return rules
+
+
def get_lang_ignore(conn, lang_id):
    """
    Ignore list as a tuple.

    The `features` column (e.g. regular expressions) is not implemented
    yet, hence only the rule text is returned.
    """
    qry = conn.execute(
            """SELECT rule, features FROM tbl_ignore
            WHERE lang_id = ?""",
            (lang_id,))

    rules = []
    for rule, _features in qry:
        rules.append(rule)

    return tuple(rules)
+
+
def get_lang_map(conn, lang_id, t_dir):
    """
    S2R or R2S map.

    Generator of tuples (source, destination), in `sort` (priority) order.

    FIX: removed `@cache`. Caching a generator function returns the same,
    already-exhausted generator object on every subsequent call with the
    same arguments (yielding nothing), and additionally keys on — and
    permanently retains — the connection object. Callers materialize the
    result themselves (e.g. via `tuple()`).
    """
    qry = conn.execute(
            """SELECT src, dest FROM tbl_trans_map
            WHERE lang_id = ? AND dir = ?
            ORDER BY sort ASC""",
            (lang_id, t_dir))

    for row in qry:
        yield (Token(row[0]), row[1])
+
+
def get_lang_options(conn, lang_id):
    """ Language options as a tuple of dictionaries. """
    qry = conn.execute(
            """SELECT name, label, description, dtype, options, default_v
            FROM tbl_option
            WHERE lang_id = ?""",
            (lang_id,))

    opts = []
    for name, label, description, dtype, options, default_v in qry:
        opts.append({
            "id": name,
            "label": label,
            "description": description,
            "type": dtype,
            # `options` is stored as a JSON blob; NULL/empty means no
            # constrained value list.
            "options": jloads(options) if options else None,
            "default": default_v,
        })

    return tuple(opts)
+
+
def get_lang_hooks(conn, lang_id, t_dir):
    """ Language hooks in sorting order, keyed by hook name. """
    qry = conn.execute(
            """SELECT name, module, fn, kwargs
            FROM tbl_hook WHERE lang_id = ? AND dir = ?
            ORDER BY name, sort""",
            (lang_id, t_dir))

    hooks = defaultdict(list)
    for hname, module, fn, kwargs in qry:
        hooks[hname].append({
            "module_name": module,
            "fn_name": fn,
            # kwargs are stored as a JSON blob.
            "kwargs": jloads(kwargs),
        })

    return hooks
+
+
def get_lang_dcap(conn, lang_id):
    """ Double-capitalization rules for a language, as a tuple. """
    qry = conn.execute(
            """SELECT rule
            FROM tbl_double_cap WHERE lang_id = ?""",
            (lang_id,))

    rules = [rule for (rule,) in qry]

    return tuple(rules)

+ 2 - 2
scriptshifter/tables/data/asian_cyrillic.yml

@@ -391,8 +391,8 @@ roman_to_script:
 script_to_roman:
   map:
     
-    "\u00AB": """
-    "\u00BB": """
+    "\u00AB": "\""
+    "\u00BB": "\""
     "\u2116": "No\u0332"
     "\u0400": "E\u0300"
     "\u0401": "E\u0308"

+ 1 - 1
scriptshifter/tables/data/bashkir_cyrillic.yml

@@ -23,7 +23,7 @@ roman_to_script:
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
     "TH": "\u04AA"
-    "Th": "\u04AA"s
+    "Th": "\u04AA"
     "th": "\u04AB"
     "J": "\u04B8"
     "j": "\u04B9"

+ 2 - 2
scriptshifter/tables/data/index.yml

@@ -47,7 +47,7 @@ bengali:
 bulgarian:
   marc_code: bul
   name: Bulgarian
-buriat:
+buriat_cyrillic:
   marc_code: bua
   name: Buriat (Cyrillic)
 burmese:
@@ -111,7 +111,7 @@ kannada:
 kara-kalpak_cyrillic:
   marc_code: kaa
   name: Kara-Kalpak (Cyrillic)
-karachai-balkar_cyrillic:
+karachay-balkar_cyrillic:
   marc_code: krc
   name: Karachay-Balkar  (Cyrillic)
 karelian_cyrillic:

+ 2 - 2
scriptshifter/tables/data/kara-kalpak_cyrillic.yml

@@ -27,11 +27,11 @@ roman_to_script:
 script_to_roman:
   map:
     "\u040E": "W"
-    "\u045E"" "w"
+    "\u045E": "w"
     "\u0492": "Gh"
     "\u0493": "gh"
     "\u049A": "Q"
-    "\u-49B": "q"
+    "\u049B": "q"
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
     "\u04AE": "U\u0307"

+ 4 - 4
scriptshifter/tables/data/komi_cyrillic.yml

@@ -5,10 +5,10 @@ general:
 
 roman_to_script:
   map:
-    "D\u0320Z\u0320\H\u\0320": "\u0496"
-    "D\u0320Z\u0320\h\u\0320": "\u0496"
-    "D\u0320z\u0320\h\u\0320": "\u0496"
-    "d\u0320z\u0320\h\u\0320": "\u0497"
+    "D\u0320Z\u0320H\u0320": "\u0496"
+    "D\u0320Z\u0320h\u0320": "\u0496"
+    "D\u0320z\u0320h\u0320": "\u0496"
+    "d\u0320z\u0320h\u0320": "\u0497"
     "D\uFE20Z\uFE21": "\u0506"
     "D\uFE20z\uFE21": "\u0506"
     "d\uFE20z\uFE21": "\u0507"

+ 2 - 2
scriptshifter/tables/data/mongolian_mongol_bichig.yml

@@ -6,12 +6,12 @@ general:
 roman_to_script:
 
   map:
-    "\u002Daca": "\u202F\u1820\u1834\u1820
+    "\u002Daca": "\u202F\u1820\u1834\u1820"
     "\u002DA": "\u180E\u1820"
     "\u002Da": "\u180E\u1820"
     "A": "\u1820"
     "a": "\u1820"
-    "\u002Dece": "\u202F\u1821\u1834\u1821
+    "\u002Dece": "\u202F\u1821\u1834\u1821"
     "\u002DE": "\u180E\u1821"
     "\u002De": "\u180E\u1821"
     "\u002D": "\u202F"

+ 1 - 1
scriptshifter/tables/data/yiddish.yml

@@ -4,7 +4,7 @@ general:
 options:
   - id: loshn_koydesh
     label: Loshn Koydesh
-    description: [TODO]
+    description: "Apply Loshn Koydesh vocalization."
     type: boolean
     default: false
 

+ 107 - 0
scriptshifter/tables/init.sql

@@ -0,0 +1,107 @@
+/*
+ * Master language table.
+ *
+ * Overview of languages available in Scriptshifter.
+ */
+CREATE TABLE tbl_language (
+    id INTEGER PRIMARY KEY,
+    name TEXT UNIQUE,
+    label TEXT,
+    marc_code TEXT,
+    description TEXT,
+    features TINYINT DEFAULT 0
+);
+
+/*
+ * Transliteration maps.
+ *
+ * Each row is a S2R or R2S pair associated with a language ID.
+ */
+CREATE TABLE tbl_trans_map (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    dir TINYINT NOT NULL DEFAULT 0,  /* 1 = S2R; 2 = R2S */
+    src TEXT NOT NULL,
+    dest TEXT,
+    sort INT NOT NULL,  /* Smaller values have higher priority. */
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+CREATE UNIQUE INDEX idx_trans_lookup ON tbl_trans_map (lang_id, dir, src);
+CREATE INDEX idx_trans_map_sort ON tbl_trans_map (sort ASC);
+
+/*
+ * Processing hooks.
+ *
+ * Note that multiple functions may be grouped under the same hook, lang, and
+ * direction. These are ordered by `sort`.
+ */
+CREATE TABLE tbl_hook (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    dir TINYINT NOT NULL DEFAULT 0,  /* 1 = S2R; 2 = R2S */
+    name TEXT NOT NULL, /* Hook name. */
+    sort INT NOT NULL,  /* Function sorting order within the hook. */
+    module TEXT NOT NULL, /* Module name. */
+    fn TEXT NOT NULL,   /* Function name. */
+    kwargs TEXT,        /* KW arguments as JSON blob. */
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+CREATE INDEX idx_hook_lookup ON tbl_hook (lang_id, dir);
+CREATE INDEX idx_hookname_lookup ON tbl_hook (name);
+CREATE INDEX idx_hook_sort ON tbl_hook (sort ASC);
+
+/*
+ * Ignore lists for R2S.
+ */
+CREATE TABLE tbl_ignore (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    rule TEXT NOT NULL,
+    features TINYINT,  /* 1 = case insensitive; 2 = regular expression. */
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+
+/*
+ * Double capitals.
+ */
+CREATE TABLE tbl_double_cap (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    rule TEXT NOT NULL,
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+
+/*
+ * Normalization rules.
+ */
+CREATE TABLE tbl_normalize (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    src TEXT NOT NULL,
+    dest TEXT NOT NULL,
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+
+/*
+ * Input options.
+ */
+CREATE TABLE tbl_option (
+    id INTEGER PRIMARY KEY,
+    lang_id INTEGER NOT NULL,
+    name TEXT NOT NULL,
+    label TEXT NOT NULL,
+    description TEXT,
+    dtype TEXT,
+    options TEXT,
+    default_v TEXT,
+
+    FOREIGN KEY (lang_id) REFERENCES tbl_language(id) ON DELETE CASCADE
+);
+CREATE UNIQUE INDEX idx_option_lookup ON tbl_option (lang_id, name);
+
+

+ 2 - 2
scriptshifter/templates/index.html

@@ -60,7 +60,7 @@
             <label for="lang">Language</label>
             <select id="lang" name="lang">
                 {% for k, v in languages.items() %}
-                    <option value="{{ k }}">{{ v["name"] }}</option>
+                    <option value="{{ k }}">{{ v["label"] }}</option>
                 {% endfor %}
             </select>
         </fieldset>
@@ -176,4 +176,4 @@
     {% endif %}
 
     <script type="text/javascript" src="/static/ss.js"></script>
-{% endblock %}
+{% endblock %}

+ 215 - 204
scriptshifter/trans.py

@@ -1,9 +1,13 @@
 import logging
 
+from importlib import import_module
 from re import compile
 
 from scriptshifter.exceptions import BREAK, CONT
-from scriptshifter.tables import BOW, EOW, WORD_BOUNDARY, load_table
+from scriptshifter.tables import (
+        BOW, EOW, WORD_BOUNDARY, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
+        get_connection, get_lang_dcap, get_lang_general, get_lang_hooks,
+        get_lang_ignore, get_lang_map, get_lang_normalize)
 
 
 # Match multiple spaces.
@@ -15,6 +19,8 @@ logger = logging.getLogger(__name__)
 class Context:
     """
     Context used within the transliteration and passed to hook functions.
+
+    Use within a `with` block for proper cleanup.
     """
     @property
     def src(self):
@@ -28,23 +34,35 @@ class Context:
     def src(self):
         raise NotImplementedError("Attribute is read-only.")
 
-    def __init__(self, src, general, langsec, options={}):
+    def __init__(self, lang, src, t_dir, options={}):
         """
         Initialize a context.
 
         Args:
             src (str): The original text. Read-only.
-            general (dict): general section of the current config.
-            langsec (dict): Language configuration section being used.
+            t_dir (int): the direction of transliteration.
+                    Either FEAT_R2S or FEAT_S2R.
             options (dict): extra options as a dict.
         """
+        self.lang = lang
         self._src = src
-        self.general = general
+        self.t_dir = t_dir
+        self.conn = get_connection()
+        with self.conn as conn:
+            general = get_lang_general(conn, self.lang)
+        self.general = general["data"]
+        self.lang_id = general["id"]
         self.options = options
-        self.langsec = langsec
+        self.hooks = get_lang_hooks(self.conn, self.lang_id, self.t_dir)
         self.dest_ls = []
         self.warnings = []
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.conn.close()
+
 
 def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
     """
@@ -73,234 +91,225 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
     Return:
         str: The transliterated string.
     """
-    source_str = "Latin" if t_dir == "r2s" else lang
-    target_str = lang if t_dir == "r2s" else "Latin"
-    logger.info(f"Transliteration is from {source_str} to {target_str}.")
-
-    cfg = load_table(lang)
-    logger.info(f"Loaded table for {lang}.")
-
-    # General directives.
-    general = cfg.get("general", {})
-
-    if t_dir == "s2r" and "script_to_roman" not in cfg:
-        raise NotImplementedError(
-            f"Script-to-Roman transliteration not yet supported for {lang}."
-        )
-    elif t_dir == "r2s" and "roman_to_script" not in cfg:
-        raise NotImplementedError(
-            f"Roman-to-script transliteration not yet supported for {lang}."
-        )
+    # Map t_dir to constant.
+    t_dir = FEAT_S2R if t_dir == "s2r" else FEAT_R2S
 
-    langsec = (
-            cfg["script_to_roman"] if t_dir == "s2r"
-            else cfg["roman_to_script"])
-    # langsec_dir = langsec.get("directives", {})
-    langsec_hooks = langsec.get("hooks", {})
+    source_str = "Roman" if t_dir == FEAT_R2S else lang
+    target_str = lang if t_dir == FEAT_R2S else "Roman"
+    logger.info(f"Transliteration is from {source_str} to {target_str}.")
 
     src = src.strip()
     options["capitalize"] = capitalize
-    ctx = Context(src, general, langsec, options)
-
-    # This hook may take over the whole transliteration process or delegate it
-    # to some external process, and return the output string directly.
-    if _run_hook("post_config", ctx, langsec_hooks) == BREAK:
-        return getattr(ctx, "dest", ""), ctx.warnings
-
-    if "normalize" in ctx.langsec:
-        _normalize_src(ctx)
-
-    if _run_hook("post_normalize", ctx, langsec_hooks) == BREAK:
-        return getattr(ctx, "dest", ""), ctx.warnings
-
-    # Loop through source characters. The increment of each loop depends on
-    # the length of the token that eventually matches.
-    ignore_list = langsec.get("ignore", [])  # Only present in R2S
-    ctx.cur = 0
-    word_boundary = langsec.get("word_boundary", WORD_BOUNDARY)
-
-    while ctx.cur < len(ctx.src):
-        # Reset cursor position flags.
-        # Carry over extended "beginning of word" flag.
-        ctx.cur_flags = 0
-        cur_char = ctx.src[ctx.cur]
-
-        # Look for a word boundary and flag word beginning/end it if found.
-        if _is_bow(ctx.cur, ctx, word_boundary):
-            # Beginning of word.
-            logger.debug(f"Beginning of word at position {ctx.cur}.")
-            ctx.cur_flags |= BOW
-        if _is_eow(ctx.cur, ctx, word_boundary):
-            # End of word.
-            logger.debug(f"End of word at position {ctx.cur}.")
-            ctx.cur_flags |= EOW
-
-        # This hook may skip the parsing of the current
-        # token or exit the scanning loop altogether.
-        hret = _run_hook("begin_input_token", ctx, langsec_hooks)
-        if hret == BREAK:
-            logger.debug("Breaking text scanning from hook signal.")
-            break
-        if hret == CONT:
-            logger.debug("Skipping scanning iteration from hook signal.")
-            continue
-
-        # Check ignore list. Find as many subsequent ignore tokens
-        # as possible before moving on to looking for match tokens.
-        ctx.tk = None
-        while True:
-            ctx.ignoring = False
-            for ctx.tk in ignore_list:
-                hret = _run_hook("pre_ignore_token", ctx, langsec_hooks)
-                if hret == BREAK:
-                    break
-                if hret == CONT:
-                    continue
+    with Context(lang, src, t_dir, options) as ctx:
+
+        if t_dir == FEAT_S2R and not ctx.general["has_s2r"]:
+            raise NotImplementedError(
+                f"Script-to-Roman not yet supported for {lang}."
+            )
+        if t_dir == FEAT_R2S and not ctx.general["has_r2s"]:
+            raise NotImplementedError(
+                f"Roman-to-script not yet supported for {lang}."
+            )
+
+        # This hook may take over the whole transliteration process or delegate
+        # it to some external process, and return the output string directly.
+        if _run_hook("post_config", ctx) == BREAK:
+            return getattr(ctx, "dest", ""), ctx.warnings
+
+        _normalize_src(ctx, get_lang_normalize(ctx.conn, ctx.lang_id))
+
+        if _run_hook("post_normalize", ctx) == BREAK:
+            return getattr(ctx, "dest", ""), ctx.warnings
+
+        lang_map = list(get_lang_map(ctx.conn, ctx.lang_id, ctx.t_dir))
+
+        # Loop through source characters. The increment of each loop depends on
+        # the length of the token that eventually matches.
+        ctx.cur = 0
+
+        while ctx.cur < len(ctx.src):
+            # Reset cursor position flags.
+            # Carry over extended "beginning of word" flag.
+            ctx.cur_flags = 0
+            cur_char = ctx.src[ctx.cur]
+
+            # Look for a word boundary and flag word beginning/end it if found.
+            if _is_bow(ctx.cur, ctx, WORD_BOUNDARY):
+                # Beginning of word.
+                logger.debug(f"Beginning of word at position {ctx.cur}.")
+                ctx.cur_flags |= BOW
+            if _is_eow(ctx.cur, ctx, WORD_BOUNDARY):
+                # End of word.
+                logger.debug(f"End of word at position {ctx.cur}.")
+                ctx.cur_flags |= EOW
+
+            # This hook may skip the parsing of the current
+            # token or exit the scanning loop altogether.
+            hret = _run_hook("begin_input_token", ctx)
+            if hret == BREAK:
+                logger.debug("Breaking text scanning from hook signal.")
+                break
+            if hret == CONT:
+                logger.debug("Skipping scanning iteration from hook signal.")
+                continue
 
-                step = len(ctx.tk)
-                if ctx.tk == ctx.src[ctx.cur:ctx.cur + step]:
-                    # The position matches an ignore token.
-                    hret = _run_hook("on_ignore_match", ctx, langsec_hooks)
+            # Check ignore list. Find as many subsequent ignore tokens
+            # as possible before moving on to looking for match tokens.
+            ctx.tk = None
+            while True:
+                ctx.ignoring = False
+                for ctx.tk in get_lang_ignore(ctx.conn, ctx.lang_id):
+                    hret = _run_hook("pre_ignore_token", ctx)
                     if hret == BREAK:
                         break
                     if hret == CONT:
                         continue
 
-                    logger.info(f"Ignored token: {ctx.tk}")
-                    ctx.dest_ls.append(ctx.tk)
-                    ctx.cur += step
-                    cur_char = ctx.src[ctx.cur]
-                    ctx.ignoring = True
+                    step = len(ctx.tk)
+                    if ctx.tk == ctx.src[ctx.cur:ctx.cur + step]:
+                        # The position matches an ignore token.
+                        hret = _run_hook("on_ignore_match", ctx)
+                        if hret == BREAK:
+                            break
+                        if hret == CONT:
+                            continue
+
+                        logger.info(f"Ignored token: {ctx.tk}")
+                        ctx.dest_ls.append(ctx.tk)
+                        ctx.cur += step
+                        cur_char = ctx.src[ctx.cur]
+                        ctx.ignoring = True
+                        break
+                # We looked through all ignore tokens, not found any. Move on.
+                if not ctx.ignoring:
                     break
-            # We looked through all ignore tokens, not found any. Move on.
-            if not ctx.ignoring:
-                break
-            # Otherwise, if we found a match, check if the next position may be
-            # ignored as well.
-
-        delattr(ctx, "tk")
-        delattr(ctx, "ignoring")
-
-        # Begin transliteration token lookup.
-        ctx.match = False
-
-        for ctx.src_tk, ctx.dest_str in langsec["map"]:
-            hret = _run_hook("pre_tx_token", ctx, langsec_hooks)
-            if hret == BREAK:
-                break
-            if hret == CONT:
-                continue
+                # Otherwise, if we found a match, check if the next position
+                # may be ignored as well.
 
-            step = len(ctx.src_tk.content)
-            # If the token is longer than the remaining of the string,
-            # it surely won't match.
-            if ctx.cur + step > len(ctx.src):
-                continue
+            delattr(ctx, "tk")
+            delattr(ctx, "ignoring")
 
-            # If the first character of the token is greater (= higher code
-            # point value) than the current character, then break the loop
-            # without a match, because we know there won't be any more match
-            # due to the alphabetical ordering.
-            if ctx.src_tk.content[0] > cur_char:
-                logger.debug(
-                        f"{ctx.src_tk.content} is after "
-                        f"{ctx.src[ctx.cur:ctx.cur + step]}. Breaking loop.")
-                break
+            # Begin transliteration token lookup.
+            ctx.match = False
 
-            # If src_tk has a WB flag but the token is not at WB, skip.
-            if (
-                (ctx.src_tk.flags & BOW and not ctx.cur_flags & BOW)
-                or
-                # Can't rely on EOW flag, we must check on the last character
-                # of the potential match.
-                (ctx.src_tk.flags & EOW and not _is_eow(
-                        ctx.cur + step - 1, ctx, word_boundary))
-            ):
-                continue
-
-            # Longer tokens should be guaranteed to be scanned before their
-            # substrings at this point.
-            # Similarly, flagged tokens are evaluated first.
-            if ctx.src_tk.content == ctx.src[ctx.cur:ctx.cur + step]:
-                ctx.match = True
-                # This hook may skip this token or break out of the token
-                # lookup for the current position.
-                hret = _run_hook("on_tx_token_match", ctx, langsec_hooks)
+            for ctx.src_tk, ctx.dest_str in lang_map:
+                hret = _run_hook("pre_tx_token", ctx)
                 if hret == BREAK:
                     break
                 if hret == CONT:
                     continue
 
-                # A match is found. Stop scanning tokens, append result, and
-                # proceed scanning the source.
+                step = len(ctx.src_tk.content)
+                # If the token is longer than the remaining of the string,
+                # it surely won't match.
+                if ctx.cur + step > len(ctx.src):
+                    continue
 
-                # Capitalization.
+                # If the first character of the token is greater (= higher code
+                # point value) than the current character, then break the loop
+                # without a match, because we know there won't be any more
+                # match due to the alphabetical ordering.
+                if ctx.src_tk.content[0] > cur_char:
+                    logger.debug(
+                            f"{ctx.src_tk.content} is after "
+                            f"{ctx.src[ctx.cur:ctx.cur + step]}. "
+                            "Breaking loop.")
+                    break
+
+                # If src_tk has a WB flag but the token is not at WB, skip.
                 if (
-                    (ctx.options["capitalize"] == "first" and ctx.cur == 0)
+                    (ctx.src_tk.flags & BOW and not ctx.cur_flags & BOW)
                     or
-                    (
-                        ctx.options["capitalize"] == "all"
-                        and ctx.cur_flags & BOW
-                    )
+                    # Can't rely on EOW flag, we must check on the last
+                    # character of the potential match.
+                    (ctx.src_tk.flags & EOW and not _is_eow(
+                            ctx.cur + step - 1, ctx, WORD_BOUNDARY))
                 ):
-                    logger.info("Capitalizing token.")
-                    double_cap = False
-                    for dcap_rule in ctx.langsec.get("double_cap", []):
-                        if ctx.dest_str == dcap_rule:
-                            ctx.dest_str = ctx.dest_str.upper()
-                            double_cap = True
-                            break
-                    if not double_cap:
-                        ctx.dest_str = (
-                                ctx.dest_str[0].upper() + ctx.dest_str[1:])
+                    continue
 
-                ctx.dest_ls.append(ctx.dest_str)
-                ctx.cur += step
-                break
+                # Longer tokens should be guaranteed to be scanned before their
+                # substrings at this point.
+                # Similarly, flagged tokens are evaluated first.
+                if ctx.src_tk.content == ctx.src[ctx.cur:ctx.cur + step]:
+                    ctx.match = True
+                    # This hook may skip this token or break out of the token
+                    # lookup for the current position.
+                    hret = _run_hook("on_tx_token_match", ctx)
+                    if hret == BREAK:
+                        break
+                    if hret == CONT:
+                        continue
 
-        if ctx.match is False:
-            delattr(ctx, "match")
-            hret = _run_hook("on_no_tx_token_match", ctx, langsec_hooks)
-            if hret == BREAK:
-                break
-            if hret == CONT:
-                continue
+                    # A match is found. Stop scanning tokens, append result,
+                    # and proceed scanning the source.
+
+                    # Capitalization.
+                    if (
+                        (ctx.options["capitalize"] == "first" and ctx.cur == 0)
+                        or
+                        (
+                            ctx.options["capitalize"] == "all"
+                            and ctx.cur_flags & BOW
+                        )
+                    ):
+                        logger.info("Capitalizing token.")
+                        double_cap = False
+                        for dcap_rule in get_lang_dcap(ctx.conn, ctx.lang_id):
+                            if ctx.dest_str == dcap_rule:
+                                ctx.dest_str = ctx.dest_str.upper()
+                                double_cap = True
+                                break
+                        if not double_cap:
+                            ctx.dest_str = (
+                                    ctx.dest_str[0].upper() + ctx.dest_str[1:])
+
+                    ctx.dest_ls.append(ctx.dest_str)
+                    ctx.cur += step
+                    break
+
+            if ctx.match is False:
+                delattr(ctx, "match")
+                hret = _run_hook("on_no_tx_token_match", ctx)
+                if hret == BREAK:
+                    break
+                if hret == CONT:
+                    continue
 
-            # No match found. Copy non-mapped character (one at a time).
-            logger.info(
-                    f"Token {cur_char} (\\u{hex(ord(cur_char))[2:]}) "
-                    f"at position {ctx.cur} is not mapped.")
-            ctx.dest_ls.append(cur_char)
-            ctx.cur += 1
-        else:
-            delattr(ctx, "match")
-        delattr(ctx, "cur_flags")
+                # No match found. Copy non-mapped character (one at a time).
+                logger.info(
+                        f"Token {cur_char} (\\u{hex(ord(cur_char))[2:]}) "
+                        f"at position {ctx.cur} is not mapped.")
+                ctx.dest_ls.append(cur_char)
+                ctx.cur += 1
+            else:
+                delattr(ctx, "match")
+            delattr(ctx, "cur_flags")
 
-    delattr(ctx, "cur")
+        delattr(ctx, "cur")
 
-    # This hook may take care of the assembly and cause the function to return
-    # its own return value.
-    hret = _run_hook("pre_assembly", ctx, langsec_hooks)
-    if hret is not None:
-        return hret, ctx.warnings
+        # This hook may take care of the assembly and cause the function to
+        # return its own return value.
+        hret = _run_hook("pre_assembly", ctx)
+        if hret is not None:
+            return hret, ctx.warnings
 
-    logger.debug(f"Output list: {ctx.dest_ls}")
-    ctx.dest = "".join(ctx.dest_ls)
+        logger.debug(f"Output list: {ctx.dest_ls}")
+        ctx.dest = "".join(ctx.dest_ls)
 
-    # This hook may reassign the output string and/or cause the function to
-    # return it immediately.
-    hret = _run_hook("post_assembly", ctx, langsec_hooks)
-    if hret is not None:
-        return hret, ctx.warnings
+        # This hook may reassign the output string and/or cause the function to
+        # return it immediately.
+        hret = _run_hook("post_assembly", ctx)
+        if hret is not None:
+            return hret, ctx.warnings
 
-    # Strip multiple spaces and leading/trailing whitespace.
-    ctx.dest = MULTI_WS_RE.sub(r"\1", ctx.dest.strip())
+        # Strip multiple spaces and leading/trailing whitespace.
+        ctx.dest = MULTI_WS_RE.sub(r"\1", ctx.dest.strip())
 
-    return ctx.dest, ctx.warnings
+        return ctx.dest, ctx.warnings
 
 
def _normalize_src(ctx, norm_rules):
    """ Replace each normalization token in the source with its mapping. """
    normalized = ctx.src
    for token, replacement in norm_rules.items():
        normalized = normalized.replace(token, replacement)
    ctx._src = normalized
    logger.debug(f"Normalized source: {ctx.src}")
 
@@ -317,11 +326,13 @@ def _is_eow(cur, ctx, word_boundary):
     ) and (ctx.src[cur] not in word_boundary)
 
 
-def _run_hook(hname, ctx, hooks):
+def _run_hook(hname, ctx):
     ret = None
-    for hook_def in hooks.get(hname, []):
-        kwargs = hook_def[1] if len(hook_def) > 1 else {}
-        ret = hook_def[0](ctx, **kwargs)
+    for hook_def in ctx.hooks.get(hname, []):
+        fn = getattr(
+                import_module("." + hook_def["module_name"], HOOK_PKG_PATH),
+                hook_def["fn_name"])
+        ret = fn(ctx, **hook_def["kwargs"])
         if ret in (BREAK, CONT):
             # This will stop parsing hooks functions and tell the caller to
             # break out of the outer loop or skip iteration.

+ 74 - 0
sscli

@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+__doc__ = """ Scriptshifter command line interface. """
+
+
+import click
+
+from glob import glob
+from os import path
+
+from scriptshifter import DB_PATH
+from scriptshifter.tables import init_db as _init_db
+from tests import test_sample
+
+
@click.group()
def cli():
    """ Scriptshifter CLI. """
    pass


@cli.group(name="admin")
def admin_grp():
    """ Admin operations. """
    pass


@admin_grp.command()
def init_db():
    """
    Initialize SS database.

    This must be rerun whenever any of the configuration tables in
    `scriptshifter/tables/data` changes.
    """
    _init_db()

    click.echo(f"Initialized Scriptshifter DB in {DB_PATH}")


@cli.group(name="test")
def test_grp():
    """ Test operations. """
    pass


@test_grp.command()
def list_samples():
    """ List string sample sets that can be tested. """
    path_ptn = path.join(
            path.dirname(path.realpath(__file__)),
            "tests", "data", "script_samples", "*.csv")

    click.echo("Sample string sets available for batch testing:")
    # glob() returns results in arbitrary order; sort for stable output.
    for fn in sorted(glob(path_ptn)):
        click.echo(path.splitext(path.basename(fn))[0])


@test_grp.command()
@click.argument("lang")
def samples(lang):
    """
    Test sample strings for language LANG.

    LANG must match one of the names obtained with `test list-samples` command.

    The command will generate a test report file.
    """
    return test_sample(lang)


@cli.group(name="trans")
def trans_grp():
    """ Transliteration and transcription operations. """
    pass


if __name__ == "__main__":
    cli()

+ 2 - 1
tests/__init__.py

@@ -20,7 +20,8 @@ def reload_tables():
     reload(scriptshifter.tables)  # Reload new config dir.
     from scriptshifter import tables
     tables.list_tables.cache_clear()
-    tables.load_table.cache_clear()
+    tables.get_language.cache_clear()
+    tables.get_lang_map.cache_clear()
 
     return tables
 

+ 3 - 0
tests/data/script_samples/arabic.csv

@@ -0,0 +1,3 @@
+arabic,نظام الحكم في عمان : من إمامة الإنتخاب الى السلطنة الوراثية,Niẓām al-ḥukm fī ʻUmān : min imāmat al-intikhāb ilá al-salṭanah al-wirāthīyah,,
+arabic,ندوة علاقات مصر بدول حوض النيل في ظل رئاسة مصر للاتحاد الإفريقي,Nadwat ʻAlāqāt Miṣr bi-Duwal Ḥawḍ al-Nīl fī ẓill Riʼāsat Miṣr lil-Ittiḥād al-Ifrīqī,,
+arabic,تهذيب البيان والجمع في الفرق بين التكليف والوضع,Tahdhīb al-bayān wa-al-jamʻ fī al-farq bayna al-taklīf wa-al-waḍʻ,,

+ 94 - 0
tests/data/script_samples/arabic2.csv

@@ -0,0 +1,94 @@
+arabic,قضايا فكرية و سياسية باقلام كردية عراقية ,Qaḍāyā fikrīyah wa siyāsīyah bi-aqlām Kurdīyah ʻIrāqīyah,,
+arabic,‏الأستاذ الدكتور عماد الجواهري؛ مراجعة و تقديم الأستاذ الدكتور عبد الفتاح علي البوتاني,al-Ustādh al-Duktūr ʻImād al-Jawāhirī; murājaʻat wa taqdīm al-Ustādh al-Duktūr ʻAbd al-Fattāḥ ʻAlī al-Būtānī,,
+arabic,العلاقة الشيعية - الكوردية ومستقبلها,al-ʻAlāqah al-Shīʻīyah - al-Kūrdīyah wa-mustaqbaluhā,,
+arabic,مركز دراسات رووداو,Markaz Dirāsāt Rūwūdāw,,
+arabic,ماذا يخبئ الغربال في السياسة العراقية,Mādhā yukhabbiʼ al-ghurbāl fī al-siyāsah al-ʻIrāqīyah,,
+arabic,الحزب الشيوعي العراقي .. المكونات السياسية .. الحكومة,al-Ḥizb al-Shuyūʻī al-ʻIrāqī .. al-mukawwināt al-siyāsīyah .. al-ḥukūmah,,
+arabic,الدولة الأموية في الشام,al-Dawlah al-Umawīyah fī al-Shām,,
+arabic,تأليف أنيس زكريا النصولي,taʼlīf Anīs Zakarīyā al-Nuṣūlī.,,
+arabic,الدين وسياسة الدولة في بلاد الرافدين في ضوء النصوص المسمارية، (٢٨٠٠ ق.م-٥٣٩ ق.م)  ,"al-Dīn wa-siyāsat al-dawlah fī bilād al-Rāfidayn fī ḍawʼ al-nuṣūṣ al-mismārīyah, (2800 Q.M-539 Q.M)",,
+arabic,المدن والموانيء التجارية في شرق الجزيرة العربية منذ بداية الالف الثالث ق.م حتى نهاية الالف الاول ق.م ,al-Mudun wa-al-mawānīʼ al-tijārīyah fī sharq al-jazīrah al-ʻArabīyah mundhu bidāyat al-alf al-thālith Q.M ḥattá nihāyat al-alf al-awwal Q.M,,
+arabic,أمير الإنسانية وقائد الدبلماسية  ,Amīr al-insānīyah wa-qāʼid al-diblumāsīyah,,
+arabic,النقد الادبي واللغوي المعاصر ,al-Naqd al-adabī wa-al-lughawī al-muʻāṣir,,
+arabic,جدلية الاصالة والتجديد : المؤتمر النقدي الرابع والعشرون,Jadalīyat al-aṣālah wa-al-tajdīd : al-muʼtamar al-naqdī al-rābiʻ wa-al-ʻishrūn,,
+arabic,أماني سراج عبدالوهاب أبوزيد,Amānī Sirāj ʻAbd al-Wahhāb Abū Zayd,,
+arabic,المدن والموانيء التجارية في شرق الجزيرة العربية منذ بداية الالف الثالث ق.م حتى نهاية الالف الاول ق.م,al-Mudun wa-al-mawānīʼ al-tijārīyah fī sharq al-Jazīrah al-ʻArabīyah mundhu bidāyat al-alf al-thālith Q.M ḥattá nihāyat al-alf al-awwal Q.M,,
+arabic,محمد صوضان,Muḥammad Ṣawḍān,,
+arabic,كتاب سفينة السعادة لاهل الضعف والنجادة في مديح النبي، المعروفة، بالعشرينيات ‏ ," Kitāb Safīnat al-saʻādah li-ahl al-ḍaʻf wa-al-najādah fī madīḥ al-Nabī, al-maʻrūfah, bi-al-ʻIshrīniyāt",,
+arabic,من الشريعة الموروثة إلى الإنسان الخليفة ,Min al-sharīʻah al-mawrūthah ilá al-insān al-khalīfah,,
+arabic,إعداد محمد هشام بوعتور, iʻdād Muḥammad Hishām Bū ʻAttūr,,
+arabic,موسوعة الحكايات الخرافية الفلسطينية ,Mawsūʻat al-ḥikāyāt al-khurāfīyah al-Filasṭīnīyah,,
+arabic,مؤسسة تامر للتعليم المجتمعي,Muʼassasat Tāmir lil-Taʻlīm al-Mujtamaʻī,,
+arabic,نصوص ودراسة في ‌الحكاية الشعبية الفلسطينية, nuṣūṣ wa-dirāsah fi al-ḥikāyah al-shaʻbīyah al-Filasṭīnīyah,,
+arabic,تأليف إبراهيم مهوي و شريف كناعنه ,taʼlīf Ibrāhīm Muhawwī wa-Sharīf Kanāʻinah,,
+arabic,التراث الفلسطيني بين الطمس والاحياء, al-Turāth al-Filasṭīnī bayna al-ṭams wa-al-iḥyāʼ,,
+arabic,أشرف على تحريرها منعم حداد,ashrafa ʻalá taḥrīrihā Munʻim Ḥaddād,,
+arabic,من تراثنا الشعبي في السهل الساحلي الفلسطيني ,Min turāthinā al-shaʻbī fī al-sahl al-sāḥilī al-Filastīnī,,
+arabic,بقلم حسن محمد عوض,bi-qalam Ḥasan Muḥammad ʻAwaḍ,,
+arabic,تاريخ ما لم يذكره التاريخ,Tārīkh mā lam yadhkurhu al-tārīkh,,
+arabic,دراسة ميدانية فى التراث الشعبى الفلسطينى,dirāsah maydānīyah fī al-turāth al-shaʻbī al-Filasṭīnī ,,
+arabic,بيت الفلاح الفلسطيني, Bayt al-falāḥ al-Filasṭīnī,,
+arabic,معان ثقافية وعادات وتقاليد اجتماعية، اثاث وفراش وادوات,"maʻānin thaqāfīyah wa-ʻādāt wa-taqālīd ijtimāʻīyah, athāth wa-firāsh wa-adawāt",,
+arabic,الحزازير والألعاب الشعبية الفلسطينية,al-Ḥazāzīr wa-al-alʻāb al-shaʻbīyah al-Filasṭīnīyah,,
+arabic,المرأة في المثل الشعبي في الأردن وفلسطين,al-Marʼah fī al-mathal al-shaʻbī fī al-Urdun wa-Filasṭīn,,
+arabic,الأحاجي والالغاز الادبية ,al-Aḥājī wa-al-alghāz al-adabīyah,,
+arabic,فصول الحياة في قريتي,fuṣūl al-ḥayāh fī qaryatī,,
+arabic,قرية الدمينة الشرقية بين الماضي والحاضر,Qaryat al-Dumaynah al-Sharqīyah bayna al-māḍī wa-al-ḥāḍir,,
+arabic,الألعاب الشعبية في الجزيرة السورية,al-Alʻāb al-shaʻbīyah fī al-Jazīrah al-Sūrīyah,,
+arabic,وزارة الثقافة، منشورات الهيئه العامة السورية للكتاب,"Wizārat al-Thaqāfah, Manshūrāt al-Hayʼah al-ʻĀmmah al-Sūrīyah lil-Kitāb",,
+arabic,طرائف الأمس غرائب اليوم,Ṭarāʼif al-ams gharāʼib al-Yawm,,
+arabic,صور من حياة النبك وجبل القلمون في أواسط القرن التاسع عشر,ṣuwar min ḥayāt al-Nabk wa-Jabal al-Qalamūn fī awāsiṭ al-qarn al-tāsiʻ ʻashar,,
+arabic,ولدت مرتين,Wulidtu marratayn,,
+arabic,من حكايا الدمع في سوريا,min Ḥakāyā al-damʻ fī Sūriyā,,
+arabic,العين والماء والفخار في التراث الساحلي الريفي,al-ʻAyn wa-al-māʼ wa-al-fukhkhār fī al-turāth al-sāḥilī al-rīfī,,
+arabic,المواسم التقليدية بمنطقة الأبيض سيدي الشيخ، الوعدات,"al-Mawāsim al-taqlīdīyah bi-minṭaqat al-Abyaḍ Sīdī al-Shaykh, al-Waʻdāt",,
+arabic,فضاءات تلقي الادب الشعبي,Faḍāʼāt talaqqī al-adab al-shaʻbī,,
+arabic,المجتمع الجزائري وفعالياته في العهد العثماني,al-Mujtamaʻ al-Jazāʼirī wa-faʻʻālīyātuhu fī al-ʻahd al-ʻUthmānī,,
+arabic,بدو الطوارق بين الثبات والتغير ,Badw al-Ṭawāriq bayna al-thabāt wa-al-taghayyur,,
+arabic,النظم الإجتماعية والتغيرات المرافقة للمد العربي,al-nuẓum al-ijtimāʻīyah wa-al-taghayyurāt al-murāfiqah lil-madd al-ʻArabī,,
+arabic,لماذا يصحو مارد الهضبة ويغفو مارد السهل,Li-mādhā yaṣʹḥū mārid al-haḍabah wa-yaghfū mārid al-sahl,,
+arabic,رؤى الحداثة وآفاق التحولات في الخطاب الأدبي الأردني الحداثي,ruʼá al-ḥadāthah wa-āfāq al-taḥawwulāt fī al-khiṭāb al-Adabī al-Urdunī al-ḥadāthī,,
+arabic,الحقيبة الملكية على الطائر الميمون ‏ ,al-Ḥaqībah al-malakīyah ʻalá al-ṭāʼir al-maymūn,,
+arabic,عيسى الناعوري وجهوده في مجال الدراسات الادبية والنقدية,ʻĪsá al-Nāʻūrī wa-juhūduh fī majāl al-dirāsāt al-adabīyah wa-al-naqdīyah,,
+arabic,أقحوان على ضفاف النهر ,Uqḥuwān ʻalá ḍifāf al-nahr,,
+arabic,صورة المرأة في... السرد النسوي الأردني,Ṣūrat al-marʼah fī ... al-sard al-niswī al-Urdunī,,
+arabic,آراء ونصوص في تجربته الادبية,Ārāʼ wa-nuṣūṣ fī tajribatih al-adabīyah,,
+arabic,مدخل الى أدبنا المعاصر,Madkhal ilá adabinā al-muʻāṣir,,
+arabic,صاحب المئة كتاب والستين عاما في خدمة التربية والتعليم,ṣāḥib al-miʼat kitāb wa-al-sittīn ʻāman fī khidmat al-tarbiyah wa-al-taʻlīm,,
+arabic,خمسة رواد يحاورون العصر,khamsat rūwād yuḥāwirūn al-ʻaṣr,,
+arabic,حوار مع رواد النهضة العربية,Ḥiwār maʻa rūwād al-nahḍah al-ʻArabīyah,,
+arabic,أعلام الحركة الادبية في الرقة,Aʻlām al-ḥarakah al-adabīyah fī al-Raqqah,,
+arabic,دراسة تحليلية في أدب الأطفال لدى الكرد في سوريا وأبرز نماذجه المدونة,dirāsah taḥlīlīyah fī adab al-aṭfāl ladá al-Kurd fī Sūriyā wa-abraz namādhijihi al-mudawwanah,,
+arabic,دراسات ومقالات حول حياة الكتاب والكتاب, dirāsāt wa-maqālāt ḥawla ḥayāt al-kuttāb wa-al-kitāb,,
+arabic,القصص القرآني :  إيحاؤه ونفحاته ,al-Qaṣaṣ al-Qurʼānī : īḥāʼuhu wa-nafaḥātuh ,,
+arabic,للسائلين عن، أخلاق وطبائع بني إسرائيل في قصة يوسف عليه السلام,"Lil-sāʼilīn ʻan, Akhlāq wa-ṭabāʼiʻ Banī Isrāʼīl fī qiṣṣat Yūsuf ʻalayhi al-Salām",,
+arabic,إبراهيم الدسوقي عبد الرحمن,Ibrāhīm al-Dasūqī ʻAbd al-Raḥmān,,
+arabic,لا تكن كابني آدم,Lā takun ka-ibnay Ādam ,,
+arabic,لا قاتلا ولا مقتولا,lā qātilan wa-lā maqtūlan,,
+arabic,الجانب الفني في القصة القرآنية,al-jānib al-fannī fī al-qiṣṣah al-Qurʼānīyah,,
+arabic,منهجها، وأسس بنائها,"manhajuhā, wa-usus bināʼihā ",,
+arabic,المبادىء التربوية والأسس النفسية في القصص القرآني,al-Mabādiʼ al-tarbawīyah wa-al-usus al-nafsīyah fī al-qaṣaṣ al-Qurʼānī ,,
+arabic,الابتلاءات الشديدة عند مخالفة الشريعة,al-Ibtilāʼāt al-shadīdah ʻinda mukhālafat al-Sharīʻah,,
+arabic,للداعية الإسلامي الشيخ محمد ياسين أبو يحيى,lil-Dāʻiyah al-Islāmī al-Shaykh Muḥammad Yāsīn Abū Yaḥyá,,
+arabic,روضة المشتاقين في فضائل الأنبياء والمرسلين وشيء من أخبارهم,Rawḍat al-mushtāqīn fī faḍāʼil al-anbiyāʼ wa-al-mursalīn wa-shayʼ min akhbārihim,,
+arabic,يحيى خذ الكتاب بقوة,Yaḥyá khudh al-kitāb bi-qūwah,,
+arabic,خصائص التراكيب ودلالاتها في القصص القرآني,Khaṣāʼiṣ al-tarākīb wa-dalālātuhā fī al-qaṣaṣ al-Qurʼānī,,
+arabic,الخطيئة والصراع,al-khaṭīʼah wa-al-ṣirāʻ,,
+arabic,اللاموضوعية عند المفسرين :‏,al-Lāmawḍūʻīyah ʻinda al-mufassirīn ,,
+arabic,القصص القرآني بين الآباء والابناء :‏ ,al-Qaṣaṣ al-Qurʼānī bayna al-ābāʼ wa-al-abnāʼ ,,
+arabic,الاتساع النصي في القصص القرآني بين الاستباق والاسترجاع ‏ ,al-Ittisāʻ al-naṣṣī fī al-qaṣaṣ al-Qurʼānī bayna al-istibāq wa-al-istirjāʻ,,
+arabic,فأزلهما الشيطان عنها فأخرجهما مما كانا فيه,fa-azallahumā al-Shayṭān ʻanhā fa-akhrajahumā mimmā kānā fīhi,,
+arabic,آثار المشتق البليغ من قصة يوسف الصديق ,Āthār al-mushtaqq al-balīgh min qiṣṣat Yūsuf al-Ṣiddīq,,
+arabic,الجامع الصحيح في القصص النبوي,al-Jāmiʻ al-ṣaḥīḥ fī al-qaṣaṣ al-Nabawī,,
+arabic,يطبع لاول مرة محققا عا نسخة الحافظ الذهبي التي كتبها بخطة,Yuṭbaʻu li-awwal marrah muḥaqqiqan ʻan nuskhah al-Ḥāfiẓ al-Dhahabī allatī katabahā bi-khuṭṭat.,,
+arabic,,Yuṭbaʻu li-awwal marrah ʻan nuskhah Nafīsah manqūlah bi-khaṭṭ al-muʼallif bi-khaṭṭ al-muʼallif,,
+arabic,العربية ولهجاتها ,al-ʻArabīyah wa-lahajātuhā,,
+arabic,اللغة المهرية المعاصرة بين عربيتين,al-Lughah al-Mahrīyah al-muʻāṣirah bayna ʻArabīyatayn,,
+arabic,نحو عربية ميسرة‏, Naḥwa ʻArabīyah muyassarah,,
+arabic,لغات القبائل في كتب إعراب القرآن ومعانيه,Lughāt al-qabāʼil fī kutub iʻrāb al-Qurʼān wa-maʻānīh,,
+arabic,الأدب الجاهلي بين لهجات القبائل وللغة الموحدة,al-adab al-Jāhilī bayna Lahajāt al-qabāʼil wa-al-lughah al-muwaḥḥadah,,
+arabic,التحليل العام للغة العوام,al-Tahḷīl al-ʻāmm li-lughat al-ʻawāmm,,
+arabic,تاريخ الدعوة إلى العامية وآثارها في مصر  ‏ ,Tārīkh al-Daʻwah ilá al-ʻāmmīyah wa-āthāruhā fī Miṣr,,
+arabic,الفصيح الذي حفظته العامية العراقية بين الدراسة والتطبيق,al-faṣīḥ alladhī ḥafiẓatʹhu al-ʻāmmīyah al-ʻIrāqīyah bayna al-dirāsah wa-al-taṭbīq,,
+arabic,ويلي ذلك معجم بألفاظ اللهجة الشائعة في العراق,wa-yalī dhālika Muʻjam bi-alfāẓ al-lahjah al-shāʼiʻah fī al-ʻIrāq,,
+arabic,كلمات فارسية مستعملة في عامية الموصل وفي انحاء العراق,Kalimāt Fārisīyah mustaʻmalah fī ʻāmmīyat al-Mūṣil wa-fī anḥāʼ al-ʻIrāq,,

+ 1 - 1
tests/test01_cfg.py

@@ -113,7 +113,7 @@ class TestHooks(TestCase):
                 tbl["script_to_roman"]["hooks"],
                 {
                     "begin_input_token": [
-                        (scriptshifter.hooks.test.rotate, {"n": -3})
+                        ("test", scriptshifter.hooks.test.rotate, {"n": -3})
                     ]
                 })