Browse Source

Implement double caps; add sample Belarusian config.

scossu 1 year ago
parent
commit
76f9727776
3 changed files with 36 additions and 8 deletions
  1. + 3 - 0
      scriptshifter/tables/data/belarusian.yml
  2. + 13 - 5
      scriptshifter/trans.py
  3. + 20 - 3
      tests/test03_capitalization.py

+ 3 - 0
scriptshifter/tables/data/belarusian.yml

@@ -44,6 +44,9 @@ roman_to_script:
     "\u0027": "\u044C"
 
 script_to_roman:
+  double_cap:
+    - "i\uFE20o\uFE21"
+    - "z\uFE20h\uFE21"
   map:
     "\u0401": "I\uFE20O\uFE21"
     "\u0406": "I"

+ 13 - 5
scriptshifter/trans.py

@@ -9,8 +9,8 @@ from scriptshifter.tables import WORD_BOUNDARY, load_table
 MULTI_WS_RE = re.compile(r"\s{2,}")
 
 # Cursor bitwise flags.
-CUR_BOW = 1
-CUR_EOW = 2
+CUR_BOW = 1 << 0
+CUR_EOW = 1 << 1
 
 logger = logging.getLogger(__name__)
 
@@ -89,6 +89,7 @@ def transliterate(src, lang, r2s=False, capitalize=False):
     word_boundary = langsec.get("word_boundary", WORD_BOUNDARY)
     while ctx.cur < len(src):
         # Reset cursor position flags.
+        # Carry over extended "beginning of word" flag.
         ctx.cur_flags = 0
         cur_char = src[ctx.cur]
 
@@ -187,7 +188,6 @@ def transliterate(src, lang, r2s=False, capitalize=False):
 
                 # A match is found. Stop scanning tokens, append result, and
                 # proceed scanning the source.
-                tk = ctx.dest_tk
                 # Capitalization.
                 if (
                     (capitalize == "first" and ctx.cur == 0)
@@ -195,8 +195,16 @@ def transliterate(src, lang, r2s=False, capitalize=False):
                     (capitalize == "all" and ctx.cur_flags & CUR_BOW)
                 ):
                     logger.info("Capitalizing token.")
-                    tk = tk.capitalize()
-                ctx.dest_ls.append(tk)
+                    double_cap = False
+                    for dcap_rule in ctx.langsec.get("double_cap", []):
+                        if ctx.dest_tk == dcap_rule:
+                            ctx.dest_tk = ctx.dest_tk.upper()
+                            double_cap = True
+                            break
+                    if not double_cap:
+                        ctx.dest_tk = ctx.dest_tk.capitalize()
+
+                ctx.dest_ls.append(ctx.dest_tk)
                 ctx.cur += step
                 break
 

+ 20 - 3
tests/test03_capitalization.py

@@ -1,6 +1,7 @@
 from os import environ
 from unittest import TestCase
 
+from scriptshifter.trans import transliterate
 from tests import TEST_DATA_DIR, reload_tables
 
 
@@ -11,10 +12,26 @@ class TestCapitalization(TestCase):
 
     def setUp(self):
         environ["TXL_CONFIG_TABLE_DIR"] = TEST_DATA_DIR
-        reload_tables()
+        self.tables = reload_tables()
 
     def test_cap(self):
-        pass
+        tbl = "cap_inherited"
+        in_str = "зг іо"
+        tx = transliterate(in_str, tbl)
+        tx_cap = transliterate(in_str, tbl, capitalize="first")
+        tx_all = transliterate(in_str, tbl, capitalize="all")
+
+        self.assertEqual(tx, "zh io")
+        self.assertEqual(tx_cap, "Zh io")
+        self.assertEqual(tx_all, "Zh Io")
 
     def test_cap_ligatures(self):
-        pass
+        tbl = "cap_inherited"
+        in_str = "жзг ёіо зг іо"
+        tx = transliterate(in_str, tbl)
+        tx_cap = transliterate(in_str, tbl, capitalize="first")
+        tx_all = transliterate(in_str, tbl, capitalize="all")
+
+        self.assertEqual(tx, "z︠h︡zh i︠o︡io zh io")
+        self.assertEqual(tx_cap, "Z︠H︡zh i︠o︡io zh io")
+        self.assertEqual(tx_all, "Z︠H︡zh I︠o︡io Zh Io")