Browse Source

Merge branch 'decompose' into test

scossu 2 months ago
parent
commit
0543e7cf55

+ 0 - 102
scriptshifter/hooks/korean/data.yml

@@ -467,57 +467,6 @@ fkr047:
     "f8": "l"
     "f0": ""
 
-fkr050:
-  "!": "SB01KQ"
-  "\"": "SB02KQ"
-  "#": "SB03KQ"
-  "$": "SB04KQ"
-  "%": "SB05KQ"
-  "&": "SB06KQ"
-  "'": "SB07KQ"
-  "(": "SB08KQ"
-  ")": "SB09KQ"
-  "*": "SB10KQ"
-  "+": "SB11KQ"
-  ",": "SB12KQ"
-  "-": "SB13KQ"
-  ".": "SB14KQ"
-  "/": "SB15KQ"
-  ":": "SB16KQ"
-  ";": "SB17KQ"
-  "<": "SB18KQ"
-  "=": "SB19KQ"
-  ">": "SB20KQ"
-  "?": "SB21KQ"
-  "・": "SB22KQ"
-  "ǂ": "SB23KQ"
-  "「": "SB24KQ"
-  "」": "SB25KQ"
-  "『": "SB26KQ"
-  "』": "SB27KQ"
-  "@": "SB28KQ"
-  "[": "SB29KQ"
-  "\\": "SB30KQ"
-  "]": "SB31KQ"
-  "^": "SB32KQ"
-  "_": "SB33KQ"
-  "`": "SB34KQ"
-  "{": "SB35KQ"
-  "|": "SB36KQ"
-  "}": "SB37KQ"
-  "~": "SB38KQ"
-  "‡": "SB39KQ"
-  "‰": "SB40KQ"
-  "‘": "SB41KQ"
-  "’": "SB42KQ"
-  "“": "SB43KQ"
-  "”": "SB44KQ"
-  "–": "SB45KQ"
-  "—": "SB46KQ"
-  "˜": "SB47KQ"
-  "©": "SB48KQ"
-  "·": "SB49KQ"
-
 fkr052:
   rule_nu:
     " 무의식역 ": " 무의식🜹역 "
@@ -2764,57 +2713,6 @@ fkr065:
   " hanchaŏ ": " Hanchaŏ "
   " ch'ŏlchong ": " Ch'ŏlchong "
 
-fkr066:
-  "SB01KQ": "!"
-  "SB02KQ": "\""
-  "SB03KQ": "#"
-  "SB04KQ": "$"
-  "SB05KQ": "%"
-  "SB06KQ": "&"
-  "SB07KQ": "'"
-  "SB08KQ": "("
-  "SB09KQ": ")"
-  "SB10KQ": "*"
-  "SB11KQ": "+"
-  "SB12KQ": ","
-  "SB13KQ": "-"
-  "SB14KQ": "."
-  "SB15KQ": "/"
-  "SB16KQ": ":"
-  "SB17KQ": ";"
-  "SB18KQ": "<"
-  "SB19KQ": "="
-  "SB20KQ": ">"
-  "SB21KQ": "?"
-  "SB22KQ": ","
-  "SB23KQ": "ǂ"
-  "SB24KQ": "「"
-  "SB25KQ": "」"
-  "SB26KQ": "『"
-  "SB27KQ": "』"
-  "SB28KQ": "@"
-  "SB29KQ": "["
-  "SB30KQ": "\\"
-  "SB31KQ": "]"
-  "SB32KQ": "^"
-  "SB33KQ": "_"
-  "SB34KQ": "`"
-  "SB35KQ": "{"
-  "SB36KQ": "|"
-  "SB37KQ": "}"
-  "SB38KQ": "~"
-  "SB39KQ": "‡"
-  "SB40KQ": "‰"
-  "SB41KQ": "‘"
-  "SB42KQ": "’"
-  "SB43KQ": "“"
-  "SB44KQ": "”"
-  "SB45KQ": "–"
-  "SB46KQ": "—"
-  "SB47KQ": "˜"
-  "SB48KQ": "©"
-  "SB49KQ": ","
-
 fkr069:
   "학여울역": "학여울력"
   "값어치": "가버치"

+ 53 - 46
scriptshifter/tables/data/arabic.yml

@@ -4,7 +4,8 @@
 ---
 general:
   name: Arabic
-  description: Arabic R2S using a conversion table and S2R using a 3rd party library.
+  description: >
+    Arabic R2S using a conversion table and S2R using a 3rd party library.
   case_sensitive: false
 
   parents:
@@ -17,7 +18,8 @@ roman_to_script:
     # Original table by David Bucknum
     # Last updated 25 January 2019
     # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
-    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually"
+    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
+    # Scripts Conceptually"
 
     # Punctuation marks:
     "*": "\u066D"
@@ -65,19 +67,19 @@ roman_to_script:
 
     # "sh[dot below]" as in "Ishaq"
 
-    "%sh\u0323%": "\u0633\u062D"
+    "sh\u0323": "\u0633\u062D"
 
     # "s[prime]h" combos
 
-    "%s\u02B9h%": "\u0633\u0647"
+    "s\u02B9h": "\u0633\u0647"
 
     # "th[dot below]"
 
-    "%th\u0323%": "\u062A\u062D"
+    "th\u0323": "\u062A\u062D"
 
-    # dh[dot under] 
+    # dh[dot under]
 
-    "%dh\u0323%": "\u062F\u062D"
+    "dh\u0323": "\u062F\u062D"
 
     # La-hu
 
@@ -95,7 +97,9 @@ roman_to_script:
     "mi\u02BEat": "\u0645\u0627\u0626\u0629"
     "mi\u02BCat": "\u0645\u0627\u0626\u0629"
 
-    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use \u06F0-06F9. This needs further discussion with PSD as RLIN21 used Hindi numbers, Connexion and Voyager does not.)
+    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
+    # will technically use \u06F0-\u06F9. This needs further discussion with
+    # PSD, as RLIN21 used Hindi numbers whereas Connexion and Voyager do not.)
 
     # Edition statements with Latin number
     "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
@@ -131,15 +135,15 @@ roman_to_script:
     "ka-": "\u0643"
 
     # Vowels and vowel/consonant combinations
-    "%ah": "\u0629"
-    "%at": "\u0629"
+    "ah%": "\u0629"
+    "at%": "\u0629"
 
-    #tanwin
-    "%an": "\u0627"
+    # tanwin
+    "an%": "\u0627"
 
     # ayn-alif combo
-    "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
-    "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
 
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
@@ -152,24 +156,24 @@ roman_to_script:
     "\u02BBU": "\u0639"
     "\u02BBu": "\u0639"
 
-    "\u02BBA%": "\u0639"
-    #"\u02BBa%": "\u0639"
+    "%\u02BBA": "\u0639"
+    # "%\u02BBa": "\u0639"
 
     # alif and hamzas for all occasions
 
-    # truncation necessary? It seems to work fine with. 
+    # truncation necessary? It seems to work fine with.
 
-    "%i\u0304\u02BEah": "\u064A\u0626\u0629"
-    "%i\u0304\u02BCah": "\u064A\u0626\u0629"
+    "i\u0304\u02BEah%": "\u064A\u0626\u0629"
+    "i\u0304\u02BCah%": "\u064A\u0626\u0629"
 
-    "%i\u0304\u02BEat": "\u064A\u0626\u0629"
-    "%i\u0304\u02BCat": "\u064A\u0626\u0629"
+    "i\u0304\u02BEat%": "\u064A\u0626\u0629"
+    "i\u0304\u02BCat%": "\u064A\u0626\u0629"
 
-    "%i\u02BEa\u0304": "\u0626\u0627"
-    "%i\u02BCa\u0304": "\u0626\u0627"
+    "i\u02BEa\u0304%": "\u0626\u0627"
+    "i\u02BCa\u0304%": "\u0626\u0627"
 
-    "%i\u02BE": "\u0626"
-    "%i\u02BC": "\u0626"
+    "i\u02BE%": "\u0626"
+    "i\u02BC%": "\u0626"
     "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
     "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
 
@@ -197,32 +201,34 @@ roman_to_script:
     "a\u0304\u02BEi": "\u0627\u0626"
     "a\u0304\u02BC": "\u0627\u0621"
     "a\u0304\u02BE": "\u0627\u0621"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     "A\u0304": "\u0627"
     "a\u0304": "\u0627"
 
-    # These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinine (i.e. al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l" 
-    "A\u02BB%": "\u0623\u0639"
-    "a\u02BB%": "\u0623\u0639"
+    # These next two lines were intended to convert to alif-ayn when it is at
+    # the beginning of a word, definite or indefinite (i.e.
+    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l)
+    "%A\u02BB": "\u0623\u0639"
+    "%a\u02BB": "\u0623\u0639"
     "a\u02BB": "\u0639"
     "A\u0301": "\u0649"
     "a\u0301": "\u0649"
 
     "ayy": "\u064A"
-    "A%": "\u0623"
-    "a%": "\u0627"
+    "%A": "\u0623"
+    "%a": "\u0627"
     "A": "\u0623"
     "a": ""
 
     # I - Capital I at beginning of word is usually alif hamzah-below.
 
-    "%i\u0304": "\u064A"
+    "i\u0304%": "\u064A"
     "i\u0304y": "\u064A"
     "iy": "\u064A"
-    "I\u0304%": "\u0625\u064A"
+    "%I\u0304": "\u0625\u064A"
     "i\u0304": "\u064A"
-    "\u02BBI%": "\u0639"
+    "%\u02BBI": "\u0639"
 
     # "i\u02BB": "\u0625\u0639"
 
@@ -231,8 +237,8 @@ roman_to_script:
     "i\u02BE": "\u0626"
     "i\u02BC": "\u0627\u0626"
 
-    "I%": "\u0625"
-    "i%": "\u0625"
+    "%I": "\u0625"
+    "%i": "\u0625"
     "I": "\u0625"
     "i": ""
 
@@ -240,21 +246,21 @@ roman_to_script:
 
     "u\u0304\u02BE": "\u0624"
     "u\u0304\u02BC": "\u0624"
-    "U\u0304w%": "\u0623\u0648"
-    "u\u0304w%": "\u0623\u0648"
-    "U\u0304%": "\u0623\u0648"
-    "u\u0304%": "\u0623\u0648"
+    "%U\u0304w": "\u0623\u0648"
+    "%u\u0304w": "\u0623\u0648"
+    "%U\u0304": "\u0623\u0648"
+    "%u\u0304": "\u0623\u0648"
     "u\u0304w": "\u0648"
     "u\u0304": "\u0648"
     "u\u02BE": "\u0624"
     "u\u02BC": "\u0624"
 
-    "U%": "\u0623"
-    "u%": "\u0623"
+    "%U": "\u0623"
+    "%u": "\u0623"
     "U": "\u0623"
     "u": ""
 
-    # Consonants, with tashdid added 
+    # Consonants, with tashdid added
 
     "B": "\u0628"
     "bb": "\u0628"
@@ -351,8 +357,8 @@ roman_to_script:
     # hamza - not romanized
     # "\u0621"
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"
 
     # Do not know what, if anything, is needed here:
     # tatweel:
@@ -378,6 +384,7 @@ roman_to_script:
     # alef wasla
     # "\u0671"
 
+
 script_to_roman:
   hooks:
     post_config:

+ 2 - 2
scriptshifter/tables/data/bulgarian.yml

@@ -38,8 +38,8 @@ script_to_roman:
     "\u044C": ""
     "\u042C": ""
     "\u044A": ""
-    "\u042A%": "u\u0306"
-    "\u042A": ""
+    "\u042A%": ""  # Final
+    "\u042A": "u\u0306"
     "\u0413": "G"
     "\u0433": "g"
     "\u0416": "Zh"

+ 17 - 17
scriptshifter/tables/data/divehi_thaana.yml

@@ -30,8 +30,8 @@ roman_to_script:
     "h\u032E": "\u0781\u07B0"
 
     # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
-    "%H\u0332": "\u0787\u07B0"
-    "%h\u0332": "\u0787\u07B0"
+    "H\u0332%": "\u0787\u07B0"
+    "h\u0332%": "\u0787\u07B0"
     "H": "\u0780"
     "h": "\u0780"
     "S\u0301": "\u0781"
@@ -70,13 +70,13 @@ roman_to_script:
     "F": "\u078A"
     "ff": "\u0787\u07B0\u078A"
     "f": "\u078A"
-    # THAANA LETTER "D/d" WITH DOT BELOW (0323) 
+    # THAANA LETTER "D/d" WITH DOT BELOW (0323)
     "D\u0323": "\u0791"
     "d\u0323": "\u0791"
     "D": "\u078B"
     "dd": "\u0787\u07B0\u078B"
     "d": "\u078B"
-    # THAANA LETTER "T/t" WITH DOT BELOW (0323) 
+    # THAANA LETTER "T/t" WITH DOT BELOW (0323)
     "T\u0323": "\u0793"
     "t\u0323": "\u0793"
     "T\u0324T": "\u078C\u07B0\u078C"
@@ -169,7 +169,7 @@ roman_to_script:
     "ghgh": "\u0787\u07B0\u07A3"
     "gh": "\u07A3"
 
-    # THAANA EXTENSION FOR ARABIC LETTER QAAFU 
+    # THAANA EXTENSION FOR ARABIC LETTER QAAFU
     "Q": "\u07A4"
     "qq": "\u0787\u07B0\u07A4"
     "q": "\u07A4"
@@ -181,24 +181,24 @@ roman_to_script:
 
     # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
     # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION)
-    "A\u0304%": "\u0787\u07A7"
-    "A%": "\u0787\u07A6"
+    "%A\u0304": "\u0787\u07A7"
+    "%A": "\u0787\u07A6"
     "\u0020a\u0304": "\u0020\u0787\u07A7"
     "\u0020a": "\u0020\u0787\u07A6"
-    "E\u0304%": "\u0787\u07AD"
-    "E%": "\u0787\u07AC"
+    "%E\u0304": "\u0787\u07AD"
+    "%E": "\u0787\u07AC"
     "\u0020e\u0304": "\u0020\u0787\u07AD"
     "\u0020e": "\u0020\u0787\u07AC"
-    "I\u0304%": "\u0787\u07A9"
-    "I%": "\u0787\u07A8"
+    "%I\u0304": "\u0787\u07A9"
+    "%I": "\u0787\u07A8"
     "\u0020i\u0304": "\u0020\u0787\u07A9"
     "\u0020i": "\u0020\u0787\u07A8"
-    "O\u0304%": "\u0787\u07AF"
-    "O%": "\u0787\u07AE"
+    "%O\u0304": "\u0787\u07AF"
+    "%O": "\u0787\u07AE"
     "\u0020o\u0304": "\u0020\u0787\u07AF"
     "\u0020o": "\u0020\u0787\u07AE"
-    "U\u0304%": "\u0787\u07AB"
-    "U%": "\u0787\u07AB"
+    "%U\u0304": "\u0787\u07AB"
+    "%U": "\u0787\u07AB"
     "\u0020u\u0304": "\u0020\u0787\u07AB"
     "\u0020u": "\u0020\u0787\u07AB"
 
@@ -301,7 +301,7 @@ script_to_roman:
     "\u0787\u07B0\u078A": "ff"
     "\u078A": "f"
 
-    # THAANA LETTER "D/d" WITH DOT BELOW (0323) 
+    # THAANA LETTER "D/d" WITH DOT BELOW (0323)
     "\u0787\u07B0\u0791": "d\u0323d\u0323"
     "\u0791": "d\u0323"
 
@@ -386,7 +386,7 @@ script_to_roman:
     "\u0787\u07B0\u07A3": "ghgh"
     "\u07A3": "gh"
 
-    # THAANA EXTENSION FOR ARABIC LETTER QAAFU 
+    # THAANA EXTENSION FOR ARABIC LETTER QAAFU
     "\u0787\u07B0\u07A4": "qq"
     "\u07A4": "q"
 

+ 49 - 49
scriptshifter/tables/data/persian.yml

@@ -34,7 +34,7 @@ roman_to_script:
     "ibn": "\u0628\u0646"
 
     # Parsing "sh[dot below]" as in "Ishaq [name]"
-    "%sh\u0323%": "\u0633\u062D"
+    "sh\u0323": "\u0633\u062D"
 
     # Edition statements with Latin number
     "Cha\u0304p-i 1": "\u0686\u0627\u067E 1"
@@ -75,7 +75,7 @@ roman_to_script:
     "# prime ": " ZWNJ"
     "\u02B9a\u0304": "\u200C\u0622"
     "\u02B9a": "\u200C\u0627"
-    "%\u02B9i\u0304": "\u200C\u0627\u0649"
+    "\u02B9i\u0304%": "\u200C\u0627\u0649"
     "\u02B9i\u0304": "\u200C\u0627\u064A"
     "i\u0304\u02B9a\u0304": "\u0649\u200C\u0622"
     "i\u0304\u02B9a": "\u0649\u200C\u0627"
@@ -87,22 +87,22 @@ roman_to_script:
     # Vowel and vowel/consonant combinations
     # and hyphenated suffixes:
     # izafah here
-    "%h-\u02BEi": "\u06C0"
-    "%h-\u02BCi": "\u06C0 "
-    "%-\u02BEi": "\u06C0"
-    "%-\u02BCi": "\u06C0"
+    "h-\u02BEi%": "\u06C0"
+    "h-\u02BCi%": "\u06C0 "
+    "-\u02BEi%": "\u06C0"
+    "-\u02BCi%": "\u06C0"
 
-    "%h-yi": "\u0647\u200C\u0649"
-    "%-yi": "\u0649"
-    "%yi": "\u0649"
-    "%\u02BEi\u0304": "\u0649"
-    "%\u02BCi\u0304": "\u0649"
-    "%i\u0304-i": "\u0649"
-    "%i\u0304": "\u0649"
-    "%ayy-i": "\u0649"
-    "%ay": "\u0649"
-    "%al-i": "\u0644"
-    "%-i": ""
+    "h-yi%": "\u0647\u200C\u0649"
+    "-yi%": "\u0649"
+    "yi%": "\u0649"
+    "\u02BEi\u0304%": "\u0649"
+    "\u02BCi\u0304%": "\u0649"
+    "i\u0304-i%": "\u0649"
+    "i\u0304%": "\u0649"
+    "ayy-i%": "\u0649"
+    "ay%": "\u0649"
+    "al-i%": "\u0644"
+    "-i%": ""
 
     # Hyphenated prefixes:
     "wa-": "\u0648"
@@ -113,18 +113,18 @@ roman_to_script:
     "ka-": "\u0643"
 
     # ayn combo
-    "%\u02BBa\u0304%": "\u0639\u0627"
+    "\u02BBa\u0304": "\u0639\u0627"
     # alif combos
     # [final position]
-    "%a\u0304\u02BE": "\u0627\u0621"
-    "%a\u0304\u02BC": "\u0627\u0621"
-    "%a\u0304\u02BEi\u0304": "\u0627\u0626\u0649"
-    "%a\u0304\u02BCi\u0304": "\u0627\u0626\u0649"
+    "a\u0304\u02BE%": "\u0627\u0621"
+    "a\u0304\u02BC%": "\u0627\u0621"
+    "a\u0304\u02BEi\u0304%": "\u0627\u0626\u0649"
+    "a\u0304\u02BCi\u0304%": "\u0627\u0626\u0649"
     # [initial position]
-    "A\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "A\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
+    "%A\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%A\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
     # [medial position]
     "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
     "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
@@ -138,10 +138,10 @@ roman_to_script:
     # A
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
-    "\u02BBA%": "\u0639"
+    "%\u02BBA": "\u0639"
     "\u02BBa": "\u0639"
-    "A\u02BB%": "\u0627\u0639"
-    "a\u02BB%": "\u0627\u0639"
+    "%A\u02BB": "\u0627\u0639"
+    "%a\u02BB": "\u0627\u0639"
     "A\u02BB": "\u0623\u0639"
     "a\u02BB": "\u0639"
     "a\u02BE": "\u0623"
@@ -150,57 +150,57 @@ roman_to_script:
     "\u02BCa": "\u0623"
     "a\u0304\u02BE": "\u0621"
     "a\u0304\u02BC": "\u0621"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     # previously an alif:
     "A\u0304": "\u0622"
     "a\u0304": "\u0627"
     "A\u0301": "\u0649"
     "a\u0301": "\u0649"
     "ayy": "\u064A"
-    "A%": "\u0627"
-    "a%": "\u0627"
+    "%A": "\u0627"
+    "%a": "\u0627"
     "A": "\u0627"
     "a": ""
 
     # I
     "\u02BBI\u0304": "\u0639\u064A"
     "\u02BBi\u0304": "\u0639\u064A"
-    "I\u02BB%": "\u0627\u0639"
-    "i\u02BB%": "\u0627\u0639"
+    "%I\u02BB": "\u0627\u0639"
+    "%i\u02BB": "\u0627\u0639"
     "i\u02BB": "\u0639"
-    "\u02BBI%": "\u0639"
-    "I\u02BE%": "\u0627\u0626"
-    "I\u02BC%": "\u0627\u0626"
+    "%\u02BBI": "\u0639"
+    "%I\u02BE": "\u0627\u0626"
+    "%I\u02BC": "\u0627\u0626"
     "i\u02BE": "\u0626"
     "i\u02BC": "\u0626"
     "\u02BEi\u0304": "\u0626\u0649"
     "\u02BCi\u0304": "\u0626\u0649"
     "\u02BEi": "\u0626"
     "\u02BCi": "\u0626"
-    "I\u0304%": "\u0627\u064A"
-    "i\u0304%": "\u0627\u064A"
+    "%I\u0304": "\u0627\u064A"
+    "%i\u0304": "\u0627\u064A"
     "i\u0304y": "\u064A"
     "I\u0304": "\u0627\u0649"
     "i\u0304": "\u064A"
-    "I%": "\u0627"
-    "i%": "\u0627"
+    "%I": "\u0627"
+    "%i": "\u0627"
     "I": "\u0627"
     "i": ""
 
     # U
     "\u02BEu\u0304": "\u0626\u0648"
     "\u02BCu\u0304": "\u0626\u0648"
-    "U\u02BE%": "\u0627\u0624"
-    "U\u02BC%": "\u0627\u0624"
+    "%U\u02BE": "\u0627\u0624"
+    "%U\u02BC": "\u0627\u0624"
     "u\u02BE": "\u0624"
     "u\u02BC": "\u0624"
-    "U\u0304w%": "\u0627\u0628"
+    "%U\u0304w": "\u0627\u0628"
     "u\u0304w": "\u0628"
     "U\u0304": "\u0627\u0648"
     "u\u0304": "\u0648"
-    "U%": "\u0627"
-    "u%": "\u0627"
+    "%U": "\u0627"
+    "%u": "\u0627"
     "U": "\u0627"
     "u": ""
 
@@ -307,5 +307,5 @@ roman_to_script:
     "\u02BB": "\u0639"
 
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"

+ 61 - 61
scriptshifter/tables/data/pushto.yml

@@ -75,7 +75,7 @@ roman_to_script:
     "# prime ": " ZWNJ"
     "\u02B9a\u0304": "\u200C\u0622"
     "\u02B9a": "\u200C\u0627"
-    "%\u02B9i\u0304": "\u200C\u0627\u0649"
+    "\u02B9i\u0304%": "\u200C\u0627\u0649"
     "\u02B9i\u0304": "\u200C\u0627\u064A"
     "i\u0304\u02B9": "\u0649\u200C"
     "a\u0323y\u02B9": "\u06D3\u200C"
@@ -84,25 +84,25 @@ roman_to_script:
     # Vowel and vowel/consonant combinations
     # and hyphenated suffixes:
     # izafah here
-    "%h-\u02BEi": "\u06C0"
-    "%h-\u02BCi": "\u06C0 "
-    "%-\u02BEi": "\u06C0"
-    "%-\u02BCi": "\u06C0"
-
-    "%h-yi": "\u0647\u200C\u0649"
-    "%-yi": "\u0649"
-    "%yi": "\u0649"
-    "%\u02BEi\u0304": "\u0649"
-    "%\u02BCi\u0304": "\u0649"
-    "%i\u0304-i": "\u0649"
-    "%i\u0304": "\u0649"
-
-    "%a\u0323h": "\u06C0"
-    "%ayy-i": "\u0649"
-    "%a\u0304y": "\u0627\u0649"
-    "%a\u0301": "\u0649\u0670"
-    "%al-i": "\u0644"
-    "%-i": ""
+    "h-\u02BEi%": "\u06C0"
+    "h-\u02BCi%": "\u06C0 "
+    "-\u02BEi%": "\u06C0"
+    "-\u02BCi%": "\u06C0"
+
+    "h-yi%": "\u0647\u200C\u0649"
+    "-yi%": "\u0649"
+    "yi%": "\u0649"
+    "\u02BEi\u0304%": "\u0649"
+    "\u02BCi\u0304%": "\u0649"
+    "i\u0304-i%": "\u0649"
+    "i\u0304%": "\u0649"
+
+    "a\u0323h%": "\u06C0"
+    "ayy-i%": "\u0649"
+    "a\u0304y%": "\u0627\u0649"
+    "a\u0301%": "\u0649\u0670"
+    "al-i%": "\u0644"
+    "-i%": ""
 
     # Hyphenated prefixes:
     "wa-": "\u0648"
@@ -113,29 +113,29 @@ roman_to_script:
     "ka-": "\u0643"
 
     # Diphthongs here
-    "Ayy%": "\u0627\u064A"
-    "ayy%": "\u0627\u064A"
-    "%a\u0323y": "\u06D3"
-    "%ay": "\u0649"
+    "%Ayy": "\u0627\u064A"
+    "%ayy": "\u0627\u064A"
+    "a\u0323y%": "\u06D3"
+    "ay%": "\u0649"
     "\u02BBAw": "\u0639\u0648"
     "\u02BBaw": "\u0639\u0648"
     "Aw": "\u0627\u0648"
     "aw": "\u0648"
 
     # ayn combo
-    "%\u02BBa\u0304%": "\u0639\u0627"
+    "\u02BBa\u0304": "\u0639\u0627"
 
     # alif combos
     # [final position]
-    "%a\u0304\u02BE": "\u0627\u0621"
-    "%a\u0304\u02BC": "\u0627\u0621"
-    "%a\u0304\u02BEi\u0304": "\u0627\u0626\u0649"
-    "%a\u0304\u02BCi\u0304": "\u0627\u0626\u0649"
+    "a\u0304\u02BE%": "\u0627\u0621"
+    "a\u0304\u02BC%": "\u0627\u0621"
+    "a\u0304\u02BEi\u0304%": "\u0627\u0626\u0649"
+    "a\u0304\u02BCi\u0304%": "\u0627\u0626\u0649"
     # [initial position]
-    "A\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "A\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
+    "%A\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%A\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
 
     # [medial position]
     "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
@@ -145,7 +145,7 @@ roman_to_script:
     "a\u02BEi\u0304": "\u0626\u064A"
     "a\u02BCi\u0304": "\u0626\u064A"
 
-    #a [macron] hamza followed by e
+    # a [macron] hamza followed by e
     "a\u0304\u02BEe": "\u0627\u0626\u064A"
     "a\u0304\u02BCe": "\u0627\u0626\u064A"
 
@@ -157,7 +157,7 @@ roman_to_script:
     # A
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
-    "\u02BBA%": "\u0639"
+    "%\u02BBA": "\u0639"
     "\u02BBa": "\u0639"
     "A\u02BB": "\u0623\u0639"
     "a\u02BB": "\u0639"
@@ -167,8 +167,8 @@ roman_to_script:
     "\u02BCa": "\u0623"
     "a\u0304\u02BE": "\u0621"
     "a\u0304\u02BC": "\u0621"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     "A\u0304": "\u0627"
     "a\u0304": "\u0627"
     "A\u0301": "\u0649"
@@ -178,40 +178,40 @@ roman_to_script:
     # heh hamzah at end
     "a\u0323h": "\u06C0"
 
-    "A%": "\u0627"
-    "a%": "\u0627"
+    "%A": "\u0627"
+    "%a": "\u0627"
     "A": ""
     "a": ""
 
     # E
-    "%e": "\u06D0"
-    "E%": "\u0627\u064A"
-    "e%": "\u0627\u064A"
+    "e%": "\u06D0"
+    "%E": "\u0627\u064A"
+    "%e": "\u0627\u064A"
     "e": "\u06D0"
 
-    # I 
-    "I\u02BB%": "\u0627\u0639"
-    "i\u02BB%": "\u0627\u0639"
+    # I
+    "%I\u02BB": "\u0627\u0639"
+    "%i\u02BB": "\u0627\u0639"
     "i\u02BB": "\u0639"
-    "\u02BBI%": "\u0639"
-    "I\u02BE%": "\u0627\u0626"
-    "I\u02BC%": "\u0627\u0626"
+    "%\u02BBI": "\u0639"
+    "%I\u02BE": "\u0627\u0626"
+    "%I\u02BC": "\u0627\u0626"
     "i\u02BE": "\u0626"
     "i\u02BC": "\u0626"
     "\u02BEi": "\u0626"
     "\u02BCi": "\u0626"
-    "I\u0304%": "\u0627\u064A"
-    "i\u0304%": "\u0627\u064A"
+    "%I\u0304": "\u0627\u064A"
+    "%i\u0304": "\u0627\u064A"
     "i\u0304y": "\u064A"
     "i\u0304": "\u064A"
-    "I%": "\u0627"
-    "i%": "\u0627"
+    "%I": "\u0627"
+    "%i": "\u0627"
     "I": ""
     "i": ""
 
     # O
-    "o%": "\u0627\u0648"
-    "O%": "\u0627\u0648"
+    "%o": "\u0627\u0648"
+    "%O": "\u0627\u0648"
     "o": "\u0648"
 
     # U
@@ -221,16 +221,16 @@ roman_to_script:
 
     "\u02BEu\u0304": "\u0626\u0648"
     "\u02BCu\u0304": "\u0626\u0648"
-    "U\u02BE%": "\u0627\u0624"
-    "U\u02BC%": "\u0627\u0624"
+    "%U\u02BE": "\u0627\u0624"
+    "%U\u02BC": "\u0627\u0624"
     "u\u02BE": "\u0624"
     "u\u02BC": "\u0624"
-    "U\u0304w%": "\u0627\u0628"
+    "%U\u0304w": "\u0627\u0628"
     "u\u0304w": "\u0628"
     "U\u0304": "\u0627\u0648"
     "u\u0304": "\u0648"
-    "U%": "\u0627"
-    "u%": "\u0627"
+    "%U": "\u0627"
+    "%u": "\u0627"
     "U": ""
     "u": ""
 
@@ -388,5 +388,5 @@ roman_to_script:
     "\u02BB": "\u0639"
 
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"

+ 43 - 43
scriptshifter/tables/data/thai.yml

@@ -89,7 +89,7 @@ script_to_roman:
     "\u0E01\u0E44\u0E22": "kai"
     "\u0E01\u0E44": "kai"
     # FINAL CONSONANT KO KAI WITH NO VOWEL
-    "%\u0E01": "k"
+    "\u0E01%": "k"
     "\u0E01": "ko"
 
     # CONSONANT KHO KHAI WITH VOWELS
@@ -148,7 +148,7 @@ script_to_roman:
     "\u0E02\u0E44\u0E22": "khai"
     "\u0E02\u0E44": "khai"
     # FINAL CONSONANT KHO KHAI WITH NO VOWEL
-    "%\u0E02": "k"
+    "\u0E02%": "k"
     "\u0E02": "kho"
 
     # CONSONANT KHO KHUAT WITH VOWELS
@@ -207,7 +207,7 @@ script_to_roman:
     "\u0E03\u0E44\u0E22": "khai"
     "\u0E03\u0E44": "khai"
     # FINAL CONSONANT KHO KHUAT WITH NO VOWEL
-    "%\u0E03": "k"
+    "\u0E03%": "k"
     "\u0E03": "kho"
 
     # CONSONANT KHO KHWAI WITH VOWELS
@@ -266,7 +266,7 @@ script_to_roman:
     "\u0E04\u0E44\u0E22": "khai"
     "\u0E04\u0E44": "khai"
     # FINAL CONSONANT KHO KHWAI WITH NO VOWEL
-    "%\u0E04": "k"
+    "\u0E04%": "k"
     "\u0E04": "kho"
 
     # CONSONANT KHO KHON WITH VOWELS
@@ -325,7 +325,7 @@ script_to_roman:
     "\u0E05\u0E44\u0E22": "khai"
     "\u0E05\u0E44": "khai"
     # FINAL CONSONANT KHO KHON WITH NO VOWEL
-    "%\u0E05": "k"
+    "\u0E05%": "k"
     "\u0E05": "kho"
 
     # CONSONANT KHO RAKHANG WITH VOWELS
@@ -384,7 +384,7 @@ script_to_roman:
     "\u0E06\u0E44\u0E22": "khai"
     "\u0E06\u0E44": "khai"
     # FINAL CONSONANT KHO RAKHANG WITH NO VOWEL
-    "%\u0E06": "k"
+    "\u0E06%": "k"
     "\u0E06": "kho"
 
     # CONSONANT NGO NGU WITH VOWELS
@@ -443,7 +443,7 @@ script_to_roman:
     "\u0E07\u0E44\u0E22": "ngai"
     "\u0E07\u0E44": "ngai"
     # FINAL CONSONANT NGO NGU WITH NO VOWEL
-    "%\u0E07": "ng"
+    "\u0E07%": "ng"
     "\u0E07": "ngo"
 
     # CONSONANT CHO CHAN WITH VOWELS
@@ -502,7 +502,7 @@ script_to_roman:
     "\u0E08\u0E44\u0E22": "c\u030Chai"
     "\u0E08\u0E44": "c\u030Chai"
     # FINAL CONSONANT CHO CHAN WITH NO VOWEL
-    "%\u0E08": "t"
+    "\u0E08%": "t"
     "\u0E08": "c\u030Cho"
 
     # CONSONANT CHO CHING WITH VOWELS
@@ -561,7 +561,7 @@ script_to_roman:
     "\u0E09\u0E44\u0E22": "chai"
     "\u0E09\u0E44": "chai"
     # FINAL CONSONANT CHO CHING WITH NO VOWEL
-    "%\u0E09": "t"
+    "\u0E09%": "t"
     "\u0E09": "cho"
 
     # CONSONANT CHO CHANG WITH VOWELS
@@ -620,7 +620,7 @@ script_to_roman:
     "\u0E0A\u0E44\u0E22": "chai"
     "\u0E0A\u0E44": "chai"
     # FINAL CONSONANT CHO CHANG WITH NO VOWEL
-    "%\u0E0A": "t"
+    "\u0E0A%": "t"
     "\u0E0A": "cho"
 
     # CONSONANT SO SO WITH VOWELS
@@ -679,7 +679,7 @@ script_to_roman:
     "\u0E0B\u0E44\u0E22": "sai"
     "\u0E0B\u0E44": "sai"
     # FINAL CONSONANT SO SO WITH NO VOWEL
-    "%\u0E0B": "t"
+    "\u0E0B%": "t"
     "\u0E0B": "so"
 
     # CONSONANT CHO CHOE WITH VOWELS
@@ -738,7 +738,7 @@ script_to_roman:
     "\u0E0C\u0E44\u0E22": "chai"
     "\u0E0C\u0E44": "chai"
     # FINAL CONSONANT CHO CHOE WITH NO VOWEL
-    "%\u0E0C": "t"
+    "\u0E0C%": "t"
     "\u0E0C": "cho"
 
     # CONSONANT YO YING WITH VOWELS
@@ -797,7 +797,7 @@ script_to_roman:
     "\u0E0D\u0E44\u0E22": "yai"
     "\u0E0D\u0E44": "yai"
     # FINAL CONSONANT YO YING WITH NO VOWEL
-    "%\u0E0D": "n"
+    "\u0E0D%": "n"
     "\u0E0D": "yo"
 
     # CONSONANT DO CHADA WITH VOWELS
@@ -856,7 +856,7 @@ script_to_roman:
     "\u0E0E\u0E44\u0E22": "dai"
     "\u0E0E\u0E44": "dai"
     # FINAL CONSONANT DO CHADA WITH NO VOWEL
-    "%\u0E0E": "t"
+    "\u0E0E%": "t"
     "\u0E0E": "do"
 
     # CONSONANT TO PATAK WITH VOWELS
@@ -915,7 +915,7 @@ script_to_roman:
     "\u0E0F\u0E44\u0E22": "tai"
     "\u0E0F\u0E44": "tai"
     # FINAL CONSONANT TO PATAK WITH NO VOWEL
-    "%\u0E0F": "t"
+    "\u0E0F%": "t"
     "\u0E0F": "to"
 
     # CONSONANT THO THAN WITH VOWELS
@@ -974,7 +974,7 @@ script_to_roman:
     "\u0E10\u0E44\u0E22": "thai"
     "\u0E10\u0E44": "thai"
     # FINAL CONSONANT THO THAN WITH NO VOWEL
-    "%\u0E10": "th"
+    "\u0E10%": "th"
     "\u0E10": "tho"
 
     # CONSONANT THO NANGMONTHO WITH VOWELS
@@ -1033,7 +1033,7 @@ script_to_roman:
     "\u0E11\u0E44\u0E22": "thai"
     "\u0E11\u0E44": "thai"
     # FINAL CONSONANT THO NANGMONTHO WITH NO VOWEL
-    "%\u0E11": "t"
+    "\u0E11%": "t"
     "\u0E11": "tho"
 
     # CONSONANT THO PHUTHAO WITH VOWELS
@@ -1092,7 +1092,7 @@ script_to_roman:
     "\u0E12\u0E44\u0E22": "thai"
     "\u0E12\u0E44": "thai"
     # FINAL CONSONANT THO PHUTHAO WITH NO VOWEL
-    "%\u0E12": "t"
+    "\u0E12%": "t"
     "\u0E12": "tho"
 
     # CONSONANT NO NEN WITH VOWELS
@@ -1151,7 +1151,7 @@ script_to_roman:
     "\u0E13\u0E44\u0E22": "nai"
     "\u0E13\u0E44": "nai"
     # FINAL CONSONANT NO NEN WITH NO VOWEL
-    "%\u0E13": "n"
+    "\u0E13%": "n"
     "\u0E13": "no"
 
     # CONSONANT DO DEK WITH VOWELS
@@ -1210,7 +1210,7 @@ script_to_roman:
     "\u0E14\u0E44\u0E22": "dai"
     "\u0E14\u0E44": "dai"
     # FINAL CONSONANT DO DEK WITH NO VOWEL
-    "%\u0E14": "t"
+    "\u0E14%": "t"
     "\u0E14": "do"
 
     # CONSONANT TO TAO WITH VOWELS
@@ -1269,7 +1269,7 @@ script_to_roman:
     "\u0E15\u0E44\u0E22": "tai"
     "\u0E15\u0E44": "tai"
     # FINAL CONSONANT TO TAO WITH NO VOWEL
-    "%\u0E15": "t"
+    "\u0E15%": "t"
     "\u0E15": "to"
 
     # CONSONANT THO THUNG WITH VOWELS
@@ -1328,7 +1328,7 @@ script_to_roman:
     "\u0E16\u0E44\u0E22": "thai"
     "\u0E16\u0E44": "thai"
     # FINAL CONSONANT THO THUNG WITH NO VOWEL
-    "%\u0E16": "t"
+    "\u0E16%": "t"
     "\u0E16": "tho"
 
     # CONSONANT THO THAHAN WITH VOWELS
@@ -1387,7 +1387,7 @@ script_to_roman:
     "\u0E17\u0E44\u0E22": "thai"
     "\u0E17\u0E44": "thai"
     # FINAL CONSONANT THO THAHAN WITH NO VOWEL
-    "%\u0E17": "t"
+    "\u0E17%": "t"
     "\u0E17": "tho"
 
     # CONSONANT THO THONG WITH VOWELS
@@ -1446,7 +1446,7 @@ script_to_roman:
     "\u0E18\u0E44\u0E22": "thai"
     "\u0E18\u0E44": "thai"
     # FINAL CONSONANT THO THONG WITH NO VOWEL
-    "%\u0E18": "t"
+    "\u0E18%": "t"
     "\u0E18": "tho"
 
     # CONSONANT NO NU WITH VOWELS
@@ -1505,7 +1505,7 @@ script_to_roman:
     "\u0E19\u0E44\u0E22": "nai"
     "\u0E19\u0E44": "nai"
     # FINAL CONSONANT NO NU WITH NO VOWEL
-    "%\u0E19": "n"
+    "\u0E19%": "n"
     "\u0E19": "no"
 
     # CONSONANT BO BAIMAI WITH VOWELS
@@ -1564,7 +1564,7 @@ script_to_roman:
     "\u0E1A\u0E44\u0E22": "bai"
     "\u0E1A\u0E44": "bai"
     # FINAL CONSONANT BO BAIMAI WITH NO VOWEL
-    "%\u0E1A": "p"
+    "\u0E1A%": "p"
     "\u0E1A": "bo"
 
     # CONSONANT PO PLA WITH VOWELS
@@ -1623,7 +1623,7 @@ script_to_roman:
     "\u0E1B\u0E44\u0E22": "pai"
     "\u0E1B\u0E44": "pai"
     # FINAL CONSONANT PO PLA WITH NO VOWEL
-    "%\u0E1B": "p"
+    "\u0E1B%": "p"
     "\u0E1B": "po"
 
     # CONSONANT PHO PHUNG WITH VOWELS
@@ -1682,7 +1682,7 @@ script_to_roman:
     "\u0E1C\u0E44\u0E22": "phai"
     "\u0E1C\u0E44": "phai"
     # FINAL CONSONANT PHO PHUNG WITH NO VOWEL
-    "%\u0E1C": "p"
+    "\u0E1C%": "p"
     "\u0E1C": "pho"
 
     # CONSONANT FO FA WITH VOWELS
@@ -1741,7 +1741,7 @@ script_to_roman:
     "\u0E1D\u0E44\u0E22": "fai"
     "\u0E1D\u0E44": "fai"
     # FINAL CONSONANT FO FA WITH NO VOWEL
-    "%\u0E1D": "p"
+    "\u0E1D%": "p"
     "\u0E1D": "fo"
 
     # CONSONANT PHO PHAN WITH VOWELS
@@ -1800,7 +1800,7 @@ script_to_roman:
     "\u0E1E\u0E44\u0E22": "phai"
     "\u0E1E\u0E44": "phai"
     # FINAL CONSONANT PHO PHAN WITH NO VOWEL
-    "%\u0E1E": "p"
+    "\u0E1E%": "p"
     "\u0E1E": "pho"
 
     # CONSONANT FO FAN WITH VOWELS
@@ -1859,7 +1859,7 @@ script_to_roman:
     "\u0E1F\u0E44\u0E22": "fai"
     "\u0E1F\u0E44": "fai"
     # FINAL CONSONANT FO FAN WITH NO VOWEL
-    "%\u0E1F": "p"
+    "\u0E1F%": "p"
     "\u0E1F": "fo"
 
     # CONSONANT PHO SAMPHAO WITH VOWELS
@@ -1918,7 +1918,7 @@ script_to_roman:
     "\u0E20\u0E44\u0E22": "phai"
     "\u0E20\u0E44": "phai"
     # FINAL CONSONANT PHO SAMPHAO WITH NO VOWEL
-    "%\u0E20": "p"
+    "\u0E20%": "p"
     "\u0E20": "pho"
 
     # CONSONANT MO MA WITH VOWELS
@@ -1977,7 +1977,7 @@ script_to_roman:
     "\u0E21\u0E44\u0E22": "mai"
     "\u0E21\u0E44": "mai"
     # FINAL CONSONANT MO MA WITH NO VOWEL
-    "%\u0E21": "m"
+    "\u0E21%": "m"
     "\u0E21": "mo"
 
     # CONSONANT YO YAK WITH VOWELS
@@ -2036,7 +2036,7 @@ script_to_roman:
     "\u0E22\u0E44\u0E22": "yai"
     "\u0E22\u0E44": "yai"
     # FINAL CONSONANT YO YAK WITH NO VOWEL
-    "%\u0E22": ""
+    "\u0E22%": ""
     "\u0E22": "yo"
 
     # CONSONANT RO RUA WITH VOWELS
@@ -2095,7 +2095,7 @@ script_to_roman:
     "\u0E23\u0E44\u0E22": "rai"
     "\u0E23\u0E44": "rai"
     # FINAL CONSONANT RO RUA WITH NO VOWEL
-    "%\u0E23": "n"
+    "\u0E23%": "n"
     "\u0E23": "ro"
 
     # CONSONANT LO LING WITH VOWELS
@@ -2154,7 +2154,7 @@ script_to_roman:
     "\u0E25\u0E44\u0E22": "lai"
     "\u0E25\u0E44": "lai"
     # FINAL CONSONANT LO LING WITH NO VOWEL
-    "%\u0E25": "n"
+    "\u0E25%": "n"
     "\u0E25": "lo"
 
     # CONSONANT WO WAEN WITH VOWELS
@@ -2213,7 +2213,7 @@ script_to_roman:
     "\u0E27\u0E44\u0E22": "wai"
     "\u0E27\u0E44": "wai"
     # FINAL CONSONANT WO WAEN WITH NO VOWEL
-    "%\u0E27": ""
+    "\u0E27%": ""
     "\u0E27": "wo"
 
     # CONSONANT SO SALA WITH VOWELS
@@ -2272,7 +2272,7 @@ script_to_roman:
     "\u0E28\u0E44\u0E22": "sai"
     "\u0E28\u0E44": "sai"
     # FINAL CONSONANT SO SALA WITH NO VOWEL
-    "%\u0E28": "t"
+    "\u0E28%": "t"
     "\u0E28": "so"
 
     # CONSONANT SO RUSI WITH VOWELS
@@ -2331,7 +2331,7 @@ script_to_roman:
     "\u0E29\u0E44\u0E22": "sai"
     "\u0E29\u0E44": "sai"
     # FINAL CONSONANT SO RUSI WITH NO VOWEL
-    "%\u0E29": "t"
+    "\u0E29%": "t"
     "\u0E29": "so"
 
     # CONSONANT SO SUA WITH VOWELS
@@ -2390,7 +2390,7 @@ script_to_roman:
     "\u0E2A\u0E44\u0E22": "sai"
     "\u0E2A\u0E44": "sai"
     # FINAL CONSONANT SO SUA WITH NO VOWEL
-    "%\u0E2A": "t"
+    "\u0E2A%": "t"
     "\u0E2A": "so"
 
     # CONSONANT HO HIP WITH VOWELS
@@ -2449,7 +2449,7 @@ script_to_roman:
     "\u0E2B\u0E44\u0E22": "hai"
     "\u0E2B\u0E44": "hai"
     # FINAL CONSONANT HO HIP WITH NO VOWEL
-    "%\u0E2B": ""
+    "\u0E2B%": ""
     "\u0E2B": "ho"
 
     # CONSONANT LO CHULA WITH VOWELS
@@ -2508,7 +2508,7 @@ script_to_roman:
     "\u0E2C\u0E44\u0E22": "lai"
     "\u0E2C\u0E44": "lai"
     # FINAL CONSONANT LO CHULA WITH NO VOWEL
-    "%\u0E2C": "n"
+    "\u0E2C%": "n"
     "\u0E2C": "lo"
 
     # CONSONANT O ANG WITH VOWELS ALONE (NO CONSONANT)
@@ -2624,7 +2624,7 @@ script_to_roman:
     "\u0E2E\u0E44\u0E22": "hai"
     "\u0E2E\u0E44": "hai"
     # FINAL CONSONANT HO NOKHUK WITH NO VOWEL
-    "%\u0E2E": ""
+    "\u0E2E%": ""
     "\u0E2E": "ho"
 
     "\u0E2F": ""

+ 132 - 161
scriptshifter/tables/data/uighur_arabic.yml

@@ -2,337 +2,308 @@
 general:
   name: Uighur (Arabic)
   case_sensitive: false
-
 roman_to_script:
   map:
     "%a": "\u0626\u0627"
-    "a": "\uFE8E"
-    "%a%": "\u0626\u0627"
-    "%ă": "\u0626\u0647"
-    "ă": "\u0647"
-    "%ă%": "\u0626\u0647"
-    "%b": "\uFE91"
-    "b": "\uFE92"
-    "b%": "\uFE90"
-    "%b%": "\uFE8F"
-    "%ch": "\uFB7C"
-    "ch": "\uFB7D"
-    "ch%": "\uFB7B"
-    "%ch%": "\uFB7A"
-    "%d": "\uFEA9"
-    "d": "\uFEAA"
-    "%d%": "\uFEA9"
-    "%e": "\uFBE6"
-    "e": "\uFBE7"
-    "e%": "\u06D0"
-    "%e%": "\u06D0"
-    "%f": "\uFED3"
-    "f": "\uFED4"
-    "f%": "\uFED2"
-    "%f%": "\uFED1"
-    "%g": "\uFB94"
-    "g": "\uFB95"
-    "g%": "\uFB93"
-    "%g%": "\uFB92"
-    "%gh": "\uFECF"
-    "gh": "\uFED0"
-    "gh%": "\uFECE"
-    "%gh%": "\uFECD"
-    "%h": "\uFEEB"
-    "h": "\uFEEC"
-    "h%": "\uFEEA"
-    "%h%": "\u0647"
-    "%i": "\uFBE8"
-    "i": "\uFBE9"
-    "i%": "\u06CC"
-    "%i%": "\u06CC"
-    "%j": "\uFE9F"
-    "j": "\uFEA0"
-    "j%": "\uFE9E"
-    "%j%": "\uFE9D"
-    "%k": "\uFEDB"
-    "k": "\uFEDC"
-    "k%": "\uFEDA"
-    "%k%": "\uFED9"
-    "%kh": "\uFEA7"
-    "kh": "\uFEA8"
-    "kh%": "\uFEA6"
-    "%kh%": "\uFEA5"
-    "%l": "\uFEDF"
-    "l": "\uFEE0"
-    "l%": "\uFEDE"
-    "%l%": "\uFEDD"
-    "%m": "\uFEE3"
-    "m": "\uFEE4"
-    "m%": "\uFEE2"
-    "%m%": "\uFEE1"
-    "%n": "\uFEE7"
-    "n": "\uFEE8"
-    "n%": "\uFEE6"
-    "%n%": "\uFEE5"
-    "%ng": "\uFBD5"
-    "ng": "\uFBD6"
-    "ng%": "\uFBD4"
-    "%ng%": "\u06AD"
+    "a": "\u0627"
+    "%ă": "\u0626\u06D5"
+    "ă": "\u06D5"
+    "b": "\u0628"
+    "ch": "\u0686"
+    "d": "\u062F"
+    "%e": "\u0626\u06D0"
+    "e": "\u06D0"
+    "f": "\u0641"
+    "g": "\u06AF"
+    "gh": "\u063A"
+    "h": "\u06BE"
+    "%i": "\u0626\u0649"
+    "i": "\u0649"
+    "j": "\u062C"
+    "k": "\u0643"
+    "kh": "\u062E"
+    "l": "\u0644"
+    "m": "\u0645"
+    "n": "\u0646"
+    "ng": "\u06AD"
     "%o": "\u0626\u0648"
-    "o": "\uFEEE"
-    "%o%": "\u0648"
-    "%ö": "\uFE8B\uFBDA"
-    "ö": "\uFBDA"
-    "%ö%": "\u06C6"
-    "%p": "\uFB58"
-    "p": "\uFB59"
-    "p%": "\uFB57"
-    "%p%": "\uFB56"
-    "%q": "\uFED7"
-    "q": "\uFED8"
-    "q%": "\uFED6"
-    "%q%": "\uFED5"
-    "%r": "\uFEAD"
-    "r": "\uFEAE"
-    "%r%": "\uFEAD"
-    "%s": "\uFEB3"
-    "s": "\uFEB4"
-    "s%": "\uFEB2"
-    "%s%": "\uFEB1"
-    "%sh": "\uFEB7"
-    "sh": "\uFEB8"
-    "sh%": "\uFEB6"
-    "%sh%": "\uFEB5"
-    "%t": "\uFE97"
-    "t": "\uFE98"
-    "t%": "\uFE96"
-    "%t%": "\uFE95"
-    "%u": "\uFBF0"
+    "o": "\u0648"
+    "%ö": "\u0626\u06C6"
+    "ö": "\u06C6"
+    "p": "\u067E"
+    "q": "\u0642"
+    "r": "\u0631"
+    "s": "\u0633"
+    "sh": "\u0634"
+    "t": "\u062A"
+    "%u": "\u0626\u06C7"
     "u": "\u06C7"
-    "%ü": "\uFBF4"
+    "%ü": "\u0626\u06C8"
     "ü": "\u06C8"
-    "%v": "\u06CB"
-    "v": "\uFBDF"
-    "%v%": "\u06CB"
-    "%y": "\uFEF3"
-    "y": "\uFEF4"
-    "y%": "\u064A"
-    "%y%": "\u064A"
-    "%z": "\uFEAF"
-    "z": "\uFEB0"
-    "%z%": "\uFEAF"
-    "%zh": "\uFB8A"
-    "zh": "\uFB8B"
-    "%zh%": "\uFB8A"
-
+    "v": "\u06CB"
+    "y": "\u064A"
+    "z": "\u0632"
+    "zh": "\u0698"
+    "ʼ": "\u0626"
+    ";": "\u061B"
+    ",": "\u060C"
+    "?": "\u061F"
 script_to_roman:
   map:
-    "\u0626\u0627": "a"
-    "\uFE8E": "a"
+    "%\u0626\u0627": "a"
     "\u0627": "a"
-    "\u0626\u0647": "ă"
-    "\uFEEA": "ă"
+    "\uFE8E": "a"
+    "%\u0626\u06D5": "ă"
+    "\u06D5": "ă"
+    "%\u0626\u0647": "ă"
     "\u0647": "ă"
+    "\uFEEA": "ă"
+    "\u0628": "b"
     "\uFE91": "b"
     "\uFE92": "b"
     "\uFE90": "b"
     "\uFE8F": "b"
+    "\u0686": "ch"
     "\uFB7C": "ch"
     "\uFB7D": "ch"
     "\uFB7B": "ch"
     "\uFB7A": "ch"
+    "\u062F": "d"
     "\uFEA9": "d"
     "\uFEAA": "d"
-    "\uFE8B\uFBE7": "e"
+    "%\u0626\u06D0": "e"
+    "\u06D0": "e"
     "\uFBE6": "e"
     "\uFBE7": "e"
-    "\u06D0": "e"
-    "\uFE8B\uFBE5": "e"
+    "\uFBE5": "e"
+    "\u0641": "f"
     "\uFED3": "f"
     "\uFED4": "f"
     "\uFED2": "f"
     "\uFED1": "f"
+    "\u06AF": "g"
     "\uFB94": "g"
     "\uFB95": "g"
     "\uFB93": "g"
     "\uFB92": "g"
+    "\u063A": "gh"
     "\uFECF": "gh"
     "\uFED0": "gh"
     "\uFECE": "gh"
     "\uFECD": "gh"
+    "\u06BE": "h"
     "\uFEEB": "h"
     "\uFEEC": "h"
-    "\u0640\u0629": "h"
-    "%\uFEEA": "h"
+    "%\u0640\u0629": "h"
     "\u0629": "h"
-    "%\u0647%": "h"
-    "\uFE8C": "i"
+    "%\u0626\u0649": "i"
+    "\u0649": "i"
     "\uFBE8": "i"
+    "\uFE8C": "i"
     "\uFBE9": "i"
     "\u06CC": "i"
-    "\u0626": "i"
+    "\u062C": "j"
     "\uFE9F": "j"
     "\uFEA0": "j"
     "\uFE9E": "j"
     "\uFE9D": "j"
+    "\u0643": "k"
     "\uFEDB": "k"
     "\uFEDC": "k"
     "\uFEDA": "k"
     "\uFED9": "k"
+    "\u062E": "kh"
     "\uFEA7": "kh"
     "\uFEA8": "kh"
     "\uFEA6": "kh"
     "\uFEA5": "kh"
+    "\u0644": "l"
     "\uFEDF": "l"
     "\uFEE0": "l"
     "\uFEDE": "l"
     "\uFEDD": "l"
+    "\u0645": "m"
     "\uFEE3": "m"
     "\uFEE4": "m"
     "\uFEE2": "m"
     "\uFEE1": "m"
+    "\u0646": "n"
     "\uFEE7": "n"
     "\uFEE8": "n"
     "\uFEE6": "n"
     "\uFEE5": "n"
+    "\u06AD": "ng"
     "\uFBD5": "ng"
     "\uFBD6": "ng"
     "\uFBD4": "ng"
-    "\u06AD": "ng"
-    "\u0626\u0648": "o"
-    "\uFEEE": "o"
+    "%\u0626\u0648": "o"
     "\u0648": "o"
-    "\uFE8B\uFBDA": "ö"
-    "\uFBDA": "ö"
+    "\uFEEE": "o"
+    "%\u0626\u06C6": "ö"
     "\u06C6": "ö"
+    "\uFBDA": "ö"
+    "\u067E": "p"
     "\uFB58": "p"
     "\uFB59": "p"
     "\uFB57": "p"
     "\uFB56": "p"
+    "\u0642": "q"
     "\uFED7": "q"
     "\uFED8": "q"
     "\uFED6": "q"
     "\uFED5": "q"
+    "\u0631": "r"
     "\uFEAD": "r"
     "\uFEAE": "r"
+    "\u0633": "s"
     "\uFEB3": "s"
     "\uFEB4": "s"
     "\uFEB2": "s"
     "\uFEB1": "s"
+    "\u0634": "sh"
     "\uFEB7": "sh"
     "\uFEB8": "sh"
     "\uFEB6": "sh"
     "\uFEB5": "sh"
+    "\u062A": "t"
     "\uFE97": "t"
     "\uFE98": "t"
     "\uFE96": "t"
     "\uFE95": "t"
-    "\uFBF0": "u"
+    "%\u0626\u06C7": "u"
     "\u06C7": "u"
-    "\uFBF4": "ü"
+    "\uFBF0": "u"
+    "%\u0626\u06C8": "ü"
     "\u06C8": "ü"
+    "\uFBF4": "ü"
     "\u06CB": "v"
     "\uFBDF": "v"
+    "\u064A": "y"
     "\uFEF3": "y"
     "\uFEF4": "y"
-    "\u064A": "y"
+    "\u0632": "z"
     "\uFEAF": "z"
     "\uFEB0": "z"
+    "\u0698": "zh"
     "\uFB8A": "zh"
     "\uFB8B": "zh"
+    "\u0626": "ʼ"
+    "\uFE8B": "ʼ"
+    "\u061B": ";"
+    "\u060C": ","
+    "\u061F": "?"
+    "\u0646\u06AF": "n\u02B9g"
+    "\u0646\uFB94": "n\u02B9g"
+    "\u0646\uFB95": "n\u02B9g"
+    "\u0646\uFB93": "n\u02B9g"
+    "\u0646\uFB92": "n\u02B9g"
+    "\uFEE7\u06AF": "n\u02B9g"
     "\uFEE7\uFB94": "n\u02B9g"
     "\uFEE7\uFB95": "n\u02B9g"
     "\uFEE7\uFB93": "n\u02B9g"
     "\uFEE7\uFB92": "n\u02B9g"
+    "\uFEE8\u06AF": "n\u02B9g"
     "\uFEE8\uFB94": "n\u02B9g"
     "\uFEE8\uFB95": "n\u02B9g"
     "\uFEE8\uFB93": "n\u02B9g"
     "\uFEE8\uFB92": "n\u02B9g"
+    "\uFEE6\u06AF": "n\u02B9g"
     "\uFEE6\uFB94": "n\u02B9g"
     "\uFEE6\uFB95": "n\u02B9g"
     "\uFEE6\uFB93": "n\u02B9g"
     "\uFEE6\uFB92": "n\u02B9g"
+    "\uFEE5\u06AF": "n\u02B9g"
     "\uFEE5\uFB94": "n\u02B9g"
     "\uFEE5\uFB95": "n\u02B9g"
     "\uFEE5\uFB93": "n\u02B9g"
     "\uFEE5\uFB92": "n\u02B9g"
+    "\u0633\u06BE": "s\u02B9h"
+    "\u0633\uFEEB": "s\u02B9h"
+    "\u0633\uFEEC": "s\u02B9h"
+    "\u0633\u0640\u0629": "s\u02B9h"
+    "\u0633\u0629": "s\u02B9h"
+    "\uFEB3\u06BE": "s\u02B9h"
     "\uFEB3\uFEEB": "s\u02B9h"
     "\uFEB3\uFEEC": "s\u02B9h"
     "\uFEB3\u0640\u0629": "s\u02B9h"
-    "\uFEB3\uFEEA": "s\u02B9h"
     "\uFEB3\u0629": "s\u02B9h"
-    "\uFEB3\u0647": "s\u02B9h"
+    "\uFEB4\u06BE": "s\u02B9h"
     "\uFEB4\uFEEB": "s\u02B9h"
     "\uFEB4\uFEEC": "s\u02B9h"
     "\uFEB4\u0640\u0629": "s\u02B9h"
-    "\uFEB4\uFEEA": "s\u02B9h"
     "\uFEB4\u0629": "s\u02B9h"
-    "\uFEB4\u0647": "s\u02B9h"
+    "\uFEB2\u06BE": "s\u02B9h"
     "\uFEB2\uFEEB": "s\u02B9h"
     "\uFEB2\uFEEC": "s\u02B9h"
     "\uFEB2\u0640\u0629": "s\u02B9h"
-    "\uFEB2\uFEEA": "s\u02B9h"
     "\uFEB2\u0629": "s\u02B9h"
-    "\uFEB2\u0647": "s\u02B9h"
+    "\uFEB1\u06BE": "s\u02B9h"
     "\uFEB1\uFEEB": "s\u02B9h"
     "\uFEB1\uFEEC": "s\u02B9h"
     "\uFEB1\u0640\u0629": "s\u02B9h"
-    "\uFEB1\uFEEA": "s\u02B9h"
     "\uFEB1\u0629": "s\u02B9h"
-    "\uFEB1\u0647": "s\u02B9h"
+    "\u0643\u06BE": "k\u02B9h"
+    "\u0643\uFEEB": "k\u02B9h"
+    "\u0643\uFEEC": "k\u02B9h"
+    "\u0643\u0640\u0629": "k\u02B9h"
+    "\u0643\u0629": "k\u02B9h"
+    "\uFEDB\u06BE": "k\u02B9h"
     "\uFEDB\uFEEB": "k\u02B9h"
     "\uFEDB\uFEEC": "k\u02B9h"
     "\uFEDB\u0640\u0629": "k\u02B9h"
-    "\uFEDB\uFEEA": "k\u02B9h"
     "\uFEDB\u0629": "k\u02B9h"
-    "\uFEDB\u0647": "k\u02B9h"
+    "\uFEDC\u06BE": "k\u02B9h"
     "\uFEDC\uFEEB": "k\u02B9h"
     "\uFEDC\uFEEC": "k\u02B9h"
     "\uFEDC\u0640\u0629": "k\u02B9h"
-    "\uFEDC\uFEEA": "k\u02B9h"
     "\uFEDC\u0629": "k\u02B9h"
-    "\uFEDC\u0647": "k\u02B9h"
+    "\uFEDA\u06BE": "k\u02B9h"
     "\uFEDA\uFEEB": "k\u02B9h"
     "\uFEDA\uFEEC": "k\u02B9h"
     "\uFEDA\u0640\u0629": "k\u02B9h"
-    "\uFEDA\uFEEA": "k\u02B9h"
     "\uFEDA\u0629": "k\u02B9h"
-    "\uFEDA\u0647": "k\u02B9h"
+    "\uFED9\u06BE": "k\u02B9h"
     "\uFED9\uFEEB": "k\u02B9h"
     "\uFED9\uFEEC": "k\u02B9h"
     "\uFED9\u0640\u0629": "k\u02B9h"
-    "\uFED9\uFEEA": "k\u02B9h"
     "\uFED9\u0629": "k\u02B9h"
-    "\uFED9\u0647": "k\u02B9h"
+    "\u0632\u06BE": "z\u02B9h"
+    "\u0632\uFEEB": "z\u02B9h"
+    "\u0632\uFEEC": "z\u02B9h"
+    "\u0632\u0640\u0629": "z\u02B9h"
+    "\u0632\u0629": "z\u02B9h"
+    "\uFEAF\u06BE": "z\u02B9h"
     "\uFEAF\uFEEB": "z\u02B9h"
     "\uFEAF\uFEEC": "z\u02B9h"
     "\uFEAF\u0640\u0629": "z\u02B9h"
-    "\uFEAF\uFEEA": "z\u02B9h"
     "\uFEAF\u0629": "z\u02B9h"
-    "\uFEAF\u0647": "z\u02B9h"
+    "\uFEB0\u06BE": "z\u02B9h"
     "\uFEB0\uFEEB": "z\u02B9h"
     "\uFEB0\uFEEC": "z\u02B9h"
     "\uFEB0\u0640\u0629": "z\u02B9h"
-    "\uFEB0\uFEEA": "z\u02B9h"
     "\uFEB0\u0629": "z\u02B9h"
-    "\uFEB0\u0647": "z\u02B9h"
+    "\u06AF\u06BE": "g\u02B9h"
+    "\u06AF\uFEEB": "g\u02B9h"
+    "\u06AF\uFEEC": "g\u02B9h"
+    "\u06AF\u0640\u0629": "g\u02B9h"
+    "\u06AF\u0629": "g\u02B9h"
+    "\uFB94\u06BE": "g\u02B9h"
     "\uFB94\uFEEB": "g\u02B9h"
     "\uFB94\uFEEC": "g\u02B9h"
     "\uFB94\u0640\u0629": "g\u02B9h"
-    "\uFB94\uFEEA": "g\u02B9h"
     "\uFB94\u0629": "g\u02B9h"
-    "\uFB94\u0647": "g\u02B9h"
+    "\uFB95\u06BE": "g\u02B9h"
     "\uFB95\uFEEB": "g\u02B9h"
     "\uFB95\uFEEC": "g\u02B9h"
     "\uFB95\u0640\u0629": "g\u02B9h"
-    "\uFB95\uFEEA": "g\u02B9h"
     "\uFB95\u0629": "g\u02B9h"
-    "\uFB95\u0647": "g\u02B9h"
+    "\uFB93\u06BE": "g\u02B9h"
     "\uFB93\uFEEB": "g\u02B9h"
     "\uFB93\uFEEC": "g\u02B9h"
     "\uFB93\u0640\u0629": "g\u02B9h"
-    "\uFB93\uFEEA": "g\u02B9h"
     "\uFB93\u0629": "g\u02B9h"
-    "\uFB93\u0647": "g\u02B9h"
+    "\uFB92\u06BE": "g\u02B9h"
     "\uFB92\uFEEB": "g\u02B9h"
     "\uFB92\uFEEC": "g\u02B9h"
     "\uFB92\u0640\u0629": "g\u02B9h"
-    "\uFB92\uFEEA": "g\u02B9h"
     "\uFB92\u0629": "g\u02B9h"
-    "\uFB92\u0647": "g\u02B9h"

+ 55 - 55
scriptshifter/tables/data/urdu.yml

@@ -59,10 +59,10 @@ roman_to_script:
 
     ####
 
-    #lillah
+    # lillah
     "lilla\u0304h": "\u0644\u0644\u0647"
 
-    #billah
+    # billah
     "billa\u0304h": "\u0628\u0644\u0644\u0647"
 
     # Rahman
@@ -72,8 +72,8 @@ roman_to_script:
     "Nuzhat": "\u0646\u0632\u0647\u062A"
 
     # Uddin names
-    "%i\u0304uddi\u0304n": "\u0649\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
-    "%uddi\u0304n": "\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
+    "i\u0304uddi\u0304n%": "\u0649\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
+    "uddi\u0304n%": "\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
 
     # ta'lif
 
@@ -84,7 +84,7 @@ roman_to_script:
     "# Ae": "\u0627\u06D2"
 
     # Parsing "sh[dot below]" as in "Ishaq [name]"
-    "%sh\u0323%": "\u0633\u062D"
+    "sh\u0323": "\u0633\u062D"
 
     # Numbers (\u06F0-06F9 for Persian/Urdu)
     # currently *not* valid MARC21 characters
@@ -154,21 +154,21 @@ roman_to_script:
     "\u02B9": "\u200C"
 
     # Izafah here
-    "%a\u0304-yi": "\u0627\u0626\u06D2"
-    "%u\u0304-yi": "\u0648\u0626\u06D2"
-    "%o-yi": "\u0648\u0626\u06D2"
-    "%e-yi": "\u06D2"
-    "%i\u0304-yi": "\u0649"
-    "%h-yi": "\u06C0"
-    "%-yi": "\u06C0"
-    "%al-i": "\u0644"
-    "%ul-i": "\u0644"
-    "%-i": ""
+    "a\u0304-yi%": "\u0627\u0626\u06D2"
+    "u\u0304-yi%": "\u0648\u0626\u06D2"
+    "o-yi%": "\u0648\u0626\u06D2"
+    "e-yi%": "\u06D2"
+    "i\u0304-yi%": "\u0649"
+    "h-yi%": "\u06C0"
+    "-yi%": "\u06C0"
+    "al-i%": "\u0644"
+    "ul-i%": "\u0644"
+    "-i%": ""
 
     # Hyphenated prefixes:
     "bi-": "\u0628"
-    "al-a\u0304%": "\u0627\u0644\u0627"
-    "ul-a\u0304%": "\u0627\u0644\u0627"
+    "%al-a\u0304": "\u0627\u0644\u0627"
+    "%ul-a\u0304": "\u0627\u0644\u0627"
     "al-": "\u0627\u0644"
     "ul-": "\u0627\u0644"
     "lil-i": "\u0644\u0644"
@@ -234,9 +234,9 @@ roman_to_script:
 
     # Diphthongs here
     "Ae": "\u0627\u06D2"
-    "%ai": "\u06D2"
+    "ai%": "\u06D2"
     "Ai": "\u0627\u064A"
-    "ai%": "\u0627\u064A"
+    "%ai": "\u0627\u064A"
     "ai": "\u064A"
     "\u02BBAu": "\u0639\u0648"
     "\u02BBau": "\u0639\u0648"
@@ -244,23 +244,23 @@ roman_to_script:
     "au": "\u0648"
 
     # ayn-alif combo
-    "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
-    "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
-    "%\u02BBa\u0304%": "\u0639\u0627"
+    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304": "\u0639\u0627"
 
     # hamza and vowel combo
     # [in final position]
-    "%u\u0304\u02BEi\u0304": "\u0648\u0626\u0649"
-    "%u\u0304\u02BCi\u0304": "\u0648\u0626\u0649"
-    "%\u02BEi\u0304": "\u0626\u0649"
-    "%\u02BCi\u0304": "\u0626\u0649"
-    "%\u02BEe": "\u0626\u06D2"
-    "%\u02BCe": "\u0626\u06D2"
-
-    "%\u02BEu\u0304": "\u0624"
-    "%\u02BCu\u0304": "\u0624"
-    "%\u02BEo": "\u0624"
-    "%\u02BCo": "\u0624"
+    "u\u0304\u02BEi\u0304%": "\u0648\u0626\u0649"
+    "u\u0304\u02BCi\u0304%": "\u0648\u0626\u0649"
+    "\u02BEi\u0304%": "\u0626\u0649"
+    "\u02BCi\u0304%": "\u0626\u0649"
+    "\u02BEe%": "\u0626\u06D2"
+    "\u02BCe%": "\u0626\u06D2"
+
+    "\u02BEu\u0304%": "\u0624"
+    "\u02BCu\u0304%": "\u0624"
+    "\u02BEo%": "\u0624"
+    "\u02BCo%": "\u0624"
 
     # [in medial position]
     "a\u02BEa": "\u0623"
@@ -294,31 +294,31 @@ roman_to_script:
     "\u02BEa": "\u0626"
     "\u02BCa": "\u0626"
 
-    "%i\u0304": "\u0649"
-    "%a\u0301": "\u0649\u0670"
+    "i\u0304%": "\u0649"
+    "a\u0301%": "\u0649\u0670"
 
     # A
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
-    "\u02BBA%": "\u0639"
+    "%\u02BBA": "\u0639"
     "\u02BBa": "\u0639"
     "A\u02BB": "\u0627\u0639"
-    "a\u02BB%": "\u0627\u0639"
+    "%a\u02BB": "\u0627\u0639"
     "a\u02BB": "\u0639"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     "a\u0304": "\u0627"
     "a\u0301": "\u0649"
     "ayy": "\u064A\u0651"
-    "A%": "\u0627"
-    "a%": "\u0627"
+    "%A": "\u0627"
+    "%a": "\u0627"
     "A": ""
     "a": ""
 
     # E
-    "%e": "\u06D2"
-    "E%": "\u0627\u064A"
-    "e%": "\u0627\u064A"
+    "e%": "\u06D2"
+    "%E": "\u0627\u064A"
+    "%e": "\u0627\u064A"
     "e": "\u064A"
 
     # I
@@ -327,29 +327,29 @@ roman_to_script:
     "I\u02BB": "\u0627\u0639"
     "i\u02BB": "\u0639"
     "\u02BBI": "\u0639"
-    "I\u0304%": "\u0627\u064A"
-    "i\u0304%": "\u0627\u064A"
+    "%I\u0304": "\u0627\u064A"
+    "%i\u0304": "\u0627\u064A"
     "i\u0304y": "\u064A"
     "i\u0304": "\u064A"
     "iyy": "\u064A\u0651"
-    "I%": "\u0627"
-    "i%": "\u0627"
+    "%I": "\u0627"
+    "%i": "\u0627"
     "I": "\u0627"
     "i": ""
 
     # O
-    "O%": "\u0627\u0648"
+    "%O": "\u0627\u0648"
     "o": "\u0648"
 
     # U
     "\u02BBu\u0304": "\u0639\u0648"
     "\u02BBU": "\u0639"
     "\u02BBu": "\u0639"
-    "U\u0304%": "\u0627\u0648"
-    "u\u0304%": "\u0627\u0648"
+    "%U\u0304": "\u0627\u0648"
+    "%u\u0304": "\u0627\u0648"
     "u\u0304": "\u0648"
-    "U%": "\u0627"
-    "u%": "\u0627"
+    "%U": "\u0627"
+    "%u": "\u0627"
     "U": ""
     "u": ""
 
@@ -461,5 +461,5 @@ roman_to_script:
     "\u02BB": "\u0639"
 
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"

+ 9 - 1
scriptshifter/trans.py

@@ -2,10 +2,11 @@ import logging
 
 from importlib import import_module
 from re import Pattern, compile
+from unicodedata import normalize as precomp_normalize
 
 from scriptshifter.exceptions import BREAK, CONT
 from scriptshifter.tables import (
-        BOW, EOW, WORD_BOUNDARY, FEAT_CASEI, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
+        BOW, EOW, WORD_BOUNDARY, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
         get_connection, get_lang_dcap, get_lang_general, get_lang_hooks,
         get_lang_ignore, get_lang_map, get_lang_normalize)
 
@@ -342,6 +343,13 @@ def _normalize_src(ctx, norm_rules):
     NOTE: this manipulates the protected source attribute so it may not
     correspond to the originally provided source.
     """
+    # Normalize precomposed Unicode characters.
+    #
+    # In using diacritics, LC standards prefer the decomposed form (base
+    # character followed by combining diacritic) to the pre-composed form
+    # (single Unicode symbol for the letter with diacritic).
+    ctx._src = precomp_normalize("NFD", ctx.src)
+
     for nk, nv in norm_rules.items():
         ctx._src = ctx.src.replace(nk, nv)
 

+ 4 - 0
test/data/script_samples/uighur_arabic.csv

@@ -0,0 +1,4 @@
+uighur_arabic,ئابباس مۇنيار تۈركىيقان,abbas munyar türkiyqan
+uighur_arabic,تەنھا پەرۋانە,tănha părvană
+uighur_arabic,ئۈرۈمچى,ürümchi
+uighur_arabic,شىنجاڭ گۈزەل سەنئەت-فوتو سۈرەت نەشرىياتى,shinjang güzăl sănʼăt-foto sürăt năshriyati