Преглед на файлове

Reverse % sign for word boundary.

scossu преди 3 месеца
родител
ревизия
2489c54759

+ 58 - 54
scriptshifter/tables/data/arabic.yml

@@ -4,7 +4,8 @@
 ---
 general:
   name: Arabic
-  description: Arabic R2S using a conversion table and S2R using a 3rd party library.
+  description: >
+    Arabic R2S using a conversion table and S2R using a 3rd party library.
   case_sensitive: false
 
   parents:
@@ -16,17 +17,18 @@ roman_to_script:
 
     # Original table by David Bucknum
     # Last updated 25 January 2019
-    # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO 
-    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually" 
+    # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
+    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
+    # Scripts Conceptually"
 
 
-    # Punctuation marks:    
+    # Punctuation marks:
     "*": "\u066D"
     ",": "\u060C"
     ";": "\u061B"
     "?": "\u061F"
 
-    # Exceptions for specific words 
+    # Exceptions for specific words
     # Allah
     "Alla\u0304h": "\u0627\u0644\u0644\u0647"
 
@@ -66,19 +68,19 @@ roman_to_script:
 
     # "sh[dot below]" as in "Ishaq"
 
-    "%sh\u0323%": "\u0633\u062D"
+    "sh\u0323": "\u0633\u062D"
 
     # "s[prime]h" combos
 
-    "%s\u02B9h%": "\u0633\u0647"
+    "s\u02B9h": "\u0633\u0647"
 
     # "th[dot below]"
 
-    "%th\u0323%": "\u062A\u062D"
+    "th\u0323": "\u062A\u062D"
 
-    # dh[dot under] 
+    # dh[dot under]
 
-    "%dh\u0323%": "\u062F\u062D"
+    "dh\u0323": "\u062F\u062D"
 
     # La-hu
 
@@ -96,7 +98,9 @@ roman_to_script:
     "mi\u02BEat": "\u0645\u0627\u0626\u0629"
     "mi\u02BCat": "\u0645\u0627\u0626\u0629"
 
-    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use \u06F0-06F9. This needs further discussion with PSD as RLIN21 used Hindi numbers, Connexion and Voyager does not.)
+    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
+    # will technically use \u06F0-\u06F9. This needs further discussion with PSD
+    # as RLIN21 used Hindi numbers, Connexion and Voyager do not.)
 
     # Edition statements with Latin number
     "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
@@ -132,15 +136,15 @@ roman_to_script:
     "ka-": "\u0643"
 
     # Vowels and vowel/consonant combinations
-    "%ah": "\u0629"
-    "%at": "\u0629"
+    "ah%": "\u0629"
+    "at%": "\u0629"
 
-    #tanwin
-    "%an": "\u0627"
+    # tanwin
+    "an%": "\u0627"
 
     # ayn-alif combo
-    "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
-    "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
 
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
@@ -153,27 +157,27 @@ roman_to_script:
     "\u02BBU": "\u0639"
     "\u02BBu": "\u0639"
 
-    "\u02BBA%": "\u0639"
-    #"\u02BBa%": "\u0639"
+    "%\u02BBA": "\u0639"
+    # "%\u02BBa": "\u0639"
 
     # alif and hamzas for all occasions
 
-    # truncation necessary? It seems to work fine with. 
+    # truncation necessary? It seems to work fine with.
 
-    "%i\u0304\u02BEah": "\u064A\u0626\u0629"
-    "%i\u0304\u02BCah": "\u064A\u0626\u0629"
+    "i\u0304\u02BEah%": "\u064A\u0626\u0629"
+    "i\u0304\u02BCah%": "\u064A\u0626\u0629"
 
-    "%i\u0304\u02BEat": "\u064A\u0626\u0629"
-    "%i\u0304\u02BCat": "\u064A\u0626\u0629"
+    "i\u0304\u02BEat%": "\u064A\u0626\u0629"
+    "i\u0304\u02BCat%": "\u064A\u0626\u0629"
 
-    "%i\u02BEa\u0304": "\u0626\u0627"
-    "%i\u02BCa\u0304": "\u0626\u0627"
+    "i\u02BEa\u0304%": "\u0626\u0627"
+    "i\u02BCa\u0304%": "\u0626\u0627"
 
-    "%i\u02BE": "\u0626"
-    "%i\u02BC": "\u0626"
+    "i\u02BE%": "\u0626"
+    "i\u02BC%": "\u0626"
     "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
     "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
-     
+
     "a\u02BE": "\u0623"
     "a\u02BC": "\u0623"
     "\u02BEi": "\u0626"
@@ -198,64 +202,66 @@ roman_to_script:
     "a\u0304\u02BEi": "\u0627\u0626"
     "a\u0304\u02BC": "\u0627\u0621"
     "a\u0304\u02BE": "\u0627\u0621"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     "A\u0304": "\u0627"
     "a\u0304": "\u0627"
 
-    # These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinine (i.e. al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l" 
-    "A\u02BB%": "\u0623\u0639"
-    "a\u02BB%": "\u0623\u0639"
+    # These next two lines were intended to convert to alif-ayn when it is at
+    # the beginning of a word, definite or indefinite (i.e.
+    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l)
+    "%A\u02BB": "\u0623\u0639"
+    "%a\u02BB": "\u0623\u0639"
     "a\u02BB": "\u0639"
     "A\u0301": "\u0649"
     "a\u0301": "\u0649"
 
     "ayy": "\u064A"
-    "A%": "\u0623"
-    "a%": "\u0627"
+    "%A": "\u0623"
+    "%a": "\u0627"
     "A": "\u0623"
     "a": ""
 
     # I - Capital I at beginning of word is usually alif hamzah-below.
 
-    "%i\u0304": "\u064A"
+    "i\u0304%": "\u064A"
     "i\u0304y": "\u064A"
     "iy": "\u064A"
-    "I\u0304%": "\u0625\u064A"
+    "%I\u0304": "\u0625\u064A"
     "i\u0304": "\u064A"
-    "\u02BBI%": "\u0639"
+    "%\u02BBI": "\u0639"
 
-    #"i\u02BB": "\u0625\u0639"
+    # "i\u02BB": "\u0625\u0639"
 
     "I\u02BE": "\u0627\u0626"
     "I\u02BC": "\u0627\u0626"
     "i\u02BE": "\u0626"
     "i\u02BC": "\u0627\u0626"
 
-    "I%": "\u0625"
-    "i%": "\u0625"
+    "%I": "\u0625"
+    "%i": "\u0625"
     "I": "\u0625"
     "i": ""
 
-    # U 
+    # U
 
     "u\u0304\u02BE": "\u0624"
     "u\u0304\u02BC": "\u0624"
-    "U\u0304w%": "\u0623\u0648"
-    "u\u0304w%": "\u0623\u0648"
-    "U\u0304%": "\u0623\u0648"
-    "u\u0304%": "\u0623\u0648"
+    "%U\u0304w": "\u0623\u0648"
+    "%u\u0304w": "\u0623\u0648"
+    "%U\u0304": "\u0623\u0648"
+    "%u\u0304": "\u0623\u0648"
     "u\u0304w": "\u0648"
     "u\u0304": "\u0648"
     "u\u02BE": "\u0624"
     "u\u02BC": "\u0624"
 
-    "U%": "\u0623"
-    "u%": "\u0623"
+    "%U": "\u0623"
+    "%u": "\u0623"
     "U": "\u0623"
     "u": ""
 
-    # Consonants, with tashdid added 
+    # Consonants, with tashdid added
 
     "B": "\u0628"
     "bb": "\u0628"
@@ -352,8 +358,8 @@ roman_to_script:
     # hamza - not romanized
     # "\u0621"
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"
 
     # Do not know what, if anything, is needed here:
     # tatweel:
@@ -380,8 +386,6 @@ roman_to_script:
     # "\u0671"
 
 
-
-
 script_to_roman:
   hooks:
     post_config:

+ 2 - 2
scriptshifter/tables/data/bulgarian.yml

@@ -38,8 +38,8 @@ script_to_roman:
     "\u044C": ""
     "\u042C": ""
     "\u044A": ""
-    "\u042A%": "u\u0306"
-    "\u042A": ""
+    "\u042A%": ""  # Final
+    "\u042A": "u\u0306"
     "\u0413": "G"
     "\u0433": "g"
     "\u0416": "Zh"

+ 17 - 17
scriptshifter/tables/data/divehi_thaana.yml

@@ -30,8 +30,8 @@ roman_to_script:
     "h\u032E": "\u0781\u07B0"
 
     # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
-    "%H\u0332": "\u0787\u07B0"
-    "%h\u0332": "\u0787\u07B0"
+    "H\u0332%": "\u0787\u07B0"
+    "h\u0332%": "\u0787\u07B0"
     "H": "\u0780"
     "h": "\u0780"
     "S\u0301": "\u0781"
@@ -70,13 +70,13 @@ roman_to_script:
     "F": "\u078A"
     "ff": "\u0787\u07B0\u078A"
     "f": "\u078A"
-    # THAANA LETTER "D/d" WITH DOT BELOW (0323) 
+    # THAANA LETTER "D/d" WITH DOT BELOW (0323)
     "D\u0323": "\u0791"
     "d\u0323": "\u0791"
     "D": "\u078B"
     "dd": "\u0787\u07B0\u078B"
     "d": "\u078B"
-    # THAANA LETTER "T/t" WITH DOT BELOW (0323) 
+    # THAANA LETTER "T/t" WITH DOT BELOW (0323)
     "T\u0323": "\u0793"
     "t\u0323": "\u0793"
     "T\u0324T": "\u078C\u07B0\u078C"
@@ -169,7 +169,7 @@ roman_to_script:
     "ghgh": "\u0787\u07B0\u07A3"
     "gh": "\u07A3"
 
-    # THAANA EXTENSION FOR ARABIC LETTER QAAFU 
+    # THAANA EXTENSION FOR ARABIC LETTER QAAFU
     "Q": "\u07A4"
     "qq": "\u0787\u07B0\u07A4"
     "q": "\u07A4"
@@ -181,24 +181,24 @@ roman_to_script:
 
     # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
     # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION)
-    "A\u0304%": "\u0787\u07A7"
-    "A%": "\u0787\u07A6"
+    "%A\u0304": "\u0787\u07A7"
+    "%A": "\u0787\u07A6"
     "\u0020a\u0304": "\u0020\u0787\u07A7"
     "\u0020a": "\u0020\u0787\u07A6"
-    "E\u0304%": "\u0787\u07AD"
-    "E%": "\u0787\u07AC"
+    "%E\u0304": "\u0787\u07AD"
+    "%E": "\u0787\u07AC"
     "\u0020e\u0304": "\u0020\u0787\u07AD"
     "\u0020e": "\u0020\u0787\u07AC"
-    "I\u0304%": "\u0787\u07A9"
-    "I%": "\u0787\u07A8"
+    "%I\u0304": "\u0787\u07A9"
+    "%I": "\u0787\u07A8"
     "\u0020i\u0304": "\u0020\u0787\u07A9"
     "\u0020i": "\u0020\u0787\u07A8"
-    "O\u0304%": "\u0787\u07AF"
-    "O%": "\u0787\u07AE"
+    "%O\u0304": "\u0787\u07AF"
+    "%O": "\u0787\u07AE"
     "\u0020o\u0304": "\u0020\u0787\u07AF"
     "\u0020o": "\u0020\u0787\u07AE"
-    "U\u0304%": "\u0787\u07AB"
-    "U%": "\u0787\u07AB"
+    "%U\u0304": "\u0787\u07AB"
+    "%U": "\u0787\u07AB"
     "\u0020u\u0304": "\u0020\u0787\u07AB"
     "\u0020u": "\u0020\u0787\u07AB"
 
@@ -301,7 +301,7 @@ script_to_roman:
     "\u0787\u07B0\u078A": "ff"
     "\u078A": "f"
 
-    # THAANA LETTER "D/d" WITH DOT BELOW (0323) 
+    # THAANA LETTER "D/d" WITH DOT BELOW (0323)
     "\u0787\u07B0\u0791": "d\u0323d\u0323"
     "\u0791": "d\u0323"
 
@@ -386,7 +386,7 @@ script_to_roman:
     "\u0787\u07B0\u07A3": "ghgh"
     "\u07A3": "gh"
 
-    # THAANA EXTENSION FOR ARABIC LETTER QAAFU 
+    # THAANA EXTENSION FOR ARABIC LETTER QAAFU
     "\u0787\u07B0\u07A4": "qq"
     "\u07A4": "q"
 

+ 49 - 49
scriptshifter/tables/data/persian.yml

@@ -34,7 +34,7 @@ roman_to_script:
     "ibn": "\u0628\u0646"
 
     # Parsing "sh[dot below]" as in "Ishaq [name]"
-    "%sh\u0323%": "\u0633\u062D"
+    "sh\u0323": "\u0633\u062D"
 
     # Edition statements with Latin number
     "Cha\u0304p-i 1": "\u0686\u0627\u067E 1"
@@ -75,7 +75,7 @@ roman_to_script:
     "# prime ": " ZWNJ"
     "\u02B9a\u0304": "\u200C\u0622"
     "\u02B9a": "\u200C\u0627"
-    "%\u02B9i\u0304": "\u200C\u0627\u0649"
+    "\u02B9i\u0304%": "\u200C\u0627\u0649"
     "\u02B9i\u0304": "\u200C\u0627\u064A"
     "i\u0304\u02B9a\u0304": "\u0649\u200C\u0622"
     "i\u0304\u02B9a": "\u0649\u200C\u0627"
@@ -87,22 +87,22 @@ roman_to_script:
     # Vowel and vowel/consonant combinations
     # and hyphenated suffixes:
     # izafah here
-    "%h-\u02BEi": "\u06C0"
-    "%h-\u02BCi": "\u06C0 "
-    "%-\u02BEi": "\u06C0"
-    "%-\u02BCi": "\u06C0"
+    "h-\u02BEi%": "\u06C0"
+    "h-\u02BCi%": "\u06C0 "
+    "-\u02BEi%": "\u06C0"
+    "-\u02BCi%": "\u06C0"
 
-    "%h-yi": "\u0647\u200C\u0649"
-    "%-yi": "\u0649"
-    "%yi": "\u0649"
-    "%\u02BEi\u0304": "\u0649"
-    "%\u02BCi\u0304": "\u0649"
-    "%i\u0304-i": "\u0649"
-    "%i\u0304": "\u0649"
-    "%ayy-i": "\u0649"
-    "%ay": "\u0649"
-    "%al-i": "\u0644"
-    "%-i": ""
+    "h-yi%": "\u0647\u200C\u0649"
+    "-yi%": "\u0649"
+    "yi%": "\u0649"
+    "\u02BEi\u0304%": "\u0649"
+    "\u02BCi\u0304%": "\u0649"
+    "i\u0304-i%": "\u0649"
+    "i\u0304%": "\u0649"
+    "ayy-i%": "\u0649"
+    "ay%": "\u0649"
+    "al-i%": "\u0644"
+    "-i%": ""
 
     # Hyphenated prefixes:
     "wa-": "\u0648"
@@ -113,18 +113,18 @@ roman_to_script:
     "ka-": "\u0643"
 
     # ayn combo
-    "%\u02BBa\u0304%": "\u0639\u0627"
+    "\u02BBa\u0304": "\u0639\u0627"
     # alif combos
     # [final position]
-    "%a\u0304\u02BE": "\u0627\u0621"
-    "%a\u0304\u02BC": "\u0627\u0621"
-    "%a\u0304\u02BEi\u0304": "\u0627\u0626\u0649"
-    "%a\u0304\u02BCi\u0304": "\u0627\u0626\u0649"
+    "a\u0304\u02BE%": "\u0627\u0621"
+    "a\u0304\u02BC%": "\u0627\u0621"
+    "a\u0304\u02BEi\u0304%": "\u0627\u0626\u0649"
+    "a\u0304\u02BCi\u0304%": "\u0627\u0626\u0649"
     # [initial position]
-    "A\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "A\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
+    "%A\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%A\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
     # [medial position]
     "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
     "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
@@ -138,10 +138,10 @@ roman_to_script:
     # A
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
-    "\u02BBA%": "\u0639"
+    "%\u02BBA": "\u0639"
     "\u02BBa": "\u0639"
-    "A\u02BB%": "\u0627\u0639"
-    "a\u02BB%": "\u0627\u0639"
+    "%A\u02BB": "\u0627\u0639"
+    "%a\u02BB": "\u0627\u0639"
     "A\u02BB": "\u0623\u0639"
     "a\u02BB": "\u0639"
     "a\u02BE": "\u0623"
@@ -150,57 +150,57 @@ roman_to_script:
     "\u02BCa": "\u0623"
     "a\u0304\u02BE": "\u0621"
     "a\u0304\u02BC": "\u0621"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     # previously an alif:
     "A\u0304": "\u0622"
     "a\u0304": "\u0627"
     "A\u0301": "\u0649"
     "a\u0301": "\u0649"
     "ayy": "\u064A"
-    "A%": "\u0627"
-    "a%": "\u0627"
+    "%A": "\u0627"
+    "%a": "\u0627"
     "A": "\u0627"
     "a": ""
 
     # I
     "\u02BBI\u0304": "\u0639\u064A"
     "\u02BBi\u0304": "\u0639\u064A"
-    "I\u02BB%": "\u0627\u0639"
-    "i\u02BB%": "\u0627\u0639"
+    "%I\u02BB": "\u0627\u0639"
+    "%i\u02BB": "\u0627\u0639"
     "i\u02BB": "\u0639"
-    "\u02BBI%": "\u0639"
-    "I\u02BE%": "\u0627\u0626"
-    "I\u02BC%": "\u0627\u0626"
+    "%\u02BBI": "\u0639"
+    "%I\u02BE": "\u0627\u0626"
+    "%I\u02BC": "\u0627\u0626"
     "i\u02BE": "\u0626"
     "i\u02BC": "\u0626"
     "\u02BEi\u0304": "\u0626\u0649"
     "\u02BCi\u0304": "\u0626\u0649"
     "\u02BEi": "\u0626"
     "\u02BCi": "\u0626"
-    "I\u0304%": "\u0627\u064A"
-    "i\u0304%": "\u0627\u064A"
+    "%I\u0304": "\u0627\u064A"
+    "%i\u0304": "\u0627\u064A"
     "i\u0304y": "\u064A"
     "I\u0304": "\u0627\u0649"
     "i\u0304": "\u064A"
-    "I%": "\u0627"
-    "i%": "\u0627"
+    "%I": "\u0627"
+    "%i": "\u0627"
     "I": "\u0627"
     "i": ""
 
     # U
     "\u02BEu\u0304": "\u0626\u0648"
     "\u02BCu\u0304": "\u0626\u0648"
-    "U\u02BE%": "\u0627\u0624"
-    "U\u02BC%": "\u0627\u0624"
+    "%U\u02BE": "\u0627\u0624"
+    "%U\u02BC": "\u0627\u0624"
     "u\u02BE": "\u0624"
     "u\u02BC": "\u0624"
-    "U\u0304w%": "\u0627\u0628"
+    "%U\u0304w": "\u0627\u0628"
     "u\u0304w": "\u0628"
     "U\u0304": "\u0627\u0648"
     "u\u0304": "\u0648"
-    "U%": "\u0627"
-    "u%": "\u0627"
+    "%U": "\u0627"
+    "%u": "\u0627"
     "U": "\u0627"
     "u": ""
 
@@ -307,5 +307,5 @@ roman_to_script:
     "\u02BB": "\u0639"
 
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"

+ 61 - 61
scriptshifter/tables/data/pushto.yml

@@ -75,7 +75,7 @@ roman_to_script:
     "# prime ": " ZWNJ"
     "\u02B9a\u0304": "\u200C\u0622"
     "\u02B9a": "\u200C\u0627"
-    "%\u02B9i\u0304": "\u200C\u0627\u0649"
+    "\u02B9i\u0304%": "\u200C\u0627\u0649"
     "\u02B9i\u0304": "\u200C\u0627\u064A"
     "i\u0304\u02B9": "\u0649\u200C"
     "a\u0323y\u02B9": "\u06D3\u200C"
@@ -84,25 +84,25 @@ roman_to_script:
     # Vowel and vowel/consonant combinations
     # and hyphenated suffixes:
     # izafah here
-    "%h-\u02BEi": "\u06C0"
-    "%h-\u02BCi": "\u06C0 "
-    "%-\u02BEi": "\u06C0"
-    "%-\u02BCi": "\u06C0"
-
-    "%h-yi": "\u0647\u200C\u0649"
-    "%-yi": "\u0649"
-    "%yi": "\u0649"
-    "%\u02BEi\u0304": "\u0649"
-    "%\u02BCi\u0304": "\u0649"
-    "%i\u0304-i": "\u0649"
-    "%i\u0304": "\u0649"
-
-    "%a\u0323h": "\u06C0"
-    "%ayy-i": "\u0649"
-    "%a\u0304y": "\u0627\u0649"
-    "%a\u0301": "\u0649\u0670"
-    "%al-i": "\u0644"
-    "%-i": ""
+    "h-\u02BEi%": "\u06C0"
+    "h-\u02BCi%": "\u06C0 "
+    "-\u02BEi%": "\u06C0"
+    "-\u02BCi%": "\u06C0"
+
+    "h-yi%": "\u0647\u200C\u0649"
+    "-yi%": "\u0649"
+    "yi%": "\u0649"
+    "\u02BEi\u0304%": "\u0649"
+    "\u02BCi\u0304%": "\u0649"
+    "i\u0304-i%": "\u0649"
+    "i\u0304%": "\u0649"
+
+    "a\u0323h%": "\u06C0"
+    "ayy-i%": "\u0649"
+    "a\u0304y%": "\u0627\u0649"
+    "a\u0301%": "\u0649\u0670"
+    "al-i%": "\u0644"
+    "-i%": ""
 
     # Hyphenated prefixes:
     "wa-": "\u0648"
@@ -113,29 +113,29 @@ roman_to_script:
     "ka-": "\u0643"
 
     # Diphthongs here
-    "Ayy%": "\u0627\u064A"
-    "ayy%": "\u0627\u064A"
-    "%a\u0323y": "\u06D3"
-    "%ay": "\u0649"
+    "%Ayy": "\u0627\u064A"
+    "%ayy": "\u0627\u064A"
+    "a\u0323y%": "\u06D3"
+    "ay%": "\u0649"
     "\u02BBAw": "\u0639\u0648"
     "\u02BBaw": "\u0639\u0648"
     "Aw": "\u0627\u0648"
     "aw": "\u0648"
 
     # ayn combo
-    "%\u02BBa\u0304%": "\u0639\u0627"
+    "\u02BBa\u0304": "\u0639\u0627"
 
     # alif combos
     # [final position]
-    "%a\u0304\u02BE": "\u0627\u0621"
-    "%a\u0304\u02BC": "\u0627\u0621"
-    "%a\u0304\u02BEi\u0304": "\u0627\u0626\u0649"
-    "%a\u0304\u02BCi\u0304": "\u0627\u0626\u0649"
+    "a\u0304\u02BE%": "\u0627\u0621"
+    "a\u0304\u02BC%": "\u0627\u0621"
+    "a\u0304\u02BEi\u0304%": "\u0627\u0626\u0649"
+    "a\u0304\u02BCi\u0304%": "\u0627\u0626\u0649"
     # [initial position]
-    "A\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "A\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BEi\u0304%": "\u0622\u0626\u064A"
-    "a\u0304\u02BCi\u0304%": "\u0622\u0626\u064A"
+    "%A\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%A\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BEi\u0304": "\u0622\u0626\u064A"
+    "%a\u0304\u02BCi\u0304": "\u0622\u0626\u064A"
 
     # [medial position]
     "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
@@ -145,7 +145,7 @@ roman_to_script:
     "a\u02BEi\u0304": "\u0626\u064A"
     "a\u02BCi\u0304": "\u0626\u064A"
 
-    #a [macron] hamza followed by e
+    # a [macron] hamza followed by e
     "a\u0304\u02BEe": "\u0627\u0626\u064A"
     "a\u0304\u02BCe": "\u0627\u0626\u064A"
 
@@ -157,7 +157,7 @@ roman_to_script:
     # A
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
-    "\u02BBA%": "\u0639"
+    "%\u02BBA": "\u0639"
     "\u02BBa": "\u0639"
     "A\u02BB": "\u0623\u0639"
     "a\u02BB": "\u0639"
@@ -167,8 +167,8 @@ roman_to_script:
     "\u02BCa": "\u0623"
     "a\u0304\u02BE": "\u0621"
     "a\u0304\u02BC": "\u0621"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     "A\u0304": "\u0627"
     "a\u0304": "\u0627"
     "A\u0301": "\u0649"
@@ -178,40 +178,40 @@ roman_to_script:
     # heh hamzah at end
     "a\u0323h": "\u06C0"
 
-    "A%": "\u0627"
-    "a%": "\u0627"
+    "%A": "\u0627"
+    "%a": "\u0627"
     "A": ""
     "a": ""
 
     # E
-    "%e": "\u06D0"
-    "E%": "\u0627\u064A"
-    "e%": "\u0627\u064A"
+    "e%": "\u06D0"
+    "%E": "\u0627\u064A"
+    "%e": "\u0627\u064A"
     "e": "\u06D0"
 
-    # I 
-    "I\u02BB%": "\u0627\u0639"
-    "i\u02BB%": "\u0627\u0639"
+    # I
+    "%I\u02BB": "\u0627\u0639"
+    "%i\u02BB": "\u0627\u0639"
     "i\u02BB": "\u0639"
-    "\u02BBI%": "\u0639"
-    "I\u02BE%": "\u0627\u0626"
-    "I\u02BC%": "\u0627\u0626"
+    "%\u02BBI": "\u0639"
+    "%I\u02BE": "\u0627\u0626"
+    "%I\u02BC": "\u0627\u0626"
     "i\u02BE": "\u0626"
     "i\u02BC": "\u0626"
     "\u02BEi": "\u0626"
     "\u02BCi": "\u0626"
-    "I\u0304%": "\u0627\u064A"
-    "i\u0304%": "\u0627\u064A"
+    "%I\u0304": "\u0627\u064A"
+    "%i\u0304": "\u0627\u064A"
     "i\u0304y": "\u064A"
     "i\u0304": "\u064A"
-    "I%": "\u0627"
-    "i%": "\u0627"
+    "%I": "\u0627"
+    "%i": "\u0627"
     "I": ""
     "i": ""
 
     # O
-    "o%": "\u0627\u0648"
-    "O%": "\u0627\u0648"
+    "%o": "\u0627\u0648"
+    "%O": "\u0627\u0648"
     "o": "\u0648"
 
     # U
@@ -221,16 +221,16 @@ roman_to_script:
 
     "\u02BEu\u0304": "\u0626\u0648"
     "\u02BCu\u0304": "\u0626\u0648"
-    "U\u02BE%": "\u0627\u0624"
-    "U\u02BC%": "\u0627\u0624"
+    "%U\u02BE": "\u0627\u0624"
+    "%U\u02BC": "\u0627\u0624"
     "u\u02BE": "\u0624"
     "u\u02BC": "\u0624"
-    "U\u0304w%": "\u0627\u0628"
+    "%U\u0304w": "\u0627\u0628"
     "u\u0304w": "\u0628"
     "U\u0304": "\u0627\u0648"
     "u\u0304": "\u0648"
-    "U%": "\u0627"
-    "u%": "\u0627"
+    "%U": "\u0627"
+    "%u": "\u0627"
     "U": ""
     "u": ""
 
@@ -388,5 +388,5 @@ roman_to_script:
     "\u02BB": "\u0639"
 
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"

+ 43 - 43
scriptshifter/tables/data/thai.yml

@@ -89,7 +89,7 @@ script_to_roman:
     "\u0E01\u0E44\u0E22": "kai"
     "\u0E01\u0E44": "kai"
     # FINAL CONSONANT KO KAI WITH NO VOWEL
-    "%\u0E01": "k"
+    "\u0E01%": "k"
     "\u0E01": "ko"
 
     # CONSONANT KHO KHAI WITH VOWELS
@@ -148,7 +148,7 @@ script_to_roman:
     "\u0E02\u0E44\u0E22": "khai"
     "\u0E02\u0E44": "khai"
     # FINAL CONSONANT KHO KHAI WITH NO VOWEL
-    "%\u0E02": "k"
+    "\u0E02%": "k"
     "\u0E02": "kho"
 
     # CONSONANT KHO KHUAT WITH VOWELS
@@ -207,7 +207,7 @@ script_to_roman:
     "\u0E03\u0E44\u0E22": "khai"
     "\u0E03\u0E44": "khai"
     # FINAL CONSONANT KHO KHUAT WITH NO VOWEL
-    "%\u0E03": "k"
+    "\u0E03%": "k"
     "\u0E03": "kho"
 
     # CONSONANT KHO KHWAI WITH VOWELS
@@ -266,7 +266,7 @@ script_to_roman:
     "\u0E04\u0E44\u0E22": "khai"
     "\u0E04\u0E44": "khai"
     # FINAL CONSONANT KHO KHWAI WITH NO VOWEL
-    "%\u0E04": "k"
+    "\u0E04%": "k"
     "\u0E04": "kho"
 
     # CONSONANT KHO KHON WITH VOWELS
@@ -325,7 +325,7 @@ script_to_roman:
     "\u0E05\u0E44\u0E22": "khai"
     "\u0E05\u0E44": "khai"
     # FINAL CONSONANT KHO KHON WITH NO VOWEL
-    "%\u0E05": "k"
+    "\u0E05%": "k"
     "\u0E05": "kho"
 
     # CONSONANT KHO RAKHANG WITH VOWELS
@@ -384,7 +384,7 @@ script_to_roman:
     "\u0E06\u0E44\u0E22": "khai"
     "\u0E06\u0E44": "khai"
     # FINAL CONSONANT KHO RAKHANG WITH NO VOWEL
-    "%\u0E06": "k"
+    "\u0E06%": "k"
     "\u0E06": "kho"
 
     # CONSONANT NGO NGU WITH VOWELS
@@ -443,7 +443,7 @@ script_to_roman:
     "\u0E07\u0E44\u0E22": "ngai"
     "\u0E07\u0E44": "ngai"
     # FINAL CONSONANT NGO NGU WITH NO VOWEL
-    "%\u0E07": "ng"
+    "\u0E07%": "ng"
     "\u0E07": "ngo"
 
     # CONSONANT CHO CHAN WITH VOWELS
@@ -502,7 +502,7 @@ script_to_roman:
     "\u0E08\u0E44\u0E22": "c\u030Chai"
     "\u0E08\u0E44": "c\u030Chai"
     # FINAL CONSONANT CHO CHAN WITH NO VOWEL
-    "%\u0E08": "t"
+    "\u0E08%": "t"
     "\u0E08": "c\u030Cho"
 
     # CONSONANT CHO CHING WITH VOWELS
@@ -561,7 +561,7 @@ script_to_roman:
     "\u0E09\u0E44\u0E22": "chai"
     "\u0E09\u0E44": "chai"
     # FINAL CONSONANT CHO CHING WITH NO VOWEL
-    "%\u0E09": "t"
+    "\u0E09%": "t"
     "\u0E09": "cho"
 
     # CONSONANT CHO CHANG WITH VOWELS
@@ -620,7 +620,7 @@ script_to_roman:
     "\u0E0A\u0E44\u0E22": "chai"
     "\u0E0A\u0E44": "chai"
     # FINAL CONSONANT CHO CHANG WITH NO VOWEL
-    "%\u0E0A": "t"
+    "\u0E0A%": "t"
     "\u0E0A": "cho"
 
     # CONSONANT SO SO WITH VOWELS
@@ -679,7 +679,7 @@ script_to_roman:
     "\u0E0B\u0E44\u0E22": "sai"
     "\u0E0B\u0E44": "sai"
     # FINAL CONSONANT SO SO WITH NO VOWEL
-    "%\u0E0B": "t"
+    "\u0E0B%": "t"
     "\u0E0B": "so"
 
     # CONSONANT CHO CHOE WITH VOWELS
@@ -738,7 +738,7 @@ script_to_roman:
     "\u0E0C\u0E44\u0E22": "chai"
     "\u0E0C\u0E44": "chai"
     # FINAL CONSONANT CHO CHOE WITH NO VOWEL
-    "%\u0E0C": "t"
+    "\u0E0C%": "t"
     "\u0E0C": "cho"
 
     # CONSONANT YO YING WITH VOWELS
@@ -797,7 +797,7 @@ script_to_roman:
     "\u0E0D\u0E44\u0E22": "yai"
     "\u0E0D\u0E44": "yai"
     # FINAL CONSONANT YO YING WITH NO VOWEL
-    "%\u0E0D": "n"
+    "\u0E0D%": "n"
     "\u0E0D": "yo"
 
     # CONSONANT DO CHADA WITH VOWELS
@@ -856,7 +856,7 @@ script_to_roman:
     "\u0E0E\u0E44\u0E22": "dai"
     "\u0E0E\u0E44": "dai"
     # FINAL CONSONANT DO CHADA WITH NO VOWEL
-    "%\u0E0E": "t"
+    "\u0E0E%": "t"
     "\u0E0E": "do"
 
     # CONSONANT TO PATAK WITH VOWELS
@@ -915,7 +915,7 @@ script_to_roman:
     "\u0E0F\u0E44\u0E22": "tai"
     "\u0E0F\u0E44": "tai"
     # FINAL CONSONANT TO PATAK WITH NO VOWEL
-    "%\u0E0F": "t"
+    "\u0E0F%": "t"
     "\u0E0F": "to"
 
     # CONSONANT THO THAN WITH VOWELS
@@ -974,7 +974,7 @@ script_to_roman:
     "\u0E10\u0E44\u0E22": "thai"
     "\u0E10\u0E44": "thai"
     # FINAL CONSONANT THO THAN WITH NO VOWEL
-    "%\u0E10": "th"
+    "\u0E10%": "th"
     "\u0E10": "tho"
 
     # CONSONANT THO NANGMONTHO WITH VOWELS
@@ -1033,7 +1033,7 @@ script_to_roman:
     "\u0E11\u0E44\u0E22": "thai"
     "\u0E11\u0E44": "thai"
     # FINAL CONSONANT THO NANGMONTHO WITH NO VOWEL
-    "%\u0E11": "t"
+    "\u0E11%": "t"
     "\u0E11": "tho"
 
     # CONSONANT THO PHUTHAO WITH VOWELS
@@ -1092,7 +1092,7 @@ script_to_roman:
     "\u0E12\u0E44\u0E22": "thai"
     "\u0E12\u0E44": "thai"
     # FINAL CONSONANT THO PHUTHAO WITH NO VOWEL
-    "%\u0E12": "t"
+    "\u0E12%": "t"
     "\u0E12": "tho"
 
     # CONSONANT NO NEN WITH VOWELS
@@ -1151,7 +1151,7 @@ script_to_roman:
     "\u0E13\u0E44\u0E22": "nai"
     "\u0E13\u0E44": "nai"
     # FINAL CONSONANT NO NEN WITH NO VOWEL
-    "%\u0E13": "n"
+    "\u0E13%": "n"
     "\u0E13": "no"
 
     # CONSONANT DO DEK WITH VOWELS
@@ -1210,7 +1210,7 @@ script_to_roman:
     "\u0E14\u0E44\u0E22": "dai"
     "\u0E14\u0E44": "dai"
     # FINAL CONSONANT DO DEK WITH NO VOWEL
-    "%\u0E14": "t"
+    "\u0E14%": "t"
     "\u0E14": "do"
 
     # CONSONANT TO TAO WITH VOWELS
@@ -1269,7 +1269,7 @@ script_to_roman:
     "\u0E15\u0E44\u0E22": "tai"
     "\u0E15\u0E44": "tai"
     # FINAL CONSONANT TO TAO WITH NO VOWEL
-    "%\u0E15": "t"
+    "\u0E15%": "t"
     "\u0E15": "to"
 
     # CONSONANT THO THUNG WITH VOWELS
@@ -1328,7 +1328,7 @@ script_to_roman:
     "\u0E16\u0E44\u0E22": "thai"
     "\u0E16\u0E44": "thai"
     # FINAL CONSONANT THO THUNG WITH NO VOWEL
-    "%\u0E16": "t"
+    "\u0E16%": "t"
     "\u0E16": "tho"
 
     # CONSONANT THO THAHAN WITH VOWELS
@@ -1387,7 +1387,7 @@ script_to_roman:
     "\u0E17\u0E44\u0E22": "thai"
     "\u0E17\u0E44": "thai"
     # FINAL CONSONANT THO THAHAN WITH NO VOWEL
-    "%\u0E17": "t"
+    "\u0E17%": "t"
     "\u0E17": "tho"
 
     # CONSONANT THO THONG WITH VOWELS
@@ -1446,7 +1446,7 @@ script_to_roman:
     "\u0E18\u0E44\u0E22": "thai"
     "\u0E18\u0E44": "thai"
     # FINAL CONSONANT THO THONG WITH NO VOWEL
-    "%\u0E18": "t"
+    "\u0E18%": "t"
     "\u0E18": "tho"
 
     # CONSONANT NO NU WITH VOWELS
@@ -1505,7 +1505,7 @@ script_to_roman:
     "\u0E19\u0E44\u0E22": "nai"
     "\u0E19\u0E44": "nai"
     # FINAL CONSONANT NO NU WITH NO VOWEL
-    "%\u0E19": "n"
+    "\u0E19%": "n"
     "\u0E19": "no"
 
     # CONSONANT BO BAIMAI WITH VOWELS
@@ -1564,7 +1564,7 @@ script_to_roman:
     "\u0E1A\u0E44\u0E22": "bai"
     "\u0E1A\u0E44": "bai"
     # FINAL CONSONANT BO BAIMAI WITH NO VOWEL
-    "%\u0E1A": "p"
+    "\u0E1A%": "p"
     "\u0E1A": "bo"
 
     # CONSONANT PO PLA WITH VOWELS
@@ -1623,7 +1623,7 @@ script_to_roman:
     "\u0E1B\u0E44\u0E22": "pai"
     "\u0E1B\u0E44": "pai"
     # FINAL CONSONANT PO PLA WITH NO VOWEL
-    "%\u0E1B": "p"
+    "\u0E1B%": "p"
     "\u0E1B": "po"
 
     # CONSONANT PHO PHUNG WITH VOWELS
@@ -1682,7 +1682,7 @@ script_to_roman:
     "\u0E1C\u0E44\u0E22": "phai"
     "\u0E1C\u0E44": "phai"
     # FINAL CONSONANT PHO PHUNG WITH NO VOWEL
-    "%\u0E1C": "p"
+    "\u0E1C%": "p"
     "\u0E1C": "pho"
 
     # CONSONANT FO FA WITH VOWELS
@@ -1741,7 +1741,7 @@ script_to_roman:
     "\u0E1D\u0E44\u0E22": "fai"
     "\u0E1D\u0E44": "fai"
     # FINAL CONSONANT FO FA WITH NO VOWEL
-    "%\u0E1D": "p"
+    "\u0E1D%": "p"
     "\u0E1D": "fo"
 
     # CONSONANT PHO PHAN WITH VOWELS
@@ -1800,7 +1800,7 @@ script_to_roman:
     "\u0E1E\u0E44\u0E22": "phai"
     "\u0E1E\u0E44": "phai"
     # FINAL CONSONANT PHO PHAN WITH NO VOWEL
-    "%\u0E1E": "p"
+    "\u0E1E%": "p"
     "\u0E1E": "pho"
 
     # CONSONANT FO FAN WITH VOWELS
@@ -1859,7 +1859,7 @@ script_to_roman:
     "\u0E1F\u0E44\u0E22": "fai"
     "\u0E1F\u0E44": "fai"
     # FINAL CONSONANT FO FAN WITH NO VOWEL
-    "%\u0E1F": "p"
+    "\u0E1F%": "p"
     "\u0E1F": "fo"
 
     # CONSONANT PHO SAMPHAO WITH VOWELS
@@ -1918,7 +1918,7 @@ script_to_roman:
     "\u0E20\u0E44\u0E22": "phai"
     "\u0E20\u0E44": "phai"
     # FINAL CONSONANT PHO SAMPHAO WITH NO VOWEL
-    "%\u0E20": "p"
+    "\u0E20%": "p"
     "\u0E20": "pho"
 
     # CONSONANT MO MA WITH VOWELS
@@ -1977,7 +1977,7 @@ script_to_roman:
     "\u0E21\u0E44\u0E22": "mai"
     "\u0E21\u0E44": "mai"
     # FINAL CONSONANT MO MA WITH NO VOWEL
-    "%\u0E21": "m"
+    "\u0E21%": "m"
     "\u0E21": "mo"
 
     # CONSONANT YO YAK WITH VOWELS
@@ -2036,7 +2036,7 @@ script_to_roman:
     "\u0E22\u0E44\u0E22": "yai"
     "\u0E22\u0E44": "yai"
     # FINAL CONSONANT YO YAK WITH NO VOWEL
-    "%\u0E22": ""
+    "\u0E22%": ""
     "\u0E22": "yo"
 
     # CONSONANT RO RUA WITH VOWELS
@@ -2095,7 +2095,7 @@ script_to_roman:
     "\u0E23\u0E44\u0E22": "rai"
     "\u0E23\u0E44": "rai"
     # FINAL CONSONANT RO RUA WITH NO VOWEL
-    "%\u0E23": "n"
+    "\u0E23%": "n"
     "\u0E23": "ro"
 
     # CONSONANT LO LING WITH VOWELS
@@ -2154,7 +2154,7 @@ script_to_roman:
     "\u0E25\u0E44\u0E22": "lai"
     "\u0E25\u0E44": "lai"
     # FINAL CONSONANT LO LING WITH NO VOWEL
-    "%\u0E25": "n"
+    "\u0E25%": "n"
     "\u0E25": "lo"
 
     # CONSONANT WO WAEN WITH VOWELS
@@ -2213,7 +2213,7 @@ script_to_roman:
     "\u0E27\u0E44\u0E22": "wai"
     "\u0E27\u0E44": "wai"
     # FINAL CONSONANT WO WAEN WITH NO VOWEL
-    "%\u0E27": ""
+    "\u0E27%": ""
     "\u0E27": "wo"
 
     # CONSONANT SO SALA WITH VOWELS
@@ -2272,7 +2272,7 @@ script_to_roman:
     "\u0E28\u0E44\u0E22": "sai"
     "\u0E28\u0E44": "sai"
     # FINAL CONSONANT SO SALA WITH NO VOWEL
-    "%\u0E28": "t"
+    "\u0E28%": "t"
     "\u0E28": "so"
 
     # CONSONANT SO RUSI WITH VOWELS
@@ -2331,7 +2331,7 @@ script_to_roman:
     "\u0E29\u0E44\u0E22": "sai"
     "\u0E29\u0E44": "sai"
     # FINAL CONSONANT SO RUSI WITH NO VOWEL
-    "%\u0E29": "t"
+    "\u0E29%": "t"
     "\u0E29": "so"
 
     # CONSONANT SO SUA WITH VOWELS
@@ -2390,7 +2390,7 @@ script_to_roman:
     "\u0E2A\u0E44\u0E22": "sai"
     "\u0E2A\u0E44": "sai"
     # FINAL CONSONANT SO SUA WITH NO VOWEL
-    "%\u0E2A": "t"
+    "\u0E2A%": "t"
     "\u0E2A": "so"
 
     # CONSONANT HO HIP WITH VOWELS
@@ -2449,7 +2449,7 @@ script_to_roman:
     "\u0E2B\u0E44\u0E22": "hai"
     "\u0E2B\u0E44": "hai"
     # FINAL CONSONANT HO HIP WITH NO VOWEL
-    "%\u0E2B": ""
+    "\u0E2B%": ""
     "\u0E2B": "ho"
 
     # CONSONANT LO CHULA WITH VOWELS
@@ -2508,7 +2508,7 @@ script_to_roman:
     "\u0E2C\u0E44\u0E22": "lai"
     "\u0E2C\u0E44": "lai"
     # FINAL CONSONANT LO CHULA WITH NO VOWEL
-    "%\u0E2C": "n"
+    "\u0E2C%": "n"
     "\u0E2C": "lo"
 
     # CONSONANT O ANG WITH VOWELS ALONE (NO CONSONANT)
@@ -2624,7 +2624,7 @@ script_to_roman:
     "\u0E2E\u0E44\u0E22": "hai"
     "\u0E2E\u0E44": "hai"
     # FINAL CONSONANT HO NOKHUK WITH NO VOWEL
-    "%\u0E2E": ""
+    "\u0E2E%": ""
     "\u0E2E": "ho"
 
     "\u0E2F": ""

+ 18 - 18
scriptshifter/tables/data/uighur_arabic.yml

@@ -4,20 +4,20 @@ general:
   case_sensitive: false
 roman_to_script:
   map:
-    "a%": "\u0626\u0627"
+    "%a": "\u0626\u0627"
     "a": "\u0627"
-    "ă%": "\u0626\u06D5"
+    "%ă": "\u0626\u06D5"
     "ă": "\u06D5"
     "b": "\u0628"
     "ch": "\u0686"
     "d": "\u062F"
-    "e%": "\u0626\u06D0"
+    "%e": "\u0626\u06D0"
     "e": "\u06D0"
     "f": "\u0641"
     "g": "\u06AF"
     "gh": "\u063A"
     "h": "\u06BE"
-    "i%": "\u0626\u0649"
+    "%i": "\u0626\u0649"
     "i": "\u0649"
     "j": "\u062C"
     "k": "\u0643"
@@ -26,9 +26,9 @@ roman_to_script:
     "m": "\u0645"
     "n": "\u0646"
     "ng": "\u06AD"
-    "o%": "\u0626\u0648"
+    "%o": "\u0626\u0648"
     "o": "\u0648"
-    "ö%": "\u0626\u06C6"
+    "%ö": "\u0626\u06C6"
     "ö": "\u06C6"
     "p": "\u067E"
     "q": "\u0642"
@@ -36,9 +36,9 @@ roman_to_script:
     "s": "\u0633"
     "sh": "\u0634"
     "t": "\u062A"
-    "u%": "\u0626\u06C7"
+    "%u": "\u0626\u06C7"
     "u": "\u06C7"
-    "ü%": "\u0626\u06C8"
+    "%ü": "\u0626\u06C8"
     "ü": "\u06C8"
     "v": "\u06CB"
     "y": "\u064A"
@@ -50,12 +50,12 @@ roman_to_script:
     "?": "\u061F"
 script_to_roman:
   map:
-    "\u0626\u0627%": "a"
+    "%\u0626\u0627": "a"
     "\u0627": "a"
     "\uFE8E": "a"
-    "\u0626\u06D5%": "ă"
+    "%\u0626\u06D5": "ă"
     "\u06D5": "ă"
-    "\u0626\u0647%": "ă"
+    "%\u0626\u0647": "ă"
     "\u0647": "ă"
     "\uFEEA": "ă"
     "\u0628": "b"
@@ -71,7 +71,7 @@ script_to_roman:
     "\u062F": "d"
     "\uFEA9": "d"
     "\uFEAA": "d"
-    "\u0626\u06D0%": "e"
+    "%\u0626\u06D0": "e"
     "\u06D0": "e"
     "\uFBE6": "e"
     "\uFBE7": "e"
@@ -94,9 +94,9 @@ script_to_roman:
     "\u06BE": "h"
     "\uFEEB": "h"
     "\uFEEC": "h"
-    "\u0640\u0629%": "h"
+    "%\u0640\u0629": "h"
     "\u0629": "h"
-    "\u0626\u0649%": "i"
+    "%\u0626\u0649": "i"
     "\u0649": "i"
     "\uFBE8": "i"
     "\uFE8C": "i"
@@ -136,10 +136,10 @@ script_to_roman:
     "\uFBD5": "ng"
     "\uFBD6": "ng"
     "\uFBD4": "ng"
-    "\u0626\u0648%": "o"
+    "%\u0626\u0648": "o"
     "\u0648": "o"
     "\uFEEE": "o"
-    "\u0626\u06C6%": "ö"
+    "%\u0626\u06C6": "ö"
     "\u06C6": "ö"
     "\uFBDA": "ö"
     "\u067E": "p"
@@ -170,10 +170,10 @@ script_to_roman:
     "\uFE98": "t"
     "\uFE96": "t"
     "\uFE95": "t"
-    "\u0626\u06C7%": "u"
+    "%\u0626\u06C7": "u"
     "\u06C7": "u"
     "\uFBF0": "u"
-    "\u0626\u06C8%": "ü"
+    "%\u0626\u06C8": "ü"
     "\u06C8": "ü"
     "\uFBF4": "ü"
     "\u06CB": "v"

+ 55 - 55
scriptshifter/tables/data/urdu.yml

@@ -59,10 +59,10 @@ roman_to_script:
 
     ####
 
-    #lillah
+    # lillah
     "lilla\u0304h": "\u0644\u0644\u0647"
 
-    #billah
+    # billah
     "billa\u0304h": "\u0628\u0644\u0644\u0647"
 
     # Rahman
@@ -72,8 +72,8 @@ roman_to_script:
     "Nuzhat": "\u0646\u0632\u0647\u062A"
 
     # Uddin names
-    "%i\u0304uddi\u0304n": "\u0649\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
-    "%uddi\u0304n": "\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
+    "i\u0304uddi\u0304n%": "\u0649\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
+    "uddi\u0304n%": "\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
 
     # ta'lif
 
@@ -84,7 +84,7 @@ roman_to_script:
     "# Ae": "\u0627\u06D2"
 
     # Parsing "sh[dot below]" as in "Ishaq [name]"
-    "%sh\u0323%": "\u0633\u062D"
+    "sh\u0323": "\u0633\u062D"
 
     # Numbers (\u06F0-\u06F9 for Persian/Urdu)
     # currently *not* valid MARC21 characters
@@ -154,21 +154,21 @@ roman_to_script:
     "\u02B9": "\u200C"
 
     # Izafah here
-    "%a\u0304-yi": "\u0627\u0626\u06D2"
-    "%u\u0304-yi": "\u0648\u0626\u06D2"
-    "%o-yi": "\u0648\u0626\u06D2"
-    "%e-yi": "\u06D2"
-    "%i\u0304-yi": "\u0649"
-    "%h-yi": "\u06C0"
-    "%-yi": "\u06C0"
-    "%al-i": "\u0644"
-    "%ul-i": "\u0644"
-    "%-i": ""
+    "a\u0304-yi%": "\u0627\u0626\u06D2"
+    "u\u0304-yi%": "\u0648\u0626\u06D2"
+    "o-yi%": "\u0648\u0626\u06D2"
+    "e-yi%": "\u06D2"
+    "i\u0304-yi%": "\u0649"
+    "h-yi%": "\u06C0"
+    "-yi%": "\u06C0"
+    "al-i%": "\u0644"
+    "ul-i%": "\u0644"
+    "-i%": ""
 
     # Hyphenated prefixes:
     "bi-": "\u0628"
-    "al-a\u0304%": "\u0627\u0644\u0627"
-    "ul-a\u0304%": "\u0627\u0644\u0627"
+    "%al-a\u0304": "\u0627\u0644\u0627"
+    "%ul-a\u0304": "\u0627\u0644\u0627"
     "al-": "\u0627\u0644"
     "ul-": "\u0627\u0644"
     "lil-i": "\u0644\u0644"
@@ -234,9 +234,9 @@ roman_to_script:
 
     # Diphthongs here
     "Ae": "\u0627\u06D2"
-    "%ai": "\u06D2"
+    "ai%": "\u06D2"
     "Ai": "\u0627\u064A"
-    "ai%": "\u0627\u064A"
+    "%ai": "\u0627\u064A"
     "ai": "\u064A"
     "\u02BBAu": "\u0639\u0648"
     "\u02BBau": "\u0639\u0648"
@@ -244,23 +244,23 @@ roman_to_script:
     "au": "\u0648"
 
     # ayn-alif combo
-    "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
-    "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
-    "%\u02BBa\u0304%": "\u0639\u0627"
+    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304": "\u0639\u0627"
 
     # hamza and vowel combo
     # [in final position]
-    "%u\u0304\u02BEi\u0304": "\u0648\u0626\u0649"
-    "%u\u0304\u02BCi\u0304": "\u0648\u0626\u0649"
-    "%\u02BEi\u0304": "\u0626\u0649"
-    "%\u02BCi\u0304": "\u0626\u0649"
-    "%\u02BEe": "\u0626\u06D2"
-    "%\u02BCe": "\u0626\u06D2"
-
-    "%\u02BEu\u0304": "\u0624"
-    "%\u02BCu\u0304": "\u0624"
-    "%\u02BEo": "\u0624"
-    "%\u02BCo": "\u0624"
+    "u\u0304\u02BEi\u0304%": "\u0648\u0626\u0649"
+    "u\u0304\u02BCi\u0304%": "\u0648\u0626\u0649"
+    "\u02BEi\u0304%": "\u0626\u0649"
+    "\u02BCi\u0304%": "\u0626\u0649"
+    "\u02BEe%": "\u0626\u06D2"
+    "\u02BCe%": "\u0626\u06D2"
+
+    "\u02BEu\u0304%": "\u0624"
+    "\u02BCu\u0304%": "\u0624"
+    "\u02BEo%": "\u0624"
+    "\u02BCo%": "\u0624"
 
     # [in medial position]
     "a\u02BEa": "\u0623"
@@ -294,31 +294,31 @@ roman_to_script:
     "\u02BEa": "\u0626"
     "\u02BCa": "\u0626"
 
-    "%i\u0304": "\u0649"
-    "%a\u0301": "\u0649\u0670"
+    "i\u0304%": "\u0649"
+    "a\u0301%": "\u0649\u0670"
 
     # A
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
-    "\u02BBA%": "\u0639"
+    "%\u02BBA": "\u0639"
     "\u02BBa": "\u0639"
     "A\u02BB": "\u0627\u0639"
-    "a\u02BB%": "\u0627\u0639"
+    "%a\u02BB": "\u0627\u0639"
     "a\u02BB": "\u0639"
-    "A\u0304%": "\u0622"
-    "a\u0304%": "\u0622"
+    "%A\u0304": "\u0622"
+    "%a\u0304": "\u0622"
     "a\u0304": "\u0627"
     "a\u0301": "\u0649"
     "ayy": "\u064A\u0651"
-    "A%": "\u0627"
-    "a%": "\u0627"
+    "%A": "\u0627"
+    "%a": "\u0627"
     "A": ""
     "a": ""
 
     # E
-    "%e": "\u06D2"
-    "E%": "\u0627\u064A"
-    "e%": "\u0627\u064A"
+    "e%": "\u06D2"
+    "%E": "\u0627\u064A"
+    "%e": "\u0627\u064A"
     "e": "\u064A"
 
     # I
@@ -327,29 +327,29 @@ roman_to_script:
     "I\u02BB": "\u0627\u0639"
     "i\u02BB": "\u0639"
     "\u02BBI": "\u0639"
-    "I\u0304%": "\u0627\u064A"
-    "i\u0304%": "\u0627\u064A"
+    "%I\u0304": "\u0627\u064A"
+    "%i\u0304": "\u0627\u064A"
     "i\u0304y": "\u064A"
     "i\u0304": "\u064A"
     "iyy": "\u064A\u0651"
-    "I%": "\u0627"
-    "i%": "\u0627"
+    "%I": "\u0627"
+    "%i": "\u0627"
     "I": "\u0627"
     "i": ""
 
     # O
-    "O%": "\u0627\u0648"
+    "%O": "\u0627\u0648"
     "o": "\u0648"
 
     # U
     "\u02BBu\u0304": "\u0639\u0648"
     "\u02BBU": "\u0639"
     "\u02BBu": "\u0639"
-    "U\u0304%": "\u0627\u0648"
-    "u\u0304%": "\u0627\u0648"
+    "%U\u0304": "\u0627\u0648"
+    "%u\u0304": "\u0627\u0648"
     "u\u0304": "\u0648"
-    "U%": "\u0627"
-    "u%": "\u0627"
+    "%U": "\u0627"
+    "%u": "\u0627"
     "U": ""
     "u": ""
 
@@ -461,5 +461,5 @@ roman_to_script:
     "\u02BB": "\u0639"
 
     # hamza (alone in final position)
-    "%\u02BE": "\u0621"
-    "%\u02BC": "\u0621"
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"