Browse Source

Add Asian Cyrillic sets. (#78)

* Add new batch of Asian Cyrillic tables.

* Remove redundant _ignore_base inheritance.

* Fix unicode sequence in Komi table.
Stefano Cossu 5 months ago
parent
commit
f03f9993d1
43 changed files with 1918 additions and 604 deletions
  1. 27 0
      scriptshifter/tables/data/altai_cyrillic.yml
  2. 700 452
      scriptshifter/tables/data/asian_cyrillic.yml
  3. 12 33
      scriptshifter/tables/data/azerbaijani_cyrillic.yml
  4. 54 0
      scriptshifter/tables/data/bashkir_cyrillic.yml
  5. 23 0
      scriptshifter/tables/data/buriat_cyrillic.yml
  6. 20 0
      scriptshifter/tables/data/chukchi_cyrillic.yml
  7. 45 0
      scriptshifter/tables/data/chuvash_cyrillic.yml
  8. 35 0
      scriptshifter/tables/data/dungan_cyrillic.yml
  9. 27 0
      scriptshifter/tables/data/even-evenki_cyrillic.yml
  10. 22 0
      scriptshifter/tables/data/gagauz_cyrillic.yml
  11. 80 17
      scriptshifter/tables/data/index.yml
  12. 23 26
      scriptshifter/tables/data/kalmyk_cyrillic.yml
  13. 44 0
      scriptshifter/tables/data/kara-kalpak_cyrillic.yml
  14. 15 0
      scriptshifter/tables/data/karachay-balkar_cyrillic.yml
  15. 22 0
      scriptshifter/tables/data/karelian_cyrillic.yml
  16. 18 24
      scriptshifter/tables/data/kazakh_cyrillic.yml
  17. 40 0
      scriptshifter/tables/data/khakass_cyrillic.yml
  18. 47 0
      scriptshifter/tables/data/khanty_cyrillic.yml
  19. 61 0
      scriptshifter/tables/data/komi_cyrillic.yml
  20. 19 0
      scriptshifter/tables/data/koryak_cyrillic.yml
  21. 3 6
      scriptshifter/tables/data/kyrgyz_cyrillic.yml
  22. 30 0
      scriptshifter/tables/data/lithuanian_cyrillic.yml
  23. 15 0
      scriptshifter/tables/data/mansi_cyrillic.yml
  24. 36 0
      scriptshifter/tables/data/mari_cyrillic.yml
  25. 14 0
      scriptshifter/tables/data/moldovan_cyrillic.yml
  26. 4 7
      scriptshifter/tables/data/mongolian_cyrillic.yml
  27. 60 0
      scriptshifter/tables/data/mordvin_cyrillic.yml
  28. 15 0
      scriptshifter/tables/data/nenets_cyrillic.yml
  29. 17 0
      scriptshifter/tables/data/ossetic_cyrillic.yml
  30. 23 0
      scriptshifter/tables/data/romani_cyrillic.yml
  31. 28 0
      scriptshifter/tables/data/shor_cyrillic.yml
  32. 44 0
      scriptshifter/tables/data/syriac_cyrillic.yml
  33. 9 14
      scriptshifter/tables/data/tajik_cyrillic.yml
  34. 35 0
      scriptshifter/tables/data/tatar-kryashen_cyrillic.yml
  35. 35 0
      scriptshifter/tables/data/tatar_cyrillic.yml
  36. 7 10
      scriptshifter/tables/data/turkmen_cyrillic.yml
  37. 27 0
      scriptshifter/tables/data/tuvinian_cyrillic.yml
  38. 32 0
      scriptshifter/tables/data/udmurt_cyrillic.yml
  39. 46 0
      scriptshifter/tables/data/uighur_cyrillic.yml
  40. 9 14
      scriptshifter/tables/data/uzbek_cyrillic.yml
  41. 58 0
      scriptshifter/tables/data/yakut_cyrillic.yml
  42. 36 0
      scriptshifter/tables/data/yuit_cyrillic.yml
  43. 1 1
      tests/data/script_samples/cyrillic.csv

+ 27 - 0
scriptshifter/tables/data/altai_cyrillic.yml

@@ -0,0 +1,27 @@
+general:
+  name: Altai (Cyrillic)  # human-readable table name
+  parents:
+    - _cyrillic_base  # parent table; presumably supplies the shared Cyrillic mappings
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A4"  # N+G joined by ligature halves U+FE20/U+FE21 -> U+04A4 (capital ligature en ghe)
+    "N\uFE20g\uFE21": "\u04A4"  # title-case variant of the same ligature
+    "n\uFE20g\uFE21": "\u04A5"  # lowercase ligature -> U+04A5
+    "D\u0301": "\u0408"  # D + combining acute -> U+0408 (capital je)
+    "d\u0301": "\u0458"  # d + combining acute -> U+0458 (small je)
+    "O\u0307": "\u04E6"  # O + combining dot above -> U+04E6 (capital o with diaeresis)
+    "o\u0307": "\u04E7"  # o + combining dot above -> U+04E7
+    "U\u0307": "\u04F0"  # U + combining dot above -> U+04F0 (capital u with diaeresis)
+    "u\u0307": "\u04F1"  # u + combining dot above -> U+04F1
+
+script_to_roman:
+  map:
+    "\u0408": "D\u0301"  # reverse of the D/d + acute entries above
+    "\u0458": "d\u0301"
+    "\u04A4": "N\uFE20G\uFE21"  # capital ligature always romanizes to the all-caps form
+    "\u04A5": "n\uFE20g\uFE21"
+    "\u04E6": "O\u0307"  # reverse of the O/o + dot above entries
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"  # reverse of the U/u + dot above entries
+    "\u04F1": "u\u0307"

+ 700 - 452
scriptshifter/tables/data/asian_cyrillic.yml

@@ -1,452 +1,700 @@
-general:
-  name: Asian Cyrillic
-  inherits: _cyrillic_base
-
-roman_to_script:
-  map:
-    # COMMON COMBINING CHARACTERS (always follow a base letter): 
-    # combining grave U+0300
-    # combining acute U+0301
-    # combining circumflex U+0302
-    # combining macron U+0304
-    # combining breve U+0306
-    # combining dot above U+0307
-    # combining diaeresis U+0308
-    # combining ring above U+030A
-    # combining double acute U+030B
-    # combining caron (hachek) U+030C
-    # combining candrabindu U+0310
-    # combining dot below U+0323
-    # combining comma below U+0326 (Romanian, Latvian, Livonian)
-    # combining cedilla U+0327 (French, Turkish, Azeri)
-    # combining ogonek (hook) U+0328 (Polish, Lithuanian)
-    # combining left ligature U+FE20 (Cyrillic transliteration)
-    # combining right ligature U+FE21 (Cyrillic transliteration)
-    # soft sign/prime (spacing) U+02B9(Cyrillic transliteration)
-    # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
-    # ayn(spacing) U+02BB (Semitic and Caucasian languages)
-    # alif (spacing) U+02BC (Semitic languages)
-    # middle dot (space) U+00B7) (Catalan)
-
-    # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
-
-    # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
-    "I\uFE20A\uFE21": "\u042F"
-    "I\uFE20a\uFE21": "\u042F"
-    "i\uFE20a\uFE21": "\u044F"
-    "i\uFE20A\uFE21": "\u044F"
-
-    # CONVERSION OF "A/a" WITH BREVE (0306)
-    "A\u0306": "\u04D8"
-    "a\u0306": "\u04D9"
-
-    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
-    #"A\u0306": "\u04D2"
-    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARC LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
-    #"a\u0306": "\u04D3"
-
-    # REMAINING LONE "A/a"
-
-
-    "V\u0307": "\u0474"
-    "v\u0307": "\u0475"
-
-    "Gh": "\u0492"
-    "GH": "\u0492"
-    "gH": "\u0493"
-    "gh": "\u0493"
-
-    # DE-ACTIVATED CONVERSION OF YAKUT "A" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
-    #"Gh": "\u0494"
-    # DE-ACTIVATED CONVERSION OF YAKUT "A" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
-    #"GH": "\u0494"
-    # DE-ACTIVATED CONVERSION OF YAKUT "a" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
-    #"gH": "\u0495"
-    # DE-ACTIVATED CONVERSION OF YAKUT "a" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
-    #"gh": "\u0495"
-
-    "G\u0301": "\u0494"
-    "g\u0301": "\u0495"
-    "G\u0307": "\u049C"
-    "g\u0307": "\u049D"
-    "G": "\u0413"
-    "g": "\u0433"
-
-
-    # CONVERION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
-    "I\uFE20E\uFE21\u0304": "\u0464"
-    "I\uFE20E\u0304\uFE21": "\u0464"
-    "I\uFE20e\uFE21\u0304": "\u0464"
-    "I\uFE20e\u0304\uFE21": "\u0464"
-    "I\uFE20E\uFE21\u0328": "\u0468"
-    "I\uFE20E\u0328\uFE21": "\u0468"
-    "I\uFE20e\uFE21\u0328": "\u0468"
-    "I\uFE20e\u0328\uFE21": "\u0468"
-    "i\uFE20e\uFE21\u0304": "\u0465"
-    "i\uFE20e\u0304\uFE21": "\u0465"
-    "i\uFE20E\uFE21\u0304": "\u0465"
-    "i\uFE20E\u0304\uFE21": "\u0465"
-    "i\uFE20e\uFE21\u0328": "\u0469"
-    "i\uFE20e\u0328\uFE21": "\u0469"
-    "i\uFE20E\uFE21\u0328": "\u0469"
-    "i\uFE20E\u0328\uFE21": "\u0469"
-    "I\uFE20E\uFE21": "\u0462"
-    "I\uFE20e\uFE21": "\u0462"
-    "i\uFE20e\uFE21": "\u0463"
-    "i\uFE20E\uFE21": "\u0463"
-
-    # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
-    "E\u030C": "\u0462"
-    "E\u0304": "\u0404"
-    "E\u0307": "\u042D"
-    "E\u0308": "\u0401"
-    "E\u0328": "\u0466"
-    "e\u030C": "\u0463"
-    "e\u0304": "\u0454"
-    "e\u0307": "\u044D"
-    "e\u0308": "\u0451"
-    "e\u0328": "\u0467"
-
-    # CONVERSION OF REMAINING LONE "E/e"
-
-    "ZH": "\u0416"
-    "Zh": "\u0416"
-    "zH": "\u0436"
-    "zh": "\u0436"
-
-
-    # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
-    "T\uFE20H\uFE21": "\u0498"
-    "T\uFE20h\uFE21": "\u0498"
-    "t\uFE20H\uFE21": "\u0499"
-    "t\uFE20h\uFE21": "\u0499"
-    "Th": "\u04AA"
-    "TH": "\u04AA"
-    "tH": "\u04AB"
-    "th": "\u04AB"
-
-    # CONVERION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
-    "I\uFE20O\uFE21\u0328": "\u046C"
-    "I\uFE20O\u0328\uFE21": "\u046C"
-    "I\uFE20o\uFE21\u0328": "\u046C"
-    "I\uFE20o\u0328\uFE21": "\u046C"
-    "i\uFE20o\uFE21\u0328": "\u046D"
-    "i\uFE20o\u0328\uFE21": "\u046D"
-    "i\uFE20O\uFE21\u0328": "\u046D"
-    "i\uFE20O\u0328\uFE21": "\u046D"
-
-
-    # CONVERION OF "I/i" LIGATED TO "U/u"
-    "I\uFE20U\uFE21": "\u042E"
-    "I\uFE20u\uFE21": "\u042E"
-    "i\uFE20u\uFE21": "\u044E"
-    "i\uFE20U\uFE21": "\u044E"
-
-
-    # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
-    "I\u0304": "\u0406"
-    "I\u0306": "\u0419"
-    "I\u0310": "\u0408"
-    "i\u0304": "\u0456"
-    "i\u0306": "\u0439"
-    "i\u0310": "\u0458"
-
-    # CONVERSION OF REMAINING LONE "I/i"
-    "I": "\u0418"
-    "i": "\u0438"
-
-    "J": "\u0496"
-    "j": "\u0497"
-
-    # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
-    #"J": "\u04B8"
-    # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
-    #"J": "\u04B9"
-    # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
-    #"J": "\u04B6"
-    # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
-    #"J": "\u04B7"
-
-
-    "K\uFE20S\uFE21": "\u046E"
-    "K\uFE20s\uFE21": "\u046E"
-    "k\uFE20s\uFE21": "\u046F"
-    "k\uFE20S\uFE21": "\u046F"
-    "Q": "\u04A0"
-    "q": "\u04A1"
-
-    # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
-    #"Q": "\u04C3"
-    # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
-    #"q": "\u04C4"
-
-
-
-    "N\uFE20G\uFE21": "\u04A2"
-    "N\uFE20g\uFE21": "\u04A2"
-    "n\uFE20G\uFE21": "\u04A3"
-    "n\uFE20g\uFE21": "\u04A3"
-
-    # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
-    #"N\uFE20G\uFE21": "\u04A4"
-    #"N\uFE20g\uFE21": "\u04A4"
-    #"n\uFE20G\uFE21": "\u04A5"
-    #"n\uFE20g\uFE21": "\u04A5"
-
-    # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
-    #"N\uFE20G\uFE21": "\u04C7"
-    #"N\uFE20g\uFE21": "\u04C7"
-    #"n\uFE20G\uFE21": "\u04C8"
-    #"n\uFE20g\uFE21": "\u04C8"
-
-
-    # CONVERION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
-    "O\u0304\uFE20T\uFE21": "\u047E"
-    "O\u0304\uFE20t\uFE21": "\u047E"
-    "O\uFE20\u0304T\uFE21": "\u047E"
-    "O\uFE20\u0304t\uFE21": "\u047E"
-    "O\uFE20T\uFE21": "\u047E"
-    "O\uFE20t\uFE21": "\u047E"
-    "o\u0304\uFE20t\uFE21": "\u047F"
-    "o\u0304\uFE20T\uFE21": "\u047F"
-    "o\uFE20\u0304t\uFE21": "\u047F"
-    "o\uFE20\u0304T\uFE21": "\u047F"
-    "o\uFE20t\uFE21": "\u047F"
-    "o\uFE20T\uFE21": "\u047F"
-
-
-    # CONVERSION OF "O/o" WITH MACRON(0304)
-    "O\u0304": "\u04EA"
-    "o\u0304": "\u04EB"
-    # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
-    "O\u0307": "\u04E8"
-    "o\u0307": "\u04E9"
-
-    # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)DUE TO CONFLICTING ROMANIZATION
-    #"O\u0307": "\u04E6"
-    #"o\u0307": "\u04E7"
-
-    # CONVERSION OF REMAINING LONE "O/o"
-
-    "P\uFE20S\uFE21": "\u0470"
-    "P\uFE20s\uFE21": "\u0470"
-    "p\uFE20s\uFE21": "\u0471"
-    "p\uFE20S\uFE21": "\u0471"
-
-
-    "SHCH": "\u0429"
-    "SHCh": "\u0429"
-    "SHch": "\u0429"
-    "Shch": "\u0429"
-    "sHCH": "\u0449"
-    "shCH": "\u0449"
-    "shcH": "\u0449"
-    "shch": "\u0449"
-
-    "sH": "\u0448"
-
-    "T\uFE20S\uFE21\u0307": "\u04B4"
-    "T\uFE20S\u0307\uFE21": "\u04B4"
-    "T\uFE20s\uFE21\u0307": "\u04B4"
-    "T\uFE20s\u0307\uFE21": "\u04B4"
-    "t\uFE20S\uFE21\u0307": "\u04B5"
-    "t\uFE20S\u0307\uFE21": "\u04B5"
-    "t\uFE20s\uFE21\u0307": "\u04B5"
-    "t\uFE20s\u0307\uFE21": "\u04B5"
-
-    "T\uFE20S\uFE21": "\u0426"
-    "T\uFE20s\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "t\uFE20S\uFE21": "\u0446"
-
-    # CONVERSION OF "U/u" WITH MACRON(0304), BREVE (0306), AND DOT ABOVE (0307)
-    "U\u0304": "\u04B0"
-    "u\u0304": "\u04B1"
-
-    # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
-    #"U\u0304": "\u04EE"
-    # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
-    #"U\u0304": "\u04EF"
-
-    "U\u0306": "\u040E"
-    "u\u0306": "\u0454"
-    "U\u0307": "\u04AE"
-    "u\u0307": "\u04AF"
-
-    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
-    #"U\u0307": "\u04E6"
-    #"u\u0307": "\u04E7"
-
-    # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
-    "W": "\u040E"
-    "w": "\u0454"
-
-
-    "F\u0307": "\u0472"
-    "f\u0307": "\u0473"
-
-    "cH": "\u0447"
-
-    # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
-
-    # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
-    #"H\u0307": "\u04B2"
-    #"h\u0307": "\u04B3"
-    # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
-    #"H\u0307": "\u04FC"
-    #"h\u0307": "\u04FD"
-
-    "Y\u0307": "\u04F8"
-    "y\u0307": "\u04F9"
-
-    "Y": "\u042B"
-    "y": "\u044B"
-
-    "\u0027": "\u044C"
-    # this conversion is ambiguous - \u044C is also theoretically possible
-    "\u02BA": "\u044A"
-
-script_to_roman:
-  map:
-    "\u044F": "i\uFE20a\uFE21"
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
-    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
-    "\u04D2": "A\u0306"
-    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARC LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
-    "\u04D3": "a\u0306"
-    "\u0474": "V\u0307"
-    "\u0475": "v\u0307"
-    "\u0492": "Gh"
-    "\u0493": "gh"
-    # DE-ACTIVATED CONVERSION OF YAKUT "A" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
-    "\u0494": "Gh"
-    # DE-ACTIVATED CONVERSION OF YAKUT "a" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
-    "\u0495": "gh"
-    "\u0494": "G\u0301"
-    "\u0495": "g\u0301"
-    "\u049C": "G\u0307"
-    "\u049D": "g\u0307"
-    "\u0413": "G"
-    "\u0433": "g"
-    # CONVERION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
-    "\u0464": "I\uFE20E\uFE21\u0304"
-    "\u0468": "I\uFE20E\uFE21\u0328"
-    "\u0465": "i\uFE20e\uFE21\u0304"
-    "\u0469": "i\uFE20e\uFE21\u0328"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0463": "i\uFE20e\uFE21"
-    # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
-    "\u0404": "E\u0304"
-    "\u042D": "E\u0307"
-    "\u0401": "E\u0308"
-    "\u0466": "E\u0328"
-    "\u0454": "e\u0304"
-    "\u044D": "e\u0307"
-    "\u0451": "e\u0308"
-    "\u0467": "e\u0328"
-    "\u0416": "Zh"
-    "\u0436": "zh"
-    # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
-    "\u0498": "T\uFE20H\uFE21"
-    "\u0499": "t\uFE20h\uFE21"
-    "\u04AA": "Th"
-    "\u04AB": "th"
-    # CONVERION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
-    "\u046C": "I\uFE20O\uFE21\u0328"
-    "\u046D": "i\uFE20o\uFE21\u0328"
-    # CONVERION OF "I/i" LIGATED TO "U/u"
-    "\u044E": "i\uFE20u\uFE21"
-    # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
-    "\u0406": "I\u0304"
-    "\u0408": "I\u0310"
-    "\u0456": "i\u0304"
-    "\u0458": "i\u0310"
-    # CONVERSION OF REMAINING LONE "I/i"
-    "\u0418": "I"
-    "\u0438": "i"
-    "\u0496": "J"
-    "\u0497": "j"
-    # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
-    "\u04B8": #"J"
-    # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
-    "\u04B9": #"J"
-    # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
-    "\u04B6": #"J"
-    # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
-    "\u04B7": #"J"
-    "\u0445": "kh"
-    "\u046E": "K\uFE20S\uFE21"
-    "\u046F": "k\uFE20s\uFE21"
-    "\u04A0": "Q"
-    "\u04A1": "q"
-    # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
-    "\u04C3": "Q"
-    # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
-    "\u04C4": "q"
-    "\u04A2": "N\uFE20G\uFE21"
-    "\u04A3": "n\uFE20g\uFE21"
-    # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
-    "\u04A4": #"N\uFE20G\uFE21"
-    "\u04A5": #"n\uFE20g\uFE21"
-    # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
-    "\u04C7": #"N\uFE20G\uFE21"
-    "\u04C8": #"n\uFE20g\uFE21"
-    # CONVERION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
-    "\u047E": "O\u0304\uFE20T\uFE21"
-    "\u047F": "o\u0304\uFE20t\uFE21"
-    # CONVERSION OF "O/o" WITH MACRON(0304)
-    "\u04EA": "O\u0304"
-    "\u04EB": "o\u0304"
-    # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
-    # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)DUE TO CONFLICTING ROMANIZATION
-    "\u04E6": #"O\u0307"
-    "\u04E7": #"o\u0307"
-    # CONVERSION OF REMAINING LONE "O/o"
-    "\u0470": "P\uFE20S\uFE21"
-    "\u0471": "p\uFE20s\uFE21"
-    "\u04B4": "T\uFE20S\uFE21\u0307"
-    "\u04B5": "t\uFE20s\uFE21\u0307"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0446": "t\uFE20s\uFE21"
-    # CONVERSION OF "U/u" WITH MACRON(0304), BREVE (0306), AND DOT ABOVE (0307)
-    "\u04B0": "U\u0304"
-    "\u04B1": "u\u0304"
-    # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
-    "\u04EE": #"U\u0304"
-    # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
-    "\u04EF": #"U\u0304"
-    "\u040E": "U\u0306"
-    "\u0454": "u\u0306"
-    "\u04AE": "U\u0307"
-    "\u04AF": "u\u0307"
-    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
-    "\u04E6": #"U\u0307"
-    "\u04E7": #"u\u0307"
-    # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
-    "\u040E": "W"
-    "\u0454": "w"
-    "\u0472": "F\u0307"
-    "\u0473": "f\u0307"
-    "\u0444": "f"
-    "\u0427": "Ch"
-    # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
-    "\u04BA": "H\u0307"
-    "\u04BB": "h\u0307"
-    # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
-    "\u04B2": "H\u0307"
-    "\u04B3": "h\u0307"
-    # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
-    "\u04FC": "H\u0307"
-    "\u04FD": "h\u0307"
-    "\u04F8": "Y\u0307"
-    "\u04F9": "y\u0307"
-    "\u042B": "Y"
-    "\u044B": "y"
-    # this conversion is ambiguous - \u044C is also theoretically possible
-    "\u044A": "\u02BA"
-
+general:
+  name: Asian (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+# COMMON COMBINING CHARACTERS (always follow a base letter):
+# combining grave \u0300
+# combining acute \u0301
+# combining circumflex \u0302
+# combining tilde \u0303
+# combining macron \u0304
+# combining breve \u0306
+# combining dot above \u0307
+# combining diaeresis \u0308
+# combining ring above \u030A
+# combining double acute \u030B
+# combining caron (hachek) \u030C
+# combining candrabindu \u0310
+# combining dot below \u0323
+# combining diaeresis below \u0324
+# combining comma below \u0326 (Romanian, Latvian, Livonian)
+# combining cedilla \u0327 (French, Turkish, Azeri)
+# combining ogonek (hook) \u0328 (Polish, Lithuanian)
+# combining low line \u0332
+# combining double low line \u0333
+# combining left ligature \uFE20 (Cyrillic transliteration)
+# combining right ligature \uFE21 (Cyrillic transliteration)
+# soft sign/prime (spacing) \u02B9 (Cyrillic transliteration)
+# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
+# ayn (spacing) \u02BB (Semitic and Caucasian languages)
+# alif (spacing) \u02BC (Semitic languages)
+# middle dot (spacing) \u00B7 (Catalan)
+
+roman_to_script:
+  map:
+    "A\uFE20E\uFE21": "\u04D4"
+    "A\uFE20e\uFE21": "\u04D4"
+    "a\uFE20e\uFE21": "\u04D5"
+    "A\u0306\u0323": "\u04D0"
+    "a\u0306\u0323": "\u04D1"
+    "\u00C6": "\u04D4"
+    "\u00E6": "\u04D5"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "A\u030B": "\u04DA"
+    "a\u030B": "\u04DB"
+    "A\u0308": "\u04D2"
+    "a\u0308": "\u04D3"
+    "A\u0310": "\u0518"
+    "a\u0310": "\u0519"
+    
+    "B": "\u0411"
+    "b": "\u0431"
+    
+    "C\u0301h\u0301": "\u04BE"
+    "c\u0301h\u0301": "\u04BF"
+    "C\u0301h": "\u04BC"
+    "c\u0301h": "\u04BD"
+    "C\u0301": "\u040B"
+    "c\u0301": "\u045B"
+    "C\u0308h": "\u04F4"
+    "c\u0308h": "\u04F5"
+    "C\u0323h": "\u04CB"
+    "c\u0323h": "\u04CC"
+    
+    "D\u0301": "\u0502"
+    "d\u0301": "\u0503"
+    "D\u0307": "\u0500"
+    "d\u0307": "\u0501"
+    "D\uFE20c\uFE21h": "\u052C"
+    "d\uFE20c\uFE21h": "\u052D"
+    "D\uFE20z\uFE21h": "\u052A"
+    "d\uFE20z\uFE21h": "\u052B"
+    "D\uFE20Z\uFE21": "\u04E0"
+    "d\uFE20z\uFE21": "\u04E1"
+    "Dz\u030C": "\u040F"
+    "dz\u030C": "\u045F"
+    "D": "\u0414"
+    "d": "\u0434"
+    
+    "E\u0300": "\u0400"
+    "e\u0300": "\u0450"
+    "E\u0304": "\u0404"
+    "e\u0304": "\u0454"
+    "E\u0306": "\u04D6"
+    "e\u0306": "\u04D7"
+    "E\u0306\u0323": "\u048C"
+    "e\u0306\u0323": "\u048D"
+    "E\u0307": "\u042D"
+    "e\u0307": "\u044D"
+    "E\u0308\u0323": "\u04EC"
+    "e\u0308\u0323": "\u04ED"
+    "E\u0308": "\u0401"
+    "e\u0308": "\u0451"
+    "E\u0328": "\u0466"
+    "e\u0328": "\u0467"
+    
+    "F\u0307": "\u0472"
+    "f\u0307": "\u0473"
+    "F": "\u0424"
+    "f": "\u0444"
+    
+    "Gh\u0327": "\u04FA"
+    "gh\u0327": "\u04FB"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "G\u0301": "\u0403"
+    "g\u0301": "\u0453"
+    "G\u0306": "\u0490"
+    "g\u0306": "\u0491"
+    "G\u0307": "\u049C"
+    "g\u0307": "\u049D"
+    "G\u0323": "\u04F6"
+    "g\u0323": "\u04F7"
+    "G\u0327": "\u0494"
+    "g\u0327": "\u0495"
+    
+    "H\u0304": "\u04FE"
+    "h\u0304": "\u04FF"
+    "H\u0327": "\u04FC"
+    "h\u0327": "\u04FD"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "H\u0308": "\u04C0"
+    "h\u0308": "\u04CF"
+    
+    "I\u0300": "\u040D"
+    "i\u0300": "\u045D"
+    "I\u0304\u0323": "\u04E2"
+    "i\u0304\u0323": "\u04E3"
+    "I\u0304": "\u0406"
+    "i\u0304": "\u0456"
+    "I\u0306\u0323": "\u048A"
+    "i\u0306\u0323": "\u048B"
+    "I\u0306": "\u0419"
+    "i\u0306": "\u0439"
+    "I\u0308\u0323": "\u04E4"
+    "i\u0308\u0323": "\u04E5"
+    "I\u0308": "\u0407"
+    "i\u0308": "\u0457"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+    
+    "I\uFE20A\uFE21": "\u042F"
+    "i\uFE20a\uFE21": "\u044F"
+    "A": "\u0410"
+    "a": "\u0430"
+    
+    "I\uFE20E\uFE21\u0304": "\u0464"
+    "i\uFE20e\uFE21\u0304": "\u0465"
+    "I\uFE20E\uFE21\u0328": "\u0468"
+    "i\uFE20e\uFE21\u0328": "\u0469"
+    "I\uFE20E\uFE21": "\u0462"
+    "i\uFE20e\uFE21": "\u0463"
+    "E": "\u0415"
+    "e": "\u0435"
+    
+    "I\uFE20O\uFE21\u0328": "\u046C"
+    "i\uFE20o\uFE21\u0328": "\u046D"
+    "I\uFE20U\uFE21": "\u042E"
+    "i\uFE20u\uFE21": "\u044E"
+    "I": "\u0418"
+    "i": "\u0438"
+    
+    "J\u0304": "\u04B8"
+    "j\u0304": "\u04B9"
+    "J\u0306": "\u04C1"
+    "j\u0306": "\u04C2"
+    "J\u0302": "\u04B6"
+    "j\u0302": "\u04B7"
+    "J\u0308": "\u04DC"
+    "j\u0308": "\u04DD"
+    
+    "K\u0300": "\u051E"
+    "k\u0300": "\u051F"
+    "K\u0301": "\u040C"
+    "k\u0301": "\u045C"
+    "K\uFE20H\uFE21": "\u04B2"
+    "k\uFE20h\uFE21": "\u04B3"
+    "Kh": "\u0425"
+    "kh": "\u0445"
+    "K\uFE20S\uFE21": "\u046E"
+    "k\uFE20s\uFE21": "\u046F"
+    "K": "\u041A"
+    "k": "\u043A"
+    
+    # L/l digraphs and diacritic variants, followed by the bare letter.
+    # The comma-below pair uses U+0326 for both cases so that it agrees
+    # with the script_to_roman entries for U+04C5/U+04C6 and round-trips.
+    "Lj": "\u0409"
+    "lj": "\u0459"
+    "Lkh\u0307": "\u0514"
+    "lkh\u0307": "\u0515"
+    "L\u0301": "\u0508"
+    "l\u0301": "\u0509"
+    "L\u0326": "\u04C5"
+    "l\u0326": "\u04C6"
+    "L\u0323": "\u052E"
+    "l\u0323": "\u052F"
+    "L\u0327": "\u0512"
+    "l\u0327": "\u0513"
+    "L\u0324": "\u0520"
+    "l\u0324": "\u0521"
+    "L": "\u041B"
+    "l": "\u043B"
+    
+    "M\u0323": "\u04CD"
+    "m\u0323": "\u04CE"
+    "M": "\u041C"
+    "m": "\u043C"
+    
+    "Nj": "\u040A"
+    "nj": "\u045A"
+    "N\u0301G\u0300": "\u04A4"
+    "n\u0301g\u0300": "\u04A5"
+    "N\u0301": "\u050A"
+    "n\u0301": "\u050B"
+    "N\u0326": "\u0528"
+    "n\u0326": "\u0529"
+    "N\u0327": "\u0522"
+    "n\u0327": "\u0523"
+    "N\uFE20\u0323G\uFE21": "\u04C9"
+    "n\uFE20\u0323g\uFE21": "\u04CA"
+    "N\uFE20\u0327G\uFE21": "\u04C7"
+    "n\uFE20\u0327g\uFE21": "\u04C8"
+    "N\uFE20G\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "No\u0332": "\u2116"
+    "N": "\u041D"
+    "n": "\u043D"
+    
+    "G": "\u0413"
+    "g": "\u0433"
+    
+    "J": "\u0496"
+    "j": "\u0497"
+    
+    "O\u0303": "\u047C"
+    "o\u0303": "\u047D"
+    "O\u0304\u0323": "\u047A"
+    "o\u0304\u0323": "\u047B"
+    "O\u0304\uFE20T\uFE21": "\u047E"
+    "o\u0304\uFE20t\uFE21": "\u047F"
+    "O\u0304\u0324": "\u0460"
+    "o\u0304\u0324": "\u0461"
+    "O\u0304": "\u04EA"
+    "o\u0304": "\u04EB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "O\u0308": "\u04E6"
+    "o\u0308": "\u04E7"
+    "O\u0328": "\u046A"
+    "o\u0328": "\u046B"
+    "O\uFE20u\uFE21": "\u0478"
+    "o\uFE20u\uFE21": "\u0479"
+    "O": "\u041E"
+    "o": "\u043E"
+    
+    "Ph": "\u04A6"
+    "ph": "\u04A7"
+    "P\u0323": "\u0524"
+    "p\u0323": "\u0525"
+    "P\uFE20S\uFE21": "\u0470"
+    "p\uFE20s\uFE21": "\u0471"
+    "P": "\u041F"
+    "p": "\u043F"
+    
+    "Q\u0300": "\u04A0"
+    "q\u0300": "\u04A1"
+    "Q\u0302": "\u0480"
+    "q\u0302": "\u0481"
+    "Q\u0304": "\u049E"
+    "q\u0304": "\u049F"
+    "Q\u0307": "\u04C3"
+    "q\u0307": "\u04C4"
+    "Q\u0308": "\u051A"
+    "q\u0308": "\u051B"
+    "Q": "\u049A"
+    "q": "\u049B"
+    
+    "Rkh\u0307": "\u0516"
+    "rkh\u0307": "\u0517"
+    "R\u0306": "\u048E"
+    "r\u0306": "\u048F"
+    "R": "\u0420"
+    "r": "\u0440"
+    
+    "Shch": "\u0429"
+    "shch": "\u0449"
+    "Sh\u0323": "\u0526"
+    "sh\u0323": "\u0527"
+    "Sh": "\u0428"
+    "sh": "\u0448"
+    "S\u0301": "\u050C"
+    "s\u0301": "\u050D"
+    "S\u0307": "\u0405"
+    "s\u0307": "\u0455"
+    
+    "Ch": "\u0427"
+    "ch": "\u0447"
+    "C": "\u0426"
+    "c": "\u0446"
+    
+    "Th": "\u04AA"
+    "th": "\u04AB"
+    "T\u0301": "\u050E"
+    "t\u0301": "\u050F"
+    "T\u0327": "\u04AC"
+    "t\u0327": "\u04AD"
+    "T\uFE20H\uFE21": "\u0498"
+    "t\uFE20h\uFE21": "\u0499"
+    "T\uFE20S\uFE21": "\u0426"
+    "t\uFE20s\uFE21": "\u0446"
+    "T\uFE20S\uFE21\u0307": "\u04B4"
+    "t\uFE20s\uFE21\u0307": "\u04B5"
+    
+    "S": "\u0421"
+    "s": "\u0441"
+    
+    "T": "\u0422"
+    "t": "\u0442"
+    
+    "U\u0302": "\u04B0"
+    "u\u0302": "\u04B1"
+    "U\u0304": "\u04EE"
+    "u\u0304": "\u04EF"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "U\u0308": "\u04F0"
+    "u\u0308": "\u04F1"
+    "U\u030B": "\u04F2"
+    "u\u030B": "\u04F3"
+    "U": "\u0423"
+    "u": "\u0443"
+    
+    "V\u0307": "\u0474"
+    "v\u0307": "\u0475"
+    "V\u0308": "\u0476"
+    "v\u0308": "\u0477"
+    "V": "\u0412"
+    "v": "\u0432"
+    
+    "W\u0308": "\u051C"
+    "w\u0308": "\u051D"
+    "W": "\u04A8"
+    "w": "\u04A9"
+    
+    "X": "\u0058"
+    "x": "\u0078"
+    
+    "Y\u0307": "\u0474"
+    "y\u0307": "\u0475"
+    "Y\u0308": "\u04F8"
+    "y\u0308": "\u04F9"
+    "Y": "\u042B"
+    "y": "\u044B"
+    
+    "Zh": "\u0416"
+    "zh": "\u0436"
+    "Z\u0301": "\u0504"
+    "z\u0301": "\u0505"
+    "Z\u0307": "\u0510"
+    "z\u0307": "\u0511"
+    "Z\u0308": "\u04DE"
+    "z\u0308": "\u04DF"
+    "Z\u0327": "\u0506"
+    "z\u0327": "\u0507"
+    "Z": "\u0417"
+    "z": "\u0437"
+    
+    "H": "\u0413"
+    "h": "\u0433"
+    
+    "\u0110": "\u0402"
+    "\u0111": "\u0452"
+    "\u02B9\u0333": "\u042C"
+    "\u02B9": "\u044C"
+    "\u02BA\u0333": "\u042A"
+    "\u02BA": "\u044A"
+    "\u0303": "\u0487"
+    "\u0311": "\u0484"
+    "\u0313": "\u0486"
+    "\u0314": "\u0485"
+    "\u007E": "\u0483"
+    "(|)": "\u0482"
+    "(^)": "\u0488"
+    "(')": "\u0489"
+    
+    "\u003C\u003C": "\u00AB"
+    "\u003E\u003E": "\u00BB"
+
+script_to_roman:
+  map:
+    
+    # Both guillemets romanize to a straight double quote. The quote has to
+    # be backslash-escaped inside a double-quoted YAML scalar.
+    "\u00AB": "\""
+    "\u00BB": "\""
+    "\u2116": "No\u0332"
+    "\u0400": "E\u0300"
+    "\u0401": "E\u0308"
+    "\u0402": "\u0110"
+    "\u0403": "G\u0301"
+    "\u0404": "E\u0304"
+    "\u0405": "S\u0307"
+    "\u0406": "I\u0304"
+    "\u0407": "I\u0308"
+    "\u0408": "I\u0310"
+    "\u0409": "Lj"
+    "\u040A": "Nj"
+    "\u040B": "C\u0301"
+    "\u040C": "K\u0301"
+    "\u040D": "I\u0300"
+    "\u040E": "U\u0306"
+    "\u040F": "Dz\u030C"
+    "\u0410": "A"
+    "\u0411": "B"
+    "\u0412": "V"
+    "\u0413": "G"
+    "\u0414": "D"
+    "\u0415": "E"
+    "\u0416": "Zh"
+    "\u0417": "Z"
+    "\u0418": "I"
+    "\u0419": "I\u0306"
+    "\u041A": "K"
+    "\u041B": "L"
+    "\u041C": "M"
+    "\u041D": "N"
+    "\u041E": "O"
+    "\u041F": "P"
+    "\u0420": "R"
+    "\u0421": "S"
+    "\u0422": "T"
+    "\u0423": "U"
+    "\u0424": "F"
+    "\u0425": "Kh"
+    "\u0426": "T\uFE20S\uFE21"
+    "\u0427": "Ch"
+    "\u0428": "Sh"
+    "\u0429": "Shch"
+    "\u042A": "\u02BA\u0333"
+    "\u042B": "Y"
+    "\u042C": "\u02B9\u0333"
+    "\u042D": "E\u0307"
+    "\u042E": "I\uFE20U\uFE21"
+    "\u042F": "I\uFE20A\uFE21"
+    "\u0430": "a"
+    "\u0431": "b"
+    "\u0432": "v"
+    "\u0433": "g"
+    "\u0434": "d"
+    "\u0435": "e"
+    "\u0436": "zh"
+    "\u0437": "z"
+    "\u0438": "i"
+    "\u0439": "i\u0306"
+    "\u043A": "k"
+    "\u043B": "l"
+    "\u043C": "m"
+    "\u043D": "n"
+    "\u043E": "o"
+    "\u043F": "p"
+    "\u0440": "r"
+    "\u0441": "s"
+    "\u0442": "t"
+    "\u0443": "u"
+    "\u0444": "f"
+    "\u0445": "kh"
+    "\u0446": "t\uFE20s\uFE21"
+    "\u0447": "ch"
+    "\u0448": "sh"
+    "\u0449": "shch"
+    "\u044A": "\u02BA"
+    "\u044B": "y"
+    "\u044C": "\u02B9"
+    "\u044D": "e\u0307"
+    "\u044E": "i\uFE20u\uFE21"
+    "\u044F": "i\uFE20a\uFE21"
+    "\u0450": "e\u0300"
+    "\u0451": "e\u0308"
+    "\u0452": "\u0111"
+    "\u0453": "g\u0301"
+    "\u0454": "e\u0304"
+    "\u0455": "s\u0307"
+    "\u0456": "i\u0304"
+    "\u0457": "i\u0308"
+    "\u0458": "i\u0310"
+    "\u0459": "lj"
+    "\u045A": "nj"
+    "\u045B": "c\u0301"
+    "\u045C": "k\u0301"
+    "\u045D": "i\u0300"
+    "\u045E": "u\u0306"
+    "\u045F": "dz\u030C"
+    "\u0460": "O\u0304\u0324"
+    "\u0461": "o\u0304\u0324"
+    "\u0462": "I\uFE20E\uFE21"
+    "\u0463": "i\uFE20e\uFE21"
+    "\u0464": "I\uFE20E\uFE21\u0304"
+    "\u0465": "i\uFE20e\uFE21\u0304"
+    "\u0466": "E\u0328"
+    "\u0467": "e\u0328"
+    "\u0468": "I\uFE20E\uFE21\u0328"
+    "\u0469": "i\uFE20e\uFE21\u0328"
+    "\u046A": "O\u0328"
+    "\u046B": "o\u0328"
+    "\u046C": "I\uFE20O\uFE21\u0328"
+    "\u046D": "i\uFE20o\uFE21\u0328"
+    "\u046E": "K\uFE20S\uFE21"
+    "\u046F": "k\uFE20s\uFE21"
+    "\u0470": "P\uFE20S\uFE21"
+    "\u0471": "p\uFE20s\uFE21"
+    "\u0472": "F\u0307"
+    "\u0473": "f\u0307"
+    "\u0474": "V\u0307"
+    "\u0475": "v\u0307"
+    "\u0476": "V\u0308"
+    "\u0477": "v\u0308"
+    "\u0478": "O\uFE20u\uFE21"
+    "\u0479": "o\uFE20u\uFE21"
+    "\u047A": "O\u0304\u0323"
+    "\u047B": "o\u0304\u0323"
+    "\u047C": "O\u0303"
+    "\u047D": "o\u0303"
+    "\u047E": "O\u0304\uFE20T\uFE21"
+    "\u047F": "o\u0304\uFE20t\uFE21"
+    "\u0480": "Q\u0302"
+    "\u0481": "q\u0302"
+    "\u0482": "(|)"
+    "\u0483": "\u007E"
+    "\u0484": "\u0311"
+    "\u0485": "\u0314"
+    "\u0486": "\u0313"
+    "\u0487": "\u0303"
+    "\u0488": "(^)"
+    "\u0489": "(')"
+    "\u048A": "I\u0306\u0323"
+    "\u048B": "i\u0306\u0323"
+    "\u048C": "E\u0306\u0323"
+    "\u048D": "e\u0306\u0323"
+    "\u048E": "R\u0306"
+    "\u048F": "r\u0306"
+    "\u0490": "G\u0306"
+    "\u0491": "g\u0306"
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u0494": "G\u0327"
+    "\u0495": "g\u0327"
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u0498": "T\uFE20H\uFE21"
+    "\u0499": "t\uFE20h\uFE21"
+    "\u049A": "Q"
+    "\u049B": "q"
+    "\u049C": "G\u0307"
+    "\u049D": "g\u0307"
+    "\u049E": "Q\u0304"
+    "\u049F": "q\u0304"
+    "\u04A0": "Q\u0300"
+    "\u04A1": "q\u0300"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04A4": "N\u0301G\u0300"
+    "\u04A5": "n\u0301g\u0300"
+    "\u04A6": "Ph"
+    "\u04A7": "ph"
+    "\u04A8": "W"
+    "\u04A9": "w"
+    "\u04AA": "Th"
+    "\u04AB": "th"
+    "\u04AC": "T\u0327"
+    "\u04AD": "t\u0327"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04B0": "U\u0302"
+    "\u04B1": "u\u0302"
+    "\u04B2": "K\uFE20H\uFE21"
+    "\u04B3": "k\uFE20h\uFE21"
+    "\u04B4": "T\uFE20S\uFE21\u0307"
+    "\u04B5": "t\uFE20s\uFE21\u0307"
+    "\u04B6": "J\u0302"
+    "\u04B7": "j\u0302"
+    "\u04B8": "J\u0304"
+    "\u04B9": "j\u0304"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04BC": "C\u0301h"
+    "\u04BD": "c\u0301h"
+    "\u04BE": "C\u0301h\u0301"
+    "\u04BF": "c\u0301h\u0301"
+    "\u04C0": "H\u0308"
+    "\u04C1": "J\u0306"
+    "\u04C2": "j\u0306"
+    "\u04C3": "Q\u0307"
+    "\u04C4": "q\u0307"
+    "\u04C5": "L\u0326"
+    "\u04C6": "l\u0326"
+    "\u04C7": "N\uFE20\u0327G\uFE21"
+    "\u04C8": "n\uFE20\u0327g\uFE21"
+    "\u04C9": "N\uFE20\u0323G\uFE21"
+    "\u04CA": "n\uFE20\u0323g\uFE21"
+    "\u04CB": "C\u0323h"
+    "\u04CC": "c\u0323h"
+    "\u04CD": "M\u0323"
+    "\u04CE": "m\u0323"
+    "\u04CF": "h\u0308"
+    "\u04D0": "A\u0306\u0323"
+    "\u04D1": "a\u0306\u0323"
+    "\u04D2": "A\u0308"
+    "\u04D3": "a\u0308"
+    "\u04D4": "\u00C6"
+    "\u04D5": "\u00E6"
+    "\u04D6": "E\u0306"
+    "\u04D7": "e\u0306"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04DA": "A\u030B"
+    "\u04DB": "a\u030B"
+    "\u04DC": "J\u0308"
+    "\u04DD": "j\u0308"
+    "\u04DE": "Z\u0308"
+    "\u04DF": "z\u0308"
+    "\u04E0": "D\uFE20Z\uFE21"
+    "\u04E1": "d\uFE20z\uFE21"
+    "\u04E2": "I\u0304\u0323"
+    "\u04E3": "i\u0304\u0323"
+    "\u04E4": "I\u0308\u0323"
+    "\u04E5": "i\u0308\u0323"
+    "\u04E6": "O\u0308"
+    "\u04E7": "o\u0308"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04EA": "O\u0304"
+    "\u04EB": "o\u0304"
+    "\u04EC": "E\u0308\u0323"
+    "\u04ED": "e\u0308\u0323"
+    "\u04EE": "U\u0304"
+    "\u04EF": "u\u0304"
+    "\u04F0": "U\u0308"
+    "\u04F1": "u\u0308"
+    "\u04F2": "U\u030B"
+    "\u04F3": "u\u030B"
+    "\u04F4": "C\u0308h"
+    "\u04F5": "c\u0308h"
+    "\u04F6": "G\u0323"
+    "\u04F7": "g\u0323"
+    "\u04F8": "Y\u0308"
+    "\u04F9": "y\u0308"
+    "\u04FA": "Gh\u0327"
+    "\u04FB": "gh\u0327"
+    "\u04FC": "H\u0327"
+    "\u04FD": "h\u0327"
+    "\u04FE": "H\u0304"
+    "\u04FF": "h\u0304"
+    "\u0500": "D\u0307"
+    "\u0501": "d\u0307"
+    "\u0502": "D\u0301"
+    "\u0503": "d\u0301"
+    "\u0504": "Z\u0301"
+    "\u0505": "z\u0301"
+    "\u0506": "Z\u0327"
+    "\u0507": "z\u0327"
+    "\u0508": "L\u0301"
+    "\u0509": "l\u0301"
+    "\u050A": "N\u0301"
+    "\u050B": "n\u0301"
+    "\u050C": "S\u0301"
+    "\u050D": "s\u0301"
+    "\u050E": "T\u0301"
+    "\u050F": "t\u0301"
+    "\u0510": "Z\u0307"
+    "\u0511": "z\u0307"
+    "\u0512": "L\u0327"
+    "\u0513": "l\u0327"
+    "\u0514": "Lkh\u0307"
+    "\u0515": "lkh\u0307"
+    "\u0516": "Rkh\u0307"
+    "\u0517": "rkh\u0307"
+    "\u0518": "A\u0310"
+    "\u0519": "a\u0310"
+    "\u051A": "Q\u0308"
+    "\u051B": "q\u0308"
+    "\u051C": "W\u0308"
+    "\u051D": "w\u0308"
+    "\u051E": "K\u0300"
+    "\u051F": "k\u0300"
+    "\u0520": "L\u0324"
+    "\u0521": "l\u0324"
+    "\u0522": "N\u0327"
+    "\u0523": "n\u0327"
+    "\u0524": "P\u0323"
+    "\u0525": "p\u0323"
+    "\u0526": "Sh\u0323"
+    "\u0527": "sh\u0323"
+    "\u0528": "N\u0326"
+    "\u0529": "n\u0326"
+    "\u052A": "D\uFE20z\uFE21h"
+    "\u052B": "d\uFE20z\uFE21h"
+    "\u052C": "D\uFE20c\uFE21h"
+    "\u052D": "d\uFE20c\uFE21h"
+    "\u052E": "L\u0323"
+    "\u052F": "l\u0323"

+ 12 - 33
scriptshifter/tables/data/azerbaijani.yml → scriptshifter/tables/data/azerbaijani_cyrillic.yml

@@ -7,59 +7,38 @@ roman_to_script:
   map:
     "A\u0306": "\u04D8"
     "a\u0306": "\u04D9"
-    "G": "\u0413"
-    "g": "\u0433"
-    # Included to handle typos
     "GH": "\u0492"
     "Gh": "\u0492"
     "gh": "\u0493"
-    # Included to handle typos
     "gH": "\u0493"
-    "ZH": "\u0416"
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "I\u0310": "\u0408"
-    "I": "\u0418"
-    "i\u0310": "\u0458"
-    "i": "\u0438"
     "G\u0307": "\u049C"
     "g\u0307": "\u049D"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
     "O\u0307": "\u04E8"
     "o\u0307": "\u04E9"
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
     "J": "\u04B8"
     "j": "\u04B9"
-    "Y": "\u042B"
-    "y": "\u044B"
 
 script_to_roman:
   map:
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
-    "\u0413": "G"
-    "\u0433": "g"
-    "\u0492": "Gh"
-    "\u0493": "gh"
-    "\u0416": "Zh"
-    "\u0436": "zh"
-    "\u0418": "I"
-    "\u0438": "i"
     "\u0408": "I\u0310"
     "\u0458": "i\u0310"
+    "\u0492": "Gh"
+    "\u0493": "gh"
     "\u049C": "G\u0307"
     "\u049D": "g\u0307"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
     "\u04AE": "U\u0307"
     "\u04AF": "u\u0307"
-    "\u04BA": "H\u0307"
-    "\u04BB": "h\u0307"
     "\u04B8": "J"
     "\u04B9": "j"
-    "\u042B": "Y"
-    "\u044B": "y"
-    "\u0259": "a\u0306"
-    "\u018F": "A\u0306"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 54 - 0
scriptshifter/tables/data/bashkir_cyrillic.yml

@@ -0,0 +1,54 @@
+general:
+  name: Bashkir (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "T\uFE20H\uFE21": "\u0498"
+    "T\uFE20h\uFE21": "\u0498"
+    "t\uFE20h\uFE21": "\u0499"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "GH": "\u0492"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "TH": "\u04AA"
+    "Th": "\u04AA"
+    "th": "\u04AB"
+    "J": "\u04B8"
+    "j": "\u04B9"
+    "Q": "\u04A0"
+    "q": "\u04A1"
+
+script_to_roman:
+  map:
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u0498": "T\uFE20h\uFE21"
+    "\u0499": "t\uFE20h\uFE21"
+    "\u04A0": "Q"
+    "\u04A1": "q"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04AA": "Th"
+    "\u04AB": "th"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04B8": "J"
+    "\u04B9": "j"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 23 - 0
scriptshifter/tables/data/buriat_cyrillic.yml

@@ -0,0 +1,23 @@
+general:
+  name: Buriat (Cyrillic)
+  parents:
+    - _cyrillic_base
+    - _ignore_base
+
+roman_to_script:
+  map:
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+
+script_to_roman:
+  map:
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 20 - 0
scriptshifter/tables/data/chukchi_cyrillic.yml

@@ -0,0 +1,20 @@
+general:
+  name: Chukchi (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "Q": "\u04C3"
+    "q": "\u04C4"
+
+script_to_roman:
+  map:
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04C3": "Q"
+    "\u04C4": "q"
+

+ 45 - 0
scriptshifter/tables/data/chuvash_cyrillic.yml

@@ -0,0 +1,45 @@
+general:
+  name: Chuvash (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "A\u0306": "\u0400"
+    "a\u0306": "\u0401"
+    "CH": "\u040B"
+    "Ch": "\u040B"
+    "ch": "\u045B"
+    "E\u0306": "\u04D8"
+    "e\u0306": "\u04D9"
+    "L\u0301": "\u0520"
+    "l\u0301": "\u0521"
+    "N\u0301": "\u0522"
+    "n\u0301": "\u0523"
+    "S\u0301": "\u04AA"
+    "s\u0301": "\u04AB"
+    "T\u0301": "\u0422\u030C"
+    "t\u0301": "\u0442\u030C"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+
+script_to_roman:
+  map:
+    "\u0422\u030C": "T\u0301"
+    "\u0442\u030C": "t\u0301"
+    "\u0400": "A\u0306"
+    "\u0401": "a\u0306"
+    "\u0409": "L\u0301"
+    "\u040B": "Ch"
+    "\u0459": "l\u0301"
+    "\u045B": "ch"
+    "\u04AA": "S\u0301"
+    "\u04AB": "s\u0301"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04D8": "E\u0306"
+    "\u04D9": "e\u0306"
+    "\u04F2": "U\u0307"
+    "\u04F3": "u\u0307"
+    "\u0522": "N\u0301"
+    "\u0523": "n\u0301"

+ 35 - 0
scriptshifter/tables/data/dungan_cyrillic.yml

@@ -0,0 +1,35 @@
+general:
+  name: Dungan (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "J": "\u0496"
+    "j": "\u0497"
+
+script_to_roman:
+  map:
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04C7": "N\uFE20G\uFE21"
+    "\u04C8": "n\uFE20g\uFE21"
+    "\u04C9": "N\uFE20G\uFE21"
+    "\u04CA": "n\uFE20g\uFE21"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 27 - 0
scriptshifter/tables/data/even-evenki_cyrillic.yml

@@ -0,0 +1,27 @@
+general:
+  name: Even/Evenki (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04C7"
+    "N\uFE20g\uFE21": "\u04C7"
+    "n\uFE20g\uFE21": "\u04C8"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "O\u0304": "\u04EA"
+    "o\u0304": "\u04EB"
+
+script_to_roman:
+  map:
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04C7": "N\uFE20G\uFE21"
+    "\u04C8": "n\uFE20g\uFE21"
+    "\u04C9": "N\uFE20G\uFE21"
+    "\u04CA": "n\uFE20g\uFE21"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04EA": "O\u0304"
+    "\u04EB": "o\u0304"

+ 22 - 0
scriptshifter/tables/data/gagauz_cyrillic.yml

@@ -0,0 +1,22 @@
+general:
+  name: Gagauz (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "A\u0306": "\u04D2"
+    "a\u0306": "\u04D3"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+
+script_to_roman:
+  map:
+    "\u04D2": "A\u0306"
+    "\u04D3": "a\u0306"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 80 - 17
scriptshifter/tables/data/index.yml

@@ -9,65 +9,116 @@
 
 abkhaz_cyrillic:
   name: Abkhaz (Cyrillic)
+altai_cyrillic:
+  name: Altai (Cyrillic)
 arabic_ext:
   name: Arabic (S2R)
   description: Arabic-to-Roman transliterator using the ArabicTransliterator external library.
 armenian:
   name: Armenian
-azerbaijani:
+azerbaijani_cyrillic:
   name: Azerbaijani (Cyrillic)
 asian_cyrillic:
   name: Asian Cyrillic
   description: >
     Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat, Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor, Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
+bashkir_cyrillic:
+  name: Bashkir (Cyrillic)
 belarusian:
   name: Belarusian
 bengali:
   name: Bengali
 bulgarian:
   name: Bulgarian
+buriat_cyrillic:
+  name: Buriat (Cyrillic)
 burmese:
   name: Burmese (Myanmar)
 chinese:
   name: Chinese (Hanzi)
+chukchi_cyrillic:
+  name: Chukchi (Cyrillic)
 church_slavonic:
   name: Church Slavonic
+chuvash_cyrillic:
+  name: Chuvash (Cyrillic)
 devanagari:
   name: Devanagari
+dungan_cyrillic:
+  name: Dungan (Cyrillic)
 ethiopic:
   name: Ethiopic (Amharic)
+even-evenki_cyrillic:
+  name: Even/Evenki (Cyrillic)
 georgian:
   name: Georgian
+gagauz_cyrillic:
+  name: Gagauz (Cyrillic)
 hindi:
   name: Hindi (Devanagari)
-kazakh:
+hiragana:
+  name: Japanese (Hiragana)
+katakana:
+  name: Japanese (Katakana)
+kalmyk_cyrillic:
+  name: Kalmyk (Cyrillic)
+kara-kalpak_cyrillic:
+  name: Kara-Kalpak (Cyrillic)
+karachay-balkar_cyrillic:
+  name: Karachay-Balkar (Cyrillic)
+karelian_cyrillic:
+  name: Karelian (Cyrillic)
+kazakh_cyrillic:
   name: Kazakh (Cyrillic)
+khakass_cyrillic:
+  name: Khakass (Cyrillic)
+khanty_cyrillic:
+  name: Khanty (Cyrillic)
+komi_cyrillic:
+  name: Komi (Cyrillic)
 korean_nonames:
   name: Korean
   description: Korean S2R for strings NOT containing any personal names.
 korean_names:
   name: Korean (last + first names only)
   description: Korean S2R for strings ONLY containing personal names formatted as last + first name. Separate multiple names with a comma or a center-dot (U+00B7).
-kyrgyz:
+koryak_cyrillic:
+  name: Koryak (Cyrillic)
+kyrgyz_cyrillic:
   name: Kyrgyz (Cyrillic)
-hiragana:
-  name: Japanese (Hiragana)
-katakana:
-  name: Japanese (Katakana)
-mongolian:
+lithuanian_cyrillic:
+  name: Lithuanian (Cyrillic)
+macedonian:
+  name: Macedonian
+mansi_cyrillic:
+  name: Mansi (Cyrillic)
+mari_cyrillic:
+  name: Mari (Cyrillic)
+moldovan_cyrillic:
+  name: Moldovan (Cyrillic)
+mongolian_cyrillic:
   name: Mongolian (Cyrillic)
 mongolian_mongol_bichig:
   name: Mongolian (Mongol bichig)
-russian:
-  name: Russian
-macedonian:
-  name: Macedonian
-serbian:
-  name: Serbian
+mordvin_cyrillic:
+  name: Mordvin (Cyrillic)
+nenets_cyrillic:
+  name: Nenets (Cyrillic)
+ossetic_cyrillic:
+  name: Ossetic (Cyrillic)
 pulaar:
   name: Pulaar (Adlam)
 gurmukhi:
   name: Punjabi (Gurmukhi)
+romani_cyrillic:
+  name: Romani (Cyrillic)
+russian:
+  name: Russian
+serbian:
+  name: Serbian
+shor_cyrillic:
+  name: Shor (Cyrillic)
+syriac_cyrillic:
+  name: Syriac (Cyrillic)
+tajik_cyrillic:
+  name: Tajik (Cyrillic)
 tamil:
   name: Tamil
 tamil_brahmi:
@@ -78,13 +129,25 @@ thai:
   name: Thai
 tajik:
   name: Tajik (Cyrillic)
-tatar:
+tatar-kryashen_cyrillic:
+  name: Tatar-Kryashen (Cyrillic)
+tatar_cyrillic:
   name: Tatar (Cyrillic)
 tibetan:
   name: Tibetan
-turkmen:
+turkmen_cyrillic:
   name: Turkmen (Cyrillic)
+tuvinian_cyrillic:
+  name: Tuvinian (Cyrillic)
+udmurt_cyrillic:
+  name: Udmurt (Cyrillic)
+uighur_cyrillic:
+  name: Uighur (Cyrillic)
 ukrainian:
   name: Ukrainian
-uzbek:
+uzbek_cyrillic:
   name: Uzbek (Cyrillic)
+yakut_cyrillic:
+  name: Yakut (Cyrillic)
+yuit_cyrillic:
+  name: Yuit (Cyrillic)

+ 23 - 26
scriptshifter/tables/data/tatar.yml → scriptshifter/tables/data/kalmyk_cyrillic.yml

@@ -1,50 +1,47 @@
 general:
-  name: tatar (Cyrillic)
+  name: Kalmyk (Cyrillic)
   parents:
     - _cyrillic_base
 
 roman_to_script:
   map:
-    "A\u0306": "\u04D8"
-    "a\u0306": "\u04D9"
-    "J": "\u0496"
-    "j": "\u0497"
     "N\uFE20G\uFE21": "\u04A2"
-    # Included to handle typos
     "N\uFE20g\uFE21": "\u04A2"
     "n\uFE20g\uFE21": "\u04A3"
-    # Included to handle typos
-    "n\uFE20G\uFE21": "\u04A3"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "A\u0310": "\u04D2"
+    "a\u0310": "\u04D3"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
     "O\u0307": "\u04E8"
     "o\u0307": "\u04E9"
+    "O\u0310": "\u04E6"
+    "o\u0310": "\u04E7"
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
+    "U\u0310": "\u04F0"
+    "u\u0310": "\u04F1"
+    "J": "\u0496"
+    "j": "\u0497"
 
 script_to_roman:
   map:
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
-    # Included to normalize alternate character
-    "\u04D2": "A\u0306"
-    # Included to normalize alternate character
-    "\u04D3": "a\u0306"
     "\u0496": "J"
     "\u0497": "j"
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
-    # Included to normalize alternate character
-    "\u04E6": "O\u0307"
-    # Included to normalize alternate character
-    "\u04E7": "o\u0307"
     "\u04AE": "U\u0307"
     "\u04AF": "u\u0307"
-    # Included to normalize alternate character
-    "\u04F0": "U\u0307"
-    # Included to normalize alternate character
-    "\u04F1": "u\u0307"
     "\u04BA": "H\u0307"
     "\u04BB": "h\u0307"
+    "\u04D2": "A\u0310"
+    "\u04D3": "a\u0310"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E6": "O\u0310"
+    "\u04E7": "o\u0310"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04F0": "U\u0310"
+    "\u04F1": "u\u0310"

+ 44 - 0
scriptshifter/tables/data/kara-kalpak_cyrillic.yml

@@ -0,0 +1,44 @@
+general:
+  name: Kara-Kalpak (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "GH": "\u0492"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "H\u0307": "\u04B2"
+    "h\u0307": "\u04B3"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "Q": "\u049A"
+    "q": "\u049B"
+    "W": "\u040E"
+    "w": "\u045E"
+
+script_to_roman:
+  map:
+    "\u040E": "W"
+    "\u045E": "w"
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u049A": "Q"
+    "\u049B": "q"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04B2": "H\u0307"
+    "\u04B3": "h\u0307"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 15 - 0
scriptshifter/tables/data/karachay-balkar_cyrillic.yml

@@ -0,0 +1,15 @@
+general:
+  name: Karachay-Balkar (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "W": "\u0423\u0313"
+    "w": "\u0443\u0313"
+
+script_to_roman:
+  map:
+    "\u0423\u0313": "W"
+    "\u0443\u0313": "w"
+

+ 22 - 0
scriptshifter/tables/data/karelian_cyrillic.yml

@@ -0,0 +1,22 @@
+general:
+  name: Karelian (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "A\u0306": "\u04D2"
+    "a\u0306": "\u04D3"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+
+script_to_roman:
+  map:
+    "\u04D2": "A\u0306"
+    "\u04D3": "a\u0306"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 18 - 24
scriptshifter/tables/data/kazakh.yml → scriptshifter/tables/data/kazakh_cyrillic.yml

@@ -1,54 +1,48 @@
 general:
-  name: kazakh (Cyrillic)
+  name: Kazakh (Cyrillic)
   parents:
     - _cyrillic_base
 
 roman_to_script:
   map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
     "A\u0306": "\u04D8"
     "a\u0306": "\u04D9"
-    # Included to handle typos
     "GH": "\u0492"
     "Gh": "\u0492"
     "gh": "\u0493"
-    # Included to handle typos
-    "gH": "\u0493"
-    "I\u0304": "\u0408"
-    "i\u0304": "\u0458"
-    "Q": "\u049A"
-    "q": "\u049B"
-    "N\uFE20G\uFE21": "\u04A2"
-    # Included to handle typos
-    "N\uFE20g\uFE21": "\u04A2"
-    "n\uFE20g\uFE21": "\u04A3"
-    # Included to handle typos
-    "n\uFE20G\uFE21": "\u04A3"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "I\u0304": "\u0406"
+    "i\u0304": "\u0456"
     "O\u0307": "\u04E8"
     "o\u0307": "\u04E9"
     "U\u0304": "\u04B0"
     "u\u0304": "\u04B1"
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
+    "Q": "\u049A"
+    "q": "\u049B"
 
 script_to_roman:
   map:
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
+    "\u0406": "I\u0304"
+    "\u0456": "i\u0304"
     "\u0492": "Gh"
     "\u0493": "gh"
-    "\u0408": "I\u0304"
-    "\u0458": "i\u0304"
     "\u049A": "Q"
     "\u049B": "q"
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
-    "\u04B0": "U\u0304"
-    "\u04B1": "u\u0304"
     "\u04AE": "U\u0307"
     "\u04AF": "u\u0307"
+    "\u04B0": "U\u0304"
+    "\u04B1": "u\u0304"
     "\u04BA": "H\u0307"
     "\u04BB": "h\u0307"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 40 - 0
scriptshifter/tables/data/khakass_cyrillic.yml

@@ -0,0 +1,40 @@
+general:
+  name: Khakass (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "A\u0306": "\u04D2"
+    "a\u0306": "\u04D3"
+    "GH": "\u0492"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "I\u0304": "\u0406"
+    "i\u0304": "\u0456"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+    "J\u0304": "\u0408"
+    "j\u0304": "\u0458"
+
+script_to_roman:
+  map:
+    "\u0406": "I\u0304"
+    "\u0408": "J\u0304"
+    "\u0456": "i\u0304"
+    "\u0458": "j\u0304"
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04D2": "A\u0306"
+    "\u04D3": "a\u0306"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 47 - 0
scriptshifter/tables/data/khanty_cyrillic.yml

@@ -0,0 +1,47 @@
+general:
+  name: Khanty (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04C7"
+    "N\uFE20g\uFE21": "\u04C7"
+    "n\uFE20g\uFE21": "\u04C8"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "A\u0308": "\u04D2"
+    "a\u0308": "\u04D3"
+    "A\u0310": "\u04DA"
+    "a\u0310": "\u04DB"
+    "O\u0304": "\u04E8"
+    "o\u0304": "\u04E9"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "O\u0310": "\u04EA"
+    "o\u0310": "\u04EB"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+    "Q": "\u04C3"
+    "q": "\u04C4"
+
+script_to_roman:
+  map:
+    "\u04C3": "Q"
+    "\u04C4": "q"
+    "\u04C7": "N\uFE20G\uFE21"
+    "\u04C8": "n\uFE20g\uFE21"
+    "\u04D2": "A\u0308"
+    "\u04D3": "a\u0308"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04DA": "A\u0310"
+    "\u04DB": "a\u0310"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04E8": "O\u0304"
+    "\u04E9": "o\u0304"
+    "\u04EA": "O\u0310"
+    "\u04EB": "o\u0310"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 61 - 0
scriptshifter/tables/data/komi_cyrillic.yml

@@ -0,0 +1,61 @@
+general:
+  name: Komi (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "D\u0320Z\u0320H\u0320": "\u0496"
+    "D\u0320Z\u0320h\u0320": "\u0496"
+    "D\u0320z\u0320h\u0320": "\u0496"
+    "d\u0320z\u0320h\u0320": "\u0497"
+    "D\uFE20Z\uFE21": "\u0506"
+    "D\uFE20z\uFE21": "\u0506"
+    "d\uFE20z\uFE21": "\u0507"
+    "D\u0307": "\u0500"
+    "d\u0307": "\u0501"
+    "D\u0301": "\u0502"
+    "d\u0301": "\u0503"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+    "L\u0301": "\u0508"
+    "l\u0301": "\u0509"
+    "N\u0301": "\u050A"
+    "n\u0301": "\u050B"
+    "O\u0308": "\u04E6"
+    "o\u0308": "\u04E7"
+    "S\u0301": "\u050C"
+    "s\u0301": "\u050D"
+    "T\u0301": "\u050E"
+    "t\u0301": "\u050F"
+    "Z\u0301": "\u0504"
+    "z\u0301": "\u0505"
+
+script_to_roman:
+  map:
+    "\u0408": "I\u0310"
+    "\u0409": "L\u0301"
+    "\u040A": "N\u0301"
+    "\u0458": "i\u0310"
+    "\u0459": "l\u0301"
+    "\u045A": "n\u0301"
+    "\u0496": "D\u0320z\u0320h\u0320"
+    "\u0497": "d\u0320z\u0320h\u0320"
+    "\u04E6": "O\u0308"
+    "\u04E7": "o\u0308"
+    "\u0500": "D\u0307"
+    "\u0501": "d\u0307"
+    "\u0502": "D\u0301"
+    "\u0503": "d\u0301"
+    "\u0504": "Z\u0301"
+    "\u0505": "z\u0301"
+    "\u0506": "D\uFE20Z\uFE21"
+    "\u0507": "d\uFE20z\uFE21"
+    "\u0508": "L\u0301"
+    "\u0509": "l\u0301"
+    "\u050A": "N\u0301"
+    "\u050B": "n\u0301"
+    "\u050C": "S\u0301"
+    "\u050D": "s\u0301"
+    "\u050E": "T\u0301"
+    "\u050F": "t\u0301"

+ 19 - 0
scriptshifter/tables/data/koryak_cyrillic.yml

@@ -0,0 +1,19 @@
+general:
+  name: Koryak (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04C7"
+    "N\uFE20g\uFE21": "\u04C7"
+    "n\uFE20g\uFE21": "\u04C8"
+    "Q": "\u04C3"
+    "q": "\u04C4"
+
+script_to_roman:
+  map:
+    "\u04C3": "Q"
+    "\u04C4": "q"
+    "\u04C7": "N\uFE20G\uFE21"
+    "\u04C8": "n\uFE20g\uFE21"

+ 3 - 6
scriptshifter/tables/data/kyrgyz.yml → scriptshifter/tables/data/kyrgyz_cyrillic.yml

@@ -1,16 +1,13 @@
 general:
-  name: kyrgyz (Cyrillic)
+  name: Kyrgyz (Cyrillic)
   parents:
     - _cyrillic_base
 
 roman_to_script:
   map:
     "N\uFE20G\uFE21": "\u04A2"
-    # Included to handle typos
     "N\uFE20g\uFE21": "\u04A2"
     "n\uFE20g\uFE21": "\u04A3"
-    # Included to handle typos
-    "n\uFE20G\uFE21": "\u04A3"
     "O\u0307": "\u04E8"
     "o\u0307": "\u04E9"
     "U\u0307": "\u04AE"
@@ -20,7 +17,7 @@ script_to_roman:
   map:
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
     "\u04AE": "U\u0307"
     "\u04AF": "u\u0307"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 30 - 0
scriptshifter/tables/data/lithuanian_cyrillic.yml

@@ -0,0 +1,30 @@
+general:
+  name: Lithuanian (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "C\u030C": "\u0427"
+    "c\u030C": "\u0447"
+    "E\u0307": "\u0462"
+    "e\u0307": "\u0463"
+    "S\u030C": "\u0428"
+    "s\u030C": "\u0448"
+    "C": "\u0426"
+    "c": "\u0446"
+    "J": "\u0416"
+    "j": "\u0436"
+
+script_to_roman:
+  map:
+    "\u0416": "J"
+    "\u0426": "C"
+    "\u0427": "C\u030C"
+    "\u0428": "S\u030C"
+    "\u0436": "j"
+    "\u0446": "c"
+    "\u0447": "c\u030C"
+    "\u0448": "s\u030C"
+    "\u0462": "E\u0307"
+    "\u0463": "e\u0307"

+ 15 - 0
scriptshifter/tables/data/mansi_cyrillic.yml

@@ -0,0 +1,15 @@
+general:
+  name: Mansi (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04C7"
+    "N\uFE20g\uFE21": "\u04C7"
+    "n\uFE20g\uFE21": "\u04C8"
+
+script_to_roman:
+  map:
+    "\u04C7": "N\uFE20G\uFE21"
+    "\u04C8": "n\uFE20g\uFE21"

+ 36 - 0
scriptshifter/tables/data/mari_cyrillic.yml

@@ -0,0 +1,36 @@
+general:
+  name: Mari (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "I\uFE20O\uFE21": "\u0401"
+    "I\uFE20o\uFE21": "\u0401"
+    "i\uFE20o\uFE21": "\u0451"
+    "N\uFE20G\uFE21": "\u04A4"
+    "N\uFE20g\uFE21": "\u04A4"
+    "n\uFE20g\uFE21": "\u04A5"
+    "A\u0306": "\u04D2"
+    "a\u0306": "\u04D3"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+    "Y\u0307": "\u04F8"
+    "y\u0307": "\u04F9"
+
+script_to_roman:
+  map:
+    "\u0401": "I\uFE20O\uFE21"
+    "\u0451": "i\uFE20o\uFE21"
+    "\u04A4": "N\uFE20G\uFE21"
+    "\u04A5": "n\uFE20g\uFE21"
+    "\u04D2": "A\u0306"
+    "\u04D3": "a\u0306"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"
+    "\u04F8": "Y\u0307"
+    "\u04F9": "y\u0307"

+ 14 - 0
scriptshifter/tables/data/moldovan_cyrillic.yml

@@ -0,0 +1,14 @@
+general:
+  name: Moldovan (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "J": "\u04C1"
+    "j": "\u04C2"
+
+script_to_roman:
+  map:
+    "\u04C1": "J"
+    "\u04C2": "j"

+ 4 - 7
scriptshifter/tables/data/mongolian.yml → scriptshifter/tables/data/mongolian_cyrillic.yml

@@ -6,25 +6,22 @@ general:
 roman_to_script:
   map:
     "N\uFE20G\uFE21": "\u04A2"
-    # Included to handle typos
     "N\uFE20g\uFE21": "\u04A2"
     "n\uFE20g\uFE21": "\u04A3"
-    # Included to handle typos
-    "n\uFE20G\uFE21": "\u04A3"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
     "O\u0307": "\u04E8"
     "o\u0307": "\u04E9"
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
 
 script_to_roman:
   map:
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
     "\u04AE": "U\u0307"
     "\u04AF": "u\u0307"
     "\u04BA": "H\u0307"
     "\u04BB": "h\u0307"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 60 - 0
scriptshifter/tables/data/mordvin_cyrillic.yml

@@ -0,0 +1,60 @@
+general:
+  name: Mordvin (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "I\uFE20O\uFE21": "\u0401"
+    "I\uFE20o\uFE21": "\u0401"
+    "i\uFE20o\uFE21": "\u0451"
+    "LKH\u0307": "\u0514"
+    "LKh\u0307": "\u0514"
+    "Lkh\u0307": "\u0514"
+    "lkh\u0307": "\u0515"
+    "N\uFE20G\uFE21": "\u04A4"
+    "N\uFE20g\uFE21": "\u04A4"
+    "n\uFE20g\uFE21": "\u04A5"
+    "RKH\u0307": "\u0516"
+    "RKh\u0307": "\u0516"
+    "Rkh\u0307": "\u0516"
+    "rkh\u0307": "\u0517"
+    "A\u0306": "\u04EC"
+    "a\u0306": "\u04ED"
+    "A\u0310": "\u0518"
+    "a\u0310": "\u0519"
+    "E\u0306": "\u04D6"
+    "e\u0306": "\u04D7"
+    "O\u0306": "\u041E"
+    "o\u0306": "\u043E"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "Y\u0307": "\u04F8"
+    "y\u0307": "\u04F9"
+
+script_to_roman:
+  map:
+    "\u0401": "I\uFE20O\uFE21"
+    "\u041E": "O\u0306"
+    "\u043E": "o\u0306"
+    "\u0451": "i\uFE20o\uFE21"
+    "\u04A4": "N\uFE20G\uFE21"
+    "\u04A5": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04D6": "E\u0306"
+    "\u04D7": "e\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04EC": "A\u0306"
+    "\u04ED": "a\u0306"
+    "\u04F8": "Y\u0307"
+    "\u04F9": "y\u0307"
+    "\u0514": "Lkh\u0307"
+    "\u0515": "lkh\u0307"
+    "\u0516": "Rkh\u0307"
+    "\u0517": "rkh\u0307"
+    "\u0518": "A\u0310"
+    "\u0519": "a\u0310"

+ 15 - 0
scriptshifter/tables/data/nenets_cyrillic.yml

@@ -0,0 +1,15 @@
+general:
+  name: Nenets (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04C7"
+    "N\uFE20g\uFE21": "\u04C7"
+    "n\uFE20g\uFE21": "\u04C8"
+
+script_to_roman:
+  map:
+    "\u04C7": "N\uFE20G\uFE21"
+    "\u04C8": "n\uFE20g\uFE21"

+ 17 - 0
scriptshifter/tables/data/ossetic_cyrillic.yml

@@ -0,0 +1,17 @@
+general:
+  name: Ossetic (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "A\uFE20E\uFE21": "\u04D4"
+    "A\uFE20e\uFE21": "\u04D4"
+    "a\uFE20e\uFE21": "\u04D5"
+    "\u00C6": "\u04D4"
+    "\u00E6": "\u04D5"
+
+script_to_roman:
+  map:
+    "\u04D4": "\u00C6"
+    "\u04D5": "\u00E6"

+ 23 - 0
scriptshifter/tables/data/romani_cyrillic.yml

@@ -0,0 +1,23 @@
+general:
+  name: Romani (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "GH": "\u0490"
+    "Gh": "\u0490"
+    "gh": "\u0491"
+    "R\u0326": "\u0420\u0306"
+    "r\u0326": "\u0440\u0306"
+    "R\u0327": "\u0420\u0306"
+    "r\u0327": "\u0440\u0306"
+
+script_to_roman:
+  map:
+    "\u0420\u0306": "R\u0327"
+    "\u0440\u0306": "r\u0327"
+    "\u0490": "Gh"
+    "\u0491": "gh"
+    "\u0492": "Gh"
+    "\u0493": "gh"

+ 28 - 0
scriptshifter/tables/data/shor_cyrillic.yml

@@ -0,0 +1,28 @@
+general:
+  name: Shor (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A4"
+    "N\uFE20g\uFE21": "\u04A4"
+    "n\uFE20g\uFE21": "\u04A5"
+    "C\u0307H": "\u0408"
+    "C\u0307h": "\u0408"
+    "c\u0307h": "\u0458"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+
+script_to_roman:
+  map:
+    "\u0408": "C\u0307h"
+    "\u0458": "c\u0307h"
+    "\u04A4": "N\uFE20G\uFE21"
+    "\u04A5": "n\uFE20g\uFE21"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 44 - 0
scriptshifter/tables/data/syriac_cyrillic.yml

@@ -0,0 +1,44 @@
+general:
+  name: Syriac (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+    "T\u0307": "\u0422"
+    "t\u0307": "\u0442"
+    "D": "\u0500"
+    "d": "\u0501"
+    "L": "L"
+    "l": "l"
+    "Q": "\u051A"
+    "q": "\u051B"
+    "S": "\u0405"
+    "s": "\u0455"
+
+script_to_roman:
+  map:
+    "L": "L"
+    "l": "l"
+    "\u0405": "S"
+    "\u0408": "I\u0310"
+    "\u041B": "L"
+    "\u0422": "T\u0307"
+    "\u043B": "l"
+    "\u0442": "t\u0307"
+    "\u0455": "s"
+    "\u0458": "i\u0310"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u0500": "D"
+    "\u0501": "d"
+    "\u051A": "Q"
+    "\u051B": "q"

+ 9 - 14
scriptshifter/tables/data/tajik.yml → scriptshifter/tables/data/tajik_cyrillic.yml

@@ -1,40 +1,35 @@
 general:
-  name: tajik (Cyrillic)
+  name: Tajik (Cyrillic)
   parents:
     - _cyrillic_base
 
 roman_to_script:
   map:
-    # Included to handle typos
     "GH": "\u0492"
     "Gh": "\u0492"
     "gh": "\u0493"
-    # Included to handle typos
-    "gH": "\u0493"
+    "H\u0307": "\u04B2"
+    "h\u0307": "\u04B3"
     "I\u0304": "\u04E2"
     "i\u0304": "\u04E3"
-    "Q": "\u049A"
-    "q": "\u049B"
     "U\u0304": "\u04EE"
     "u\u0304": "\u04EF"
-    "H\u0307": "\u04B2"
-    "h\u0307": "\u04B3"
     "J": "\u04B6"
     "j": "\u04B7"
+    "Q": "\u049A"
+    "q": "\u049B"
 
 script_to_roman:
   map:
     "\u0492": "Gh"
     "\u0493": "gh"
-    "\u04E2": "I\u0304"
-    "\u04E3": "i\u0304"
     "\u049A": "Q"
     "\u049B": "q"
-    "\u04A2": "N\uFE20G\uFE21"
-    "\u04A3": "n\uFE20g\uFE21"
-    "\u04EE": "U\u0304"
-    "\u04EF": "u\u0304"
     "\u04B2": "H\u0307"
     "\u04B3": "h\u0307"
     "\u04B6": "J"
     "\u04B7": "j"
+    "\u04E2": "I\u0304"
+    "\u04E3": "i\u0304"
+    "\u04EE": "U\u0304"
+    "\u04EF": "u\u0304"

+ 35 - 0
scriptshifter/tables/data/tatar-kryashen_cyrillic.yml

@@ -0,0 +1,35 @@
+general:
+  name: Tatar-Kryashen (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A4"
+    "N\uFE20g\uFE21": "\u04A4"
+    "n\uFE20g\uFE21": "\u04A5"
+    "A\u0306": "\u04D2"
+    "a\u0306": "\u04D3"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "U\u0307": "\u04F0"
+    "u\u0307": "\u04F1"
+    "J": "\u0496"
+    "j": "\u0497"
+
+script_to_roman:
+  map:
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u04A4": "N\uFE20G\uFE21"
+    "\u04A5": "n\uFE20g\uFE21"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04D2": "A\u0306"
+    "\u04D3": "a\u0306"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 35 - 0
scriptshifter/tables/data/tatar_cyrillic.yml

@@ -0,0 +1,35 @@
+general:
+  name: Tatar (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "J": "\u0496"
+    "j": "\u0497"
+
+script_to_roman:
+  map:
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 7 - 10
scriptshifter/tables/data/turkmen.yml → scriptshifter/tables/data/turkmen_cyrillic.yml

@@ -1,24 +1,21 @@
 general:
-  name: turkmen (Cyrillic)
+  name: Turkmen (Cyrillic)
   parents:
     - _cyrillic_base
 
 roman_to_script:
   map:
-    "J": "\u0496"
-    "j": "\u0497"
     "N\uFE20G\uFE21": "\u04A2"
-    # Included to handle typos
     "N\uFE20g\uFE21": "\u04A2"
     "n\uFE20g\uFE21": "\u04A3"
-    # Included to handle typos
-    "n\uFE20G\uFE21": "\u04A3"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
     "O\u0307": "\u04E8"
     "o\u0307": "\u04E9"
     "U\u0307": "\u04AE"
     "u\u0307": "\u04AF"
-    "A\u0306": "\u04D8"
-    "a\u0306": "\u04D9"
+    "J": "\u0496"
+    "j": "\u0497"
 
 script_to_roman:
   map:
@@ -26,9 +23,9 @@ script_to_roman:
     "\u0497": "j"
     "\u04A2": "N\uFE20G\uFE21"
     "\u04A3": "n\uFE20g\uFE21"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
     "\u04AE": "U\u0307"
     "\u04AF": "u\u0307"
     "\u04D8": "A\u0306"
     "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 27 - 0
scriptshifter/tables/data/tuvinian_cyrillic.yml

@@ -0,0 +1,27 @@
+general:
+  name: Tuvinian (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+
+script_to_roman:
+  map:
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 32 - 0
scriptshifter/tables/data/udmurt_cyrillic.yml

@@ -0,0 +1,32 @@
+general:
+  name: Udmurt (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "D\uFE20Z\uFE21": "\u04DE"
+    "D\uFE20z\uFE21": "\u04DE"
+    "d\uFE20z\uFE21": "\u04DF"
+    "I\u0308": "\u04E4"
+    "i\u0308": "\u04E5"
+    "O\u0307": "\u04E6"
+    "o\u0307": "\u04E7"
+    "C\u0307H": "\u04F4"
+    "C\u0307h": "\u04F4"
+    "c\u0307h": "\u04F5"
+    "J": "\u04DC"
+    "j": "\u04DD"
+
+script_to_roman:
+  map:
+    "\u04DC": "J"
+    "\u04DD": "j"
+    "\u04DE": "D\uFE20Z\uFE21"
+    "\u04DF": "d\uFE20z\uFE21"
+    "\u04E4": "I\u0308"
+    "\u04E5": "i\u0308"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04F4": "C\u0307h"
+    "\u04F5": "c\u0307h"

+ 46 - 0
scriptshifter/tables/data/uighur_cyrillic.yml

@@ -0,0 +1,46 @@
+general:
+  name: Uighur (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A2"
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "GH": "\u0492"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "J": "\u0496"
+    "j": "\u0497"
+    "Q": "\u049A"
+    "q": "\u049B"
+
+script_to_roman:
+  map:
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u049A": "Q"
+    "\u049B": "q"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04C3": "Q"
+    "\u04C4": "q"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"

+ 9 - 14
scriptshifter/tables/data/uzbek.yml → scriptshifter/tables/data/uzbek_cyrillic.yml

@@ -1,34 +1,29 @@
 general:
-  name: uzbek (Cyrillic)
+  name: Uzbek (Cyrillic)
   parents:
     - _cyrillic_base
 
 roman_to_script:
   map:
-    # Included to handle typos
+    "H\u0307": "\u04B2"
+    "h\u0307": "\u04B3"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
     "GH": "\u0492"
     "Gh": "\u0492"
     "gh": "\u0493"
-    # Included to handle typos
-    "gH": "\u0493"
     "Q": "\u049A"
     "q": "\u049B"
-    "U\u0306": "\u040E"
-    "u\u0306": "\u045E"
-    "H\u0307": "\u04B2"
-    "h\u0307": "\u04B3"
 
 script_to_roman:
   map:
+    "\u040E": "U\u0306"
+    "\u045E": "u\u0306"
     "\u0492": "Gh"
     "\u0493": "gh"
     "\u049A": "Q"
     "\u049B": "q"
-    # Included to normalize alternate character
-    "\u04C3": "Q"
-    # Included to normalize alternate character
-    "\u04C4": "q"
-    "\u040E": "U\u0306"
-    "\u045E": "u\u0306"
     "\u04B2": "H\u0307"
     "\u04B3": "h\u0307"
+    "\u04C3": "Q"
+    "\u04C4": "q"

+ 58 - 0
scriptshifter/tables/data/yakut_cyrillic.yml

@@ -0,0 +1,58 @@
+general:
+  name: Yakut (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u04A4"
+    "N\uFE20g\uFE21": "\u04A4"
+    "n\uFE20g\uFE21": "\u04A5"
+    "D\u0301": "\u040F"
+    "d\u0301": "\u045F"
+    "E\u0307": "\u04D2"
+    "e\u0307": "\u04D3"
+    "GH": "\u0494"
+    "Gh": "\u0494"
+    "gh": "\u0495"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+    "N\u0301": "\u041D\u0313"
+    "n\u0301": "\u043D\u0313"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "L": "\u004C"
+    "l": "\u006C"
+
+script_to_roman:
+  map:
+    "\u041D\u0313": "N\u0301"
+    "\u043D\u0313": "n\u0301"
+    "\u004C": "L"
+    "\u006C": "l"
+    "\u0408": "I\u0310"
+    "\u040F": "D\u0301"
+    "\u0458": "i\u0310"
+    "\u045F": "d\u0301"
+    "\u0494": "Gh"
+    "\u0495": "gh"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04A4": "N\uFE20G\uFE21"
+    "\u04A5": "n\uFE20g\uFE21"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04D2": "E\u0307"
+    "\u04D3": "e\u0307"
+    "\u04E6": "O\u0307"
+    "\u04E7": "o\u0307"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04F0": "U\u0307"
+    "\u04F1": "u\u0307"

+ 36 - 0
scriptshifter/tables/data/yuit_cyrillic.yml

@@ -0,0 +1,36 @@
+general:
+  name: Yuit (Cyrillic)
+  parents:
+    - _cyrillic_base
+
+roman_to_script:
+  map:
+    "N\uFE20G\uFE21": "\u041D\u0027"
+    "N\uFE20g\uFE21": "\u041D\u0027"
+    "n\uFE20g\uFE21": "\u043D\u0027"
+    "GH": "\u0413\u0027"
+    "Gh": "\u0413\u0027"
+    "gh": "\u0433\u0027"
+    "H\u0307": "\u0425\u0027"
+    "h\u0307": "\u0445\u0027"
+    "L\u0301": "\u041B\u0027"
+    "l\u0301": "\u043B\u0027"
+    "Q": "\u041A\u0027"
+    "q": "\u043A\u0027"
+    "W": "\u040E"
+    "w": "\u045E"
+
+script_to_roman:
+  map:
+    "\u0413\u0027": "Gh"
+    "\u041A\u0027": "Q"
+    "\u041B\u0027": "L\u0301"
+    "\u041D\u0027": "N\uFE20G\uFE21"
+    "\u0425\u0027": "H\u0307"
+    "\u0433\u0027": "gh"
+    "\u043A\u0027": "q"
+    "\u043B\u0027": "l\u0301"
+    "\u043D\u0027": "n\uFE20g\uFE21"
+    "\u0445\u0027": "h\u0307"
+    "\u040E": "W"
+    "\u045E": "w"

+ 1 - 1
tests/data/script_samples/cyrillic.csv

@@ -10,7 +10,7 @@ kazakh,"А а, Ә ә Б б, В в, Г г, Ғ ғ, Д д, Е е, Ё ё, Ж ж, З
 ,Ә'франдинед нвиск'аред к'öрдед Әрмәнистанейә Советие,E'frandinêd nvîsk'arêd k'urdêd Ermenîstanêye Sovêtiê,,
 kyrgyz,Кыргызстандын тарыхы : байыркы мезгилден букунгу кунгө чейин : үч томдук / башкы ред. А. Джуманалиев [and nine others].,Kyrgyzstandyn tarykhy : baĭyrky mezgilden bukungu kungȯ cheĭin : u̇ch tomduk / bashky red. A. Dzhumanaliev [and nine others].,,
 kyrgyz,"А а, Б б, В в, Г г, Д д, Е е, Ё ё, Ж ж, З з, И и, Й й, К к, Л л, М м, Н н, Ң ң, О о, Ө ө, П п, Р р, С с, Т т, У у, Ү ү, Ф ф, Х х, Ц ц, Ч ч, Ш ш, Щ щ, Ъ ъ, Ы ы, Ь ь, Э э, Ю ю, Я я","A a, B b, V v, G g, D d, E e, Ë ë, Zh zh, Z z, I i, Ĭ ĭ, K k, L l, M m, N n, N︠G︡ n︠g︡, O o, Ȯ ȯ, P p, R r, S s, T t, U u, U̇ u̇, F f, Kh kh, T︠S︡ t︠s︡, Ch ch, Sh sh, Shch shch, ʺ̳ ʺ, Y y,  ʹ̳ ʹ, Ė ė, I︠U︡ i︠u︡, I︠A︡ i︠a︡",,
-serbian_macedonian,Облици на моќ : вистината за Македонија / Георги (Џорџ) Бранов,Oblici na moḱ : vistinata za Makedonija / Georgi (D︠ž︡ord︠ž︡) Branov,,
+serbian,Облици на моќ : вистината за Македонија / Георги (Џорџ) Бранов,Oblici na moḱ : vistinata za Makedonija / Georgi (D︠ž︡ord︠ž︡) Branov,,
 mongolian,Дайчин гүрний үеийн олон хэлний үсэг хавсарсан сурвалж бичгийн судлал.Тываның төөгүзү / Салчак Тока. Лодон багшын дэбтэрһээ.,Daĭchin gu̇rniĭ u̇eiĭn olon khėlniĭ u̇sėg khavsarsan survalzh bichgiĭn sudlal. Tyvanyn︠g︡ tȯȯgu̇zu̇ / Salchak Toka. Lodon bagshyn dėbtėrḣėė.,,
 mongolian,"А а, Б б, В в, Г г, Д д, Е е, Ё ё, Ж ж, З з, И и, Й й, К к, Л л, М м, Н н, О о, Ө ө, П п, Р р, С с, Т т, У у, Ү ү, Ф ф, Х х, Һ һ, Ц ц, Ч ч, Ш ш, Щ щ, Ъ ъ, Ы ы, Ь ь, Э э, Ю ю, Я я","A a, B b, V v, G g, D d, E e, Ë ë, Zh zh, Z z, I i, Ĭ ĭ, K k, L l, M m, N n, O o, Ȯ ȯ, P p, R r, S s, T t, U u, U̇ u̇, F f, Kh kh, Ḣ ḣ, T︠S︡ t︠s︡, Ch ch, Sh sh, Shch shch, ʺ̳ ʺ, Y y,  ʹ̳ ʹ, Ė ė, I︠U︡ i︠u︡, I︠A︡ i︠a︡",,
 russian,"Священный мусор : поднимаясь по лестнице Якова : [рассказы, эссе, интервью]","Svi︠a︡shchennyĭ musor : podnimai︠a︡sʹ po lestnit︠s︡e I︠A︡kova : [rasskazy, ėsse, intervʹi︠u︡]",,"Shall we normalize the R2S output (as well as the expected test strings) so that we consistently output either only pre-combined characters, or only combining characters separated?"