瀏覽代碼

Merge branch 'main' into randy_2025_03_index

Matt Miller 2 月之前
父節點
當前提交
6cf69f15f8
共有 59 個文件被更改,包括 1583 次插入1168 次删除
  1. 3 3
      doc/supported_scripts.md
  2. 0 133
      scriptshifter/tables/data/_cyrillic_base.yml
  3. 22 17
      scriptshifter/tables/data/_ignore_base.yml
  4. 1 1
      scriptshifter/tables/data/abkhaz_cyrillic.yml
  5. 1 1
      scriptshifter/tables/data/altai_cyrillic.yml
  6. 7 4
      scriptshifter/tables/data/arabic.yml
  7. 1 1
      scriptshifter/tables/data/azerbaijani_cyrillic.yml
  8. 1 1
      scriptshifter/tables/data/bashkir_cyrillic.yml
  9. 1 1
      scriptshifter/tables/data/belarusian.yml
  10. 18 38
      scriptshifter/tables/data/bulgarian.yml
  11. 1 2
      scriptshifter/tables/data/buriat_cyrillic.yml
  12. 1 1
      scriptshifter/tables/data/chukchi_cyrillic.yml
  13. 1 1
      scriptshifter/tables/data/church_slavonic.yml
  14. 1 1
      scriptshifter/tables/data/chuvash_cyrillic.yml
  15. 705 700
      scriptshifter/tables/data/cyrillic_generic.yml
  16. 1 1
      scriptshifter/tables/data/dungan_cyrillic.yml
  17. 1 1
      scriptshifter/tables/data/even-evenki_cyrillic.yml
  18. 1 1
      scriptshifter/tables/data/gagauz_cyrillic.yml
  19. 207 209
      scriptshifter/tables/data/greek_classical.yml
  20. 18 0
      scriptshifter/tables/data/greek_modern.yml
  21. 1 1
      scriptshifter/tables/data/kalmyk_cyrillic.yml
  22. 1 1
      scriptshifter/tables/data/kara-kalpak_cyrillic.yml
  23. 1 1
      scriptshifter/tables/data/karachay-balkar_cyrillic.yml
  24. 1 1
      scriptshifter/tables/data/karelian_cyrillic.yml
  25. 1 1
      scriptshifter/tables/data/kazakh_cyrillic.yml
  26. 1 1
      scriptshifter/tables/data/khakass_cyrillic.yml
  27. 1 1
      scriptshifter/tables/data/khanty_cyrillic.yml
  28. 1 1
      scriptshifter/tables/data/komi_cyrillic.yml
  29. 1 1
      scriptshifter/tables/data/koryak_cyrillic.yml
  30. 1 1
      scriptshifter/tables/data/kyrgyz_cyrillic.yml
  31. 1 1
      scriptshifter/tables/data/lithuanian_cyrillic.yml
  32. 62 16
      scriptshifter/tables/data/macedonian.yml
  33. 191 0
      scriptshifter/tables/data/manchu.yml
  34. 1 1
      scriptshifter/tables/data/mansi_cyrillic.yml
  35. 1 1
      scriptshifter/tables/data/mari_cyrillic.yml
  36. 1 1
      scriptshifter/tables/data/moldovan_cyrillic.yml
  37. 1 1
      scriptshifter/tables/data/mongolian_cyrillic.yml
  38. 1 1
      scriptshifter/tables/data/mordvin_cyrillic.yml
  39. 1 1
      scriptshifter/tables/data/nenets_cyrillic.yml
  40. 1 1
      scriptshifter/tables/data/ossetic_cyrillic.yml
  41. 1 1
      scriptshifter/tables/data/romani_cyrillic.yml
  42. 1 1
      scriptshifter/tables/data/russian.yml
  43. 1 1
      scriptshifter/tables/data/serbian.yml
  44. 1 1
      scriptshifter/tables/data/shor_cyrillic.yml
  45. 1 1
      scriptshifter/tables/data/syriac_cyrillic.yml
  46. 1 1
      scriptshifter/tables/data/tajik_cyrillic.yml
  47. 1 1
      scriptshifter/tables/data/tatar-kryashen_cyrillic.yml
  48. 1 1
      scriptshifter/tables/data/tatar_cyrillic.yml
  49. 275 0
      scriptshifter/tables/data/tod_mongolian.yml
  50. 1 1
      scriptshifter/tables/data/turkmen_cyrillic.yml
  51. 1 1
      scriptshifter/tables/data/tuvinian_cyrillic.yml
  52. 1 1
      scriptshifter/tables/data/udmurt_cyrillic.yml
  53. 1 1
      scriptshifter/tables/data/uighur_cyrillic.yml
  54. 1 1
      scriptshifter/tables/data/ukrainian.yml
  55. 1 1
      scriptshifter/tables/data/uzbek_cyrillic.yml
  56. 1 1
      scriptshifter/tables/data/yakut_cyrillic.yml
  57. 1 1
      scriptshifter/tables/data/yuit_cyrillic.yml
  58. 16 0
      scriptshifter/tables/index.yml
  59. 13 1
      scriptshifter/trans.py

+ 3 - 3
doc/supported_scripts.md

@@ -63,7 +63,7 @@ third-party library.
 |  [macedonian](../scriptshifter/tables/data/macedonian.yml)  |  Macedonian  |  Y  |  Y  |  stable  |  
 |  [malayalam](../scriptshifter/tables/data/malayalam.yml)  |  Malayalam  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [mansi_cyrillic](../scriptshifter/tables/data/mansi_cyrillic.yml)  |  Mansi (Cyrillic)  |  Y  |  Y  |  stable  |  
-|  [marathi](../scriptshifter/tables/data/marathi.yml)  |  Marathi  |  Y  |  Y  |    |  s-to-r lacks capitalization
+|  [marathi](../scriptshifter/tables/data/marathi_devanagari.yml)  |  Marathi  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [mari_cyrillic](../scriptshifter/tables/data/mari_cyrillic.yml)  |  Mari (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [moldovan_cyrillic](../scriptshifter/tables/data/moldovan_cyrillic.yml)  |  Moldovan (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [mongolian_cyrillic](../scriptshifter/tables/data/mongolian_cyrillic.yml)  |  Mongolian (Cyrillic)  |  Y  |  Y  |  stable  |  
@@ -86,13 +86,13 @@ third-party library.
 |  [sanskrit_devanagari](../scriptshifter/tables/data/sanskrit_devanagari.yml)  |  Sanskrit (Devanagari)  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [serbian](../scriptshifter/tables/data/serbian.yml)  |  Serbian  |  Y  |  Y  |  stable  |  
 |  [shor_cyrillic](../scriptshifter/tables/data/shor_cyrillic.yml)  |  Shor (Cyrillic)  |  Y  |  Y  |  stable  |  
-|  [sinhalese_sinhala](../scriptshifter/tables/data/sinhalese_sinhala.yml)  |  Sinhalese (Sinhala)  |  Y  |  Y  |    |  s-to-r lacks capitalization
+|  [sinhalese_sinhala](../scriptshifter/tables/data/sinhalese.yml)  |  Sinhalese (Sinhala)  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [syriac_cyrillic](../scriptshifter/tables/data/syriac_cyrillic.yml)  |  Syriac (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [tajik_cyrillic](../scriptshifter/tables/data/tajik_cyrillic.yml)  |  Tajik (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [tamil](../scriptshifter/tables/data/tamil.yml)  |  Tamil  |  Y  |  Y  |  beta  |  
 |  [tamil_brahmi](../scriptshifter/tables/data/tamil_brahmi.yml)  |  Tamil Brahmi  |  Y  |  Y  |    |  
 |  [tamil_extended](../scriptshifter/tables/data/tamil_extended.yml)  |  Tamil (extended)  |  Y  |  Y  |    |  
-|  [tatar-kryashen_cyrillic](../scriptshifter/tables/data/tatar.yml)  |  Tatar-Kryashen (Cyrillic)  |  Y  |  Y  |  stable  |  
+|  [tatar-kryashen_cyrillic](../scriptshifter/tables/data/tatar-kryashen_cyrillic.yml)  |  Tatar-Kryashen (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [tatar_cyrillic](../scriptshifter/tables/data/tatar_cyrillic.yml)  |  Tatar (Cyrillic)  |  Y  |  Y  |  stable  |  
 |  [telugu](../scriptshifter/tables/data/telugu.yml)  |  Telugu  |  Y  |  Y  |    |  s-to-r lacks capitalization
 |  [thai](../scriptshifter/tables/data/thai.yml)  |  Thai  |  Y  |  Y  |    |  

+ 0 - 133
scriptshifter/tables/data/_cyrillic_base.yml

@@ -1,133 +0,0 @@
-general:
-  name: Cyrillic base
-  parents:
-    - _ignore_base
-  notes: >
-    copied from Russian .cfg file and stripped
-    off language-specific tokens. Russian ignore list
-    has been left here on purpose, assuming it's valid
-    for all child languages.
-
-roman_to_script:
-  map:
-    "A": "\u0410"
-    "a": "\u0430"
-    "B": "\u0411"
-    "b": "\u0431"
-    "V": "\u0412"
-    "v": "\u0432"
-    "D": "\u0414"
-    "d": "\u0434"
-    "E": "\u0415"
-    "e": "\u0435"
-    # this conversion shouldn't be needed, but does no harm
-    "Z": "\u0417"
-    "z": "\u0437"
-    "I\u0306": "\u0419"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20U\uFE21": "\u042E"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20u\uFE21": "\u042E"
-    "I\uFE20A\uFE21": "\u042F"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20a\uFE21": "\u042F"
-    "i\u0306": "\u0439"
-    "i\uFE20u\uFE21": "\u044E"
-    "i\uFE20a\uFE21": "\u044F"
-    # this conversion shouldn't be needed, but does no harm
-    "KH": "\u0425"
-    "Kh": "\u0425"
-    "K": "\u041A"
-    "kh": "\u0445"
-    "k": "\u043A"
-    "L": "\u041B"
-    "l": "\u043B"
-    "M": "\u041C"
-    "m": "\u043C"
-    "N": "\u041D"
-    "n": "\u043D"
-    "O": "\u041E"
-    "o": "\u043E"
-    "P": "\u041F"
-    "p": "\u043F"
-    "R": "\u0420"
-    "r": "\u0440"
-    # this conversion shouldn't be needed, but does no harm
-    # this conversion shouldn't be needed, but does no harm
-    "SH": "\u0428"
-    "Sh": "\u0428"
-    "S": "\u0421"
-    "sh": "\u0448"
-    "s": "\u0441"
-    # this conversion shouldn't be needed, but does no harm
-    "T": "\u0422"
-    "t": "\u0442"
-    "U": "\u0423"
-    "u": "\u0443"
-    "F": "\u0424"
-    "f": "\u0444"
-    # this conversion shouldn't be needed, but does no harm
-    "CH": "\u0427"
-    "Ch": "\u0427"
-    "ch": "\u0447"
-    # this conversion shouldn't be needed, but does no harm
-    "\uFE20": ""
-    # this conversion shouldn't be needed, but does no harm
-    "\uFE21": ""
-    # this conversion is ambiguous - \u042C is also theoretically possible
-    "\u02B9": "\u044C"
-
-script_to_roman:
-  map:
-    "\u0404": "I\uFE20E\uFE21"
-    "\u0407": "I\u0308"
-    "\u0410": "A"
-    "\u0411": "B"
-    "\u0412": "V"
-    "\u0414": "D"
-    "\u0415": "E"
-    "\u0417": "Z"
-    "\u0419": "I\u0306"
-    "\u041A": "K"
-    "\u041B": "L"
-    "\u041C": "M"
-    "\u041D": "N"
-    "\u041E": "O"
-    "\u041F": "P"
-    "\u0420": "R"
-    "\u0421": "S"
-    "\u0422": "T"
-    "\u0423": "U"
-    "\u0424": "F"
-    "\u0425": "Kh"
-    "\u0427": "Ch"
-    "\u0428": "Sh"
-    "\u0429": "Shch"
-    "\u042C": "\u02B9"
-    "\u042E": "I\uFE20U\uFE21"
-    "\u042F": "I\uFE20A\uFE21"
-    "\u0430": "a"
-    "\u0431": "b"
-    "\u0432": "v"
-    "\u0434": "d"
-    "\u0435": "e"
-    "\u0437": "z"
-    "\u0439": "i\u0306"
-    "\u043A": "k"
-    "\u043B": "l"
-    "\u043C": "m"
-    "\u043D": "n"
-    "\u043E": "o"
-    "\u043F": "p"
-    "\u0440": "r"
-    "\u0441": "s"
-    "\u0442": "t"
-    "\u0443": "u"
-    "\u0444": "f"
-    "\u0445": "kh"
-    "\u0447": "ch"
-    "\u0448": "sh"
-    "\u0449": "shch"
-    "\u044C": "\u02B9"
-    "\u044E": "i\uFE20u\uFE21"
-    "\u044F": "i\uFE20a\uFE21"

+ 22 - 17
scriptshifter/tables/data/_ignore_base.yml

@@ -9,6 +9,12 @@ roman_to_script:
     - "date of publication not identified"
     - "place of publication not identified"
     - "publisher not identified"
+    - "and one other"
+    - "et al."
+  ignore_ptn:
+    - "and ([a-z0-9]+ )?others"
+
+    # Incorrectly entered (but frequently found) Roman numerals.
     # NOTE There is ambiguity about ignoring these
     # words. Note that the single-character Roman
     # numerals are not included on purpose.
@@ -16,25 +22,24 @@ roman_to_script:
     # dedicated U+2160÷U+216F (uppercase Roman
     # numerals) and/or U+2170÷U+217F (lower case Roman
     # numerals) ranges to avoid this ambiguity.
-    - "and one other"
-    - "et al."
-  ignore_ptn:
-    - "and ([a-z0-9]+ )?others"
-    - "I{2,3}"
-    - "I(V|X)"
-    - "LI{,3}"
-    - "LI?(V|X)"
-    - "L(V|X{1,3})I{,3}"
-    - "LX{1,3}I?V"
-    - "LX{1,3}VI{,3}"
-    - "(V|X{1,3})I{,3}"
-    - "X{1,3}I{,3}"
-    - "X{1,3}I(V|X)"
-    - "X{1,3}VI{,3}"
-    - "[\u2021$][0-9a-z] *"
+    - "\\bI{2,3}\\b"
+    - "\\bI(V|X)\\b"
+    - "\\bLI{,3}\\b"
+    - "\\bLI?(V|X)\\b"
+    - "\\bL(V|X{1,3})I{,3}\\b"
+    - "\\bLX{1,3}I?V\\b"
+    - "\\bLX{1,3}VI{,3}\\b"
+    - "\\b(V|X{1,3})I{,3}\\b"
+    - "\\bX{1,3}I{,3}\\b"
+    - "\\bX{1,3}I(V|X)\\b"
+    - "\\bX{1,3}VI{,3}\\b"
+
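+    # MARC sub-field markers. NOTE(review): \u2021 and $ are non-word characters, so the leading \b matches only when the marker directly follows a word character (not after a space or at start of input) - confirm this is intended.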
+    - "\\b[\u2021$][0-9a-z]\\b"
 
 script_to_roman:
   ignore:
     - " "
   ignore_ptn:
-    - "[\u2021$][0-9a-z] *"
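+    # MARC sub-field markers. NOTE(review): \u2021 and $ are non-word characters, so the leading \b matches only when the marker directly follows a word character (not after a space or at start of input) - confirm this is intended.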
+    - "\\b[\u2021$][0-9a-z]\\b"

+ 1 - 1
scriptshifter/tables/data/abkhaz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Abkhaz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/altai_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Altai (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 7 - 4
scriptshifter/tables/data/arabic.yml

@@ -15,11 +15,12 @@ general:
 roman_to_script:
   map:
 
-    # Original table by David Bucknum
-    # Last updated 25 January 2019
+    # Original table by David Bucknum, 5 April 2010
+    # Updated, 25 January 2019
     # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
     # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
     # Scripts Conceptually"
+    # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter
 
 
     # Punctuation marks:
@@ -135,11 +136,11 @@ roman_to_script:
     "fi\u0304-": "\u0641\u064A"
     "ka-": "\u0643"
 
-    # Vowels and vowel/consonant combinations
+    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
     "ah%": "\u0629"
     "at%": "\u0629"
 
-    # tanwin
+    # tanwin at end of word
     "an%": "\u0627"
 
     # ayn-alif combo
@@ -149,6 +150,8 @@ roman_to_script:
     "\u02BBA\u0304": "\u0639\u0627"
     "\u02BBa\u0304": "\u0639\u0627"
 
+    "\u02BBI\u0304Y": "\u0639\u064A"
+    "\u02BBi\u0304y": "\u0639\u064A"
     "\u02BBI\u0304": "\u0639\u064A"
     "\u02BBi\u0304": "\u0639\u064A"
 

+ 1 - 1
scriptshifter/tables/data/azerbaijani_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Azerbaijani (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/bashkir_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Bashkir (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/belarusian.yml

@@ -1,7 +1,7 @@
 general:
   name: Belarusian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 18 - 38
scriptshifter/tables/data/bulgarian.yml

@@ -1,58 +1,38 @@
 general:
   name: Bulgarian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:
-    "G": "\u0413"
-    "g": "\u0433"
-    # this conversion shouldn't be needed, but does no harm
-    "ZH": "\u0416"
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "I\uFE20E\uFE21": "\u0462"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20e\uFE21": "\u0462"
-    # this conversion shouldn't be needed, but does no harm
-    # this conversion shouldn't be needed, but does no harm
-    "I": "\u0418"
-    "i\uFE20e\uFE21": "\u0463"
-    "i": "\u0438"
-    # this conversion shouldn't be needed, but does no harm
     "SHT": "\u0429"
     "Sht": "\u0429"
     "sht": "\u0449"
-    "T\uFE20S\uFE21": "\u0426"
-    # this conversion shouldn't be needed, but does no harm
-    "T\uFE20s\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "U\u0310": "\u046A"
+    "U\u0306": "\u042A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u016C": "\u042A"
     "u\u0306": "\u044A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u016D": "\u044A"
+    "U\u0310": "\u046A"
     "u\u0310": "\u046B"
     # this conversion is ambiguous - \u042A is also theoretically possible
     "\u02BA": "\u044A"
+    # upper case hard sign is unlikely to occur
+    "\u02BA\u0332": "\u042A"
 
 script_to_roman:
   map:
-    "\u044C": ""
-    "\u042C": ""
-    "\u044A": ""
-    "\u042A%": ""  # Final
-    "\u042A": "u\u0306"
-    "\u0413": "G"
-    "\u0433": "g"
-    "\u0416": "Zh"
-    "\u0436": "zh"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0418": "I"
-    "\u0463": "i\uFE20e\uFE21"
-    "\u0438": "i"
     "\u0429": "Sht"
+    "\u042A": "U\u0306"
+    # Capital letter hard sign at the end of a word (rare)
+    "\u042A%": "\u02BA\u0332"
+    "\u042C": "\u02B9\u0332"
     "\u0449": "sht"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0446": "t\uFE20s\uFE21"
+    "\u044A": "u\u0306"
+    # Small letter hard sign at the end of a word (rare)
+    "\u044A%": "\u02BA"
+    "\u044C": "\u02B9"
     "\u046A": "U\u0310"
     "\u046B": "u\u0310"
-    "\u042A": "u\u016C"
-    "\u044A": "u\u016D"
+    

+ 1 - 2
scriptshifter/tables/data/buriat_cyrillic.yml

@@ -1,8 +1,7 @@
 general:
   name: Buriat (Cyrillic)
   parents:
-    - _cyrillic_base
-    - _ignore_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/chukchi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Chukchi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/church_slavonic.yml

@@ -1,7 +1,7 @@
 general:
   name: Church Slavonic
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/chuvash_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Chuvash (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 705 - 700
scriptshifter/tables/data/asian_cyrillic.yml → scriptshifter/tables/data/cyrillic_generic.yml

@@ -1,700 +1,705 @@
-general:
-  name: Asian (Cyrillic)
-  parents:
-    - _cyrillic_base
-
-# COMMON COMBINING CHARACTERS (always follow a base letter):
-# combining grave \u0300
-# combining acute \u0301
-# combining circumflex \u0302
-# combining tilde \u0303
-# combining macron \u0304
-# combining breve \u0306
-# combining dot above \u0307
-# combining diaeresis \u0308
-# combining ring above \u030A
-# combining double acute \u030B
-# combining caron (hachek) \u030C
-# combining candrabindu \u0310
-# combining dot below \u0323
-# combining dieresis below \u0324
-# combining comma below \u0326 (Romanian, Latvian, Livonian)
-# combining cedilla \u0327 (French, Turkish, Azeri)
-# combining ogonek (hook) \u0328 (Polish, Lithuanian)
-# combining low line \u0332
-# combining double low line \u0333
-# combining left ligature \uFE20 (Cyrillic transliteration)
-# combining right ligature \uFE21 (Cyrillic transliteration)
-# soft sign/prime (spacing) \u02B9 (Cyrillic transliteration)
-# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
-# ayn (spacing) \u02BB (Semitic and Caucasian languages)
-# alif (spacing) \u02BC (Semitic languages)
-# middle dot (spacing) \u00B7 (Catalan)
-
-roman_to_script:
-  map:
-    "A\uFE20E\uFE21": "\u04D4"
-    "A\uFE20e\uFE21": "\u04D4"
-    "a\uFE20e\uFE21": "\u04D5"
-    "A\u0306\u0323": "\u04D0"
-    "a\u0306\u0323": "\u04D1"
-    "\u00C6": "\u04D4"
-    "\u00E6": "\u04D5"
-    "A\u0306": "\u04D8"
-    "a\u0306": "\u04D9"
-    "A\u030B": "\u04DA"
-    "a\u030B": "\u04DB"
-    "A\u0308": "\u04D2"
-    "a\u0308": "\u04D3"
-    "A\u0310": "\u0518"
-    "a\u0310": "\u0519"
-    
-    "B": "\u0411"
-    "b": "\u0431"
-    
-    "C\u0301h\u0301": "\u04BE"
-    "c\u0301h\u0301": "\u04BF"
-    "C\u0301h": "\u04BC"
-    "c\u0301h": "\u04BD"
-    "C\u0301": "\u040B"
-    "c\u0301": "\u045B"
-    "C\u0308h": "\u04F4"
-    "c\u0308h": "\u04F5"
-    "C\u0323h": "\u04CB"
-    "c\u0323h": "\u04CC"
-    
-    "D\u0301": "\u0502"
-    "d\u0301": "\u0503"
-    "D\u0307": "\u0500"
-    "d\u0307": "\u0501"
-    "D\uFE20c\uFE21h": "\u052C"
-    "d\uFE20c\uFE21h": "\u052D"
-    "D\uFE20z\uFE21h": "\u052A"
-    "d\uFE20z\uFE21h": "\u052B"
-    "D\uFE20Z\uFE21": "\u04E0"
-    "d\uFE20z\uFE21": "\u04E1"
-    "Dz\u030C": "\u040F"
-    "dz\u030C": "\u045F"
-    "D": "\u0414"
-    "d": "\u0434"
-    
-    "E\u0300": "\u0400"
-    "e\u0300": "\u0450"
-    "E\u0304": "\u0404"
-    "e\u0304": "\u0454"
-    "E\u0306": "\u04D6"
-    "e\u0306": "\u04D7"
-    "E\u0306\u0323": "\u048C"
-    "e\u0306\u0323": "\u048D"
-    "E\u0307": "\u042D"
-    "e\u0307": "\u044D"
-    "E\u0308\u0323": "\u04EC"
-    "e\u0308\u0323": "\u04ED"
-    "E\u0308": "\u0401"
-    "e\u0308": "\u0451"
-    "E\u0328": "\u0466"
-    "e\u0328": "\u0467"
-    
-    "F\u0307": "\u0472"
-    "f\u0307": "\u0473"
-    "F": "\u0424"
-    "f": "\u0444"
-    
-    "Gh\u0327": "\u04FA"
-    "gh\u0327": "\u04FB"
-    "Gh": "\u0492"
-    "gh": "\u0493"
-    "G\u0301": "\u0403"
-    "g\u0301": "\u0453"
-    "G\u0306": "\u0490"
-    "g\u0306": "\u0491"
-    "G\u0307": "\u049C"
-    "g\u0307": "\u049D"
-    "G\u0323": "\u04F6"
-    "g\u0323": "\u04F7"
-    "G\u0327": "\u0494"
-    "g\u0327": "\u0495"
-    
-    "H\u0304": "\u04FE"
-    "h\u0304": "\u04FF"
-    "H\u0327": "\u04FC"
-    "h\u0327": "\u04FD"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
-    "H\u0308": "\u04C0"
-    "h\u0308": "\u04CF"
-    
-    "I\u0300": "\u040D"
-    "i\u0300": "\u045D"
-    "I\u0304\u0323": "\u04E2"
-    "i\u0304\u0323": "\u04E3"
-    "I\u0304": "\u0406"
-    "i\u0304": "\u0456"
-    "I\u0306\u0323": "\u048A"
-    "i\u0306\u0323": "\u048B"
-    "I\u0306": "\u0419"
-    "i\u0306": "\u0439"
-    "I\u0308\u0323": "\u04E4"
-    "i\u0308\u0323": "\u04E5"
-    "I\u0308": "\u0407"
-    "i\u0308": "\u0457"
-    "I\u0310": "\u0408"
-    "i\u0310": "\u0458"
-    
-    "I\uFE20A\uFE21": "\u042F"
-    "i\uFE20a\uFE21": "\u044F"
-    "A": "\u0410"
-    "a": "\u0430"
-    
-    "I\uFE20E\uFE21\u0304": "\u0464"
-    "i\uFE20e\uFE21\u0304": "\u0465"
-    "I\uFE20E\uFE21\u0328": "\u0468"
-    "i\uFE20e\uFE21\u0328": "\u0469"
-    "I\uFE20E\uFE21": "\u0462"
-    "i\uFE20e\uFE21": "\u0463"
-    "E": "\u0415"
-    "e": "\u0435"
-    
-    "I\uFE20O\uFE21\u0328": "\u046C"
-    "i\uFE20o\uFE21\u0328": "\u046D"
-    "I\uFE20U\uFE21": "\u042E"
-    "i\uFE20u\uFE21": "\u044E"
-    "I": "\u0418"
-    "i": "\u0438"
-    
-    "J\u0304": "\u04B8"
-    "j\u0304": "\u04B9"
-    "J\u0306": "\u04C1"
-    "j\u0306": "\u04C2"
-    "J\u0302": "\u04B6"
-    "j\u0302": "\u04B7"
-    "J\u0308": "\u04DC"
-    "j\u0308": "\u04DD"
-    
-    "K\u0300": "\u051E"
-    "k\u0300": "\u051F"
-    "K\u0301": "\u040C"
-    "k\u0301": "\u045C"
-    "K\uFE20H\uFE21": "\u04B2"
-    "k\uFE20h\uFE21": "\u04B3"
-    "Kh": "\u0425"
-    "kh": "\u0445"
-    "K\uFE20S\uFE21": "\u046E"
-    "k\uFE20s\uFE21": "\u046F"
-    "K": "\u041A"
-    "k": "\u043A"
-    
-    "Lj": "\u0409"
-    "lj": "\u0459"
-    "Lkh\u0307": "\u0514"
-    "lkh\u0307": "\u0515"
-    "L\u0301": "\u0508"
-    "l\u0301": "\u0509"
-    "L\u0321": "\u04C5"
-    "l\u0326": "\u04C6"
-    "L\u0323": "\u052E"
-    "l\u0323": "\u052F"
-    "L\u0327": "\u0512"
-    "l\u0327": "\u0513"
-    "L\u0324": "\u0520"
-    "l\u0324": "\u0521"
-    "L": "\u041B"
-    "l": "\u043B"
-    
-    "M\u0323": "\u04CD"
-    "m\u0323": "\u04CE"
-    "M": "\u041C"
-    "m": "\u043C"
-    
-    "Nj": "\u040A"
-    "nj": "\u045A"
-    "N\u0301G\u0300": "\u04A4"
-    "n\u0301g\u0300": "\u04A5"
-    "N\u0301": "\u050A"
-    "n\u0301": "\u050B"
-    "N\u0326": "\u0528"
-    "n\u0326": "\u0529"
-    "N\u0327": "\u0522"
-    "n\u0327": "\u0523"
-    "N\uFE20\u0323G\uFE21": "\u04C9"
-    "n\uFE20\u0323g\uFE21": "\u04CA"
-    "N\uFE20\u0327G\uFE21": "\u04C7"
-    "n\uFE20\u0327g\uFE21": "\u04C8"
-    "N\uFE20G\uFE21": "\u04A2"
-    "n\uFE20g\uFE21": "\u04A3"
-    "No\u0332": "\u2116"
-    "N": "\u041D"
-    "n": "\u043D"
-    
-    "G": "\u0413"
-    "g": "\u0433"
-    
-    "J": "\u0496"
-    "j": "\u0497"
-    
-    "O\u0303": "\u047C"
-    "o\u0303": "\u047D"
-    "O\u0304\u0323": "\u047A"
-    "o\u0304\u0323": "\u047B"
-    "O\u0304\uFE20T\uFE21": "\u047E"
-    "o\u0304\uFE20t\uFE21": "\u047F"
-    "O\u0304\u0324": "\u0460"
-    "o\u0304\u0324": "\u0461"
-    "O\u0304": "\u04EA"
-    "o\u0304": "\u04EB"
-    "O\u0307": "\u04E8"
-    "o\u0307": "\u04E9"
-    "O\u0308": "\u04E6"
-    "o\u0308": "\u04E7"
-    "O\u0328": "\u046A"
-    "o\u0328": "\u046B"
-    "O\uFE20u\uFE21": "\u0478"
-    "o\uFE20u\uFE21": "\u0479"
-    "O": "\u041E"
-    "o": "\u043E"
-    
-    "Ph": "\u04A6"
-    "ph": "\u04A7"
-    "P\u0323": "\u0524"
-    "p\u0323": "\u0525"
-    "P\uFE20S\uFE21": "\u0470"
-    "p\uFE20s\uFE21": "\u0471"
-    "P": "\u041F"
-    "p": "\u043F"
-    
-    "Q\u0300": "\u04A0"
-    "q\u0300": "\u04A1"
-    "Q\u0302": "\u0480"
-    "q\u0302": "\u0481"
-    "Q\u0304": "\u049E"
-    "q\u0304": "\u049F"
-    "Q\u0307": "\u04C3"
-    "q\u0307": "\u04C4"
-    "Q\u0308": "\u051A"
-    "q\u0308": "\u051B"
-    "Q": "\u049A"
-    "q": "\u049B"
-    
-    "Rkh\u0307": "\u0516"
-    "rkh\u0307": "\u0517"
-    "R\u0306": "\u048E"
-    "r\u0306": "\u048F"
-    "R": "\u0420"
-    "r": "\u0440"
-    
-    "Shch": "\u0429"
-    "shch": "\u0449"
-    "Sh\u0323": "\u0526"
-    "sh\u0323": "\u0527"
-    "Sh": "\u0428"
-    "sh": "\u0448"
-    "S\u0301": "\u050C"
-    "s\u0301": "\u050D"
-    "S\u0307": "\u0405"
-    "s\u0307": "\u0455"
-    
-    "Ch": "\u0427"
-    "ch": "\u0447"
-    "C": "\u0426"
-    "c": "\u0446"
-    
-    "Th": "\u04AA"
-    "th": "\u04AB"
-    "T\u0301": "\u050E"
-    "t\u0301": "\u050F"
-    "T\u0327": "\u04AC"
-    "t\u0327": "\u04AD"
-    "T\uFE20H\uFE21": "\u0498"
-    "t\uFE20h\uFE21": "\u0499"
-    "T\uFE20S\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "T\uFE20S\uFE21\u0307": "\u04B4"
-    "t\uFE20s\uFE21\u0307": "\u04B5"
-    
-    "S": "\u0421"
-    "s": "\u0441"
-    
-    "T": "\u0422"
-    "t": "\u0442"
-    
-    "U\u0302": "\u04B0"
-    "u\u0302": "\u04B1"
-    "U\u0304": "\u04EE"
-    "u\u0304": "\u04EF"
-    "U\u0306": "\u040E"
-    "u\u0306": "\u045E"
-    "U\u0307": "\u04AE"
-    "u\u0307": "\u04AF"
-    "U\u0308": "\u04F0"
-    "u\u0308": "\u04F1"
-    "U\u030B": "\u04F2"
-    "u\u030B": "\u04F3"
-    "U": "\u0423"
-    "u": "\u0443"
-    
-    "V\u0307": "\u0474"
-    "v\u0307": "\u0475"
-    "V\u0308": "\u0476"
-    "v\u0308": "\u0477"
-    "V": "\u0412"
-    "v": "\u0432"
-    
-    "W\u0308": "\u051C"
-    "w\u0308": "\u051D"
-    "W": "\u04A8"
-    "w": "\u04A9"
-    
-    "X": "\u0058"
-    "x": "\u0078"
-    
-    "Y\u0307": "\u0474"
-    "y\u0307": "\u0475"
-    "Y\u0308": "\u04F8"
-    "y\u0308": "\u04F9"
-    "Y": "\u042B"
-    "y": "\u044B"
-    
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "Z\u0301": "\u0504"
-    "z\u0301": "\u0505"
-    "Z\u0307": "\u0510"
-    "z\u0307": "\u0511"
-    "Z\u0308": "\u04DE"
-    "z\u0308": "\u04DF"
-    "Z\u0327": "\u0506"
-    "z\u0327": "\u0507"
-    "Z": "\u0417"
-    "z": "\u0437"
-    
-    "H": "\u0413"
-    "h": "\u0433"
-    
-    "\u0110": "\u0402"
-    "\u0111": "\u0452"
-    "\u02B9\u0333": "\u042C"
-    "\u02B9": "\u044C"
-    "\u02BA\u0333": "\u042A"
-    "\u02BA": "\u044A"
-    "\u0303": "\u0487"
-    "\u0311": "\u0484"
-    "\u0313": "\u0486"
-    "\u0314": "\u0485"
-    "\u007E": "\u0483"
-    "(|)": "\u0482"
-    "(^)": "\u0488"
-    "(')": "\u0489"
-    
-    "\u003C\u003C": "\u00AB"
-    "\u003E\u003E": "\u00BB"
-
-script_to_roman:
-  map:
-    
-    "\u00AB": "\""
-    "\u00BB": "\""
-    "\u2116": "No\u0332"
-    "\u0400": "E\u0300"
-    "\u0401": "E\u0308"
-    "\u0402": "\u0110"
-    "\u0403": "G\u0301"
-    "\u0404": "E\u0304"
-    "\u0405": "S\u0307"
-    "\u0406": "I\u0304"
-    "\u0407": "I\u0308"
-    "\u0408": "I\u0310"
-    "\u0409": "Lj"
-    "\u040A": "Nj"
-    "\u040B": "C\u0301"
-    "\u040C": "K\u0301"
-    "\u040D": "I\u0300"
-    "\u040E": "U\u0306"
-    "\u040F": "Dz\u030C"
-    "\u0410": "A"
-    "\u0411": "B"
-    "\u0412": "V"
-    "\u0413": "G"
-    "\u0414": "D"
-    "\u0415": "E"
-    "\u0416": "Zh"
-    "\u0417": "Z"
-    "\u0418": "I"
-    "\u0419": "I\u0306"
-    "\u041A": "K"
-    "\u041B": "L"
-    "\u041C": "M"
-    "\u041D": "N"
-    "\u041E": "O"
-    "\u041F": "P"
-    "\u0420": "R"
-    "\u0421": "S"
-    "\u0422": "T"
-    "\u0423": "U"
-    "\u0424": "F"
-    "\u0425": "Kh"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0427": "Ch"
-    "\u0428": "Sh"
-    "\u0429": "Shch"
-    "\u042A": "\u02BA\u0333"
-    "\u042B": "Y"
-    "\u042C": "\u02B9\u0333"
-    "\u042D": "E\u0307"
-    "\u042E": "I\uFE20U\uFE21"
-    "\u042F": "I\uFE20A\uFE21"
-    "\u0430": "a"
-    "\u0431": "b"
-    "\u0432": "v"
-    "\u0433": "g"
-    "\u0434": "d"
-    "\u0435": "e"
-    "\u0436": "zh"
-    "\u0437": "z"
-    "\u0438": "i"
-    "\u0439": "i\u0306"
-    "\u043A": "k"
-    "\u043B": "l"
-    "\u043C": "m"
-    "\u043D": "n"
-    "\u043E": "o"
-    "\u043F": "p"
-    "\u0440": "r"
-    "\u0441": "s"
-    "\u0442": "t"
-    "\u0443": "u"
-    "\u0444": "f"
-    "\u0445": "kh"
-    "\u0446": "t\uFE20s\uFE21"
-    "\u0447": "ch"
-    "\u0448": "sh"
-    "\u0449": "shch"
-    "\u044A": "\u02BA"
-    "\u044B": "y"
-    "\u044C": "\u02B9"
-    "\u044D": "e\u0307"
-    "\u044E": "i\uFE20u\uFE21"
-    "\u044F": "i\uFE20a\uFE21"
-    "\u0450": "e\u0300"
-    "\u0451": "e\u0308"
-    "\u0452": "\u0111"
-    "\u0453": "g\u0301"
-    "\u0454": "e\u0304"
-    "\u0455": "s\u0307"
-    "\u0456": "i\u0304"
-    "\u0457": "i\u0308"
-    "\u0458": "i\u0310"
-    "\u0459": "lj"
-    "\u045A": "nj"
-    "\u045B": "c\u0301"
-    "\u045C": "k\u0301"
-    "\u045D": "i\u0300"
-    "\u045E": "u\u0306"
-    "\u045F": "dz\u030C"
-    "\u0460": "O\u0304\u0324"
-    "\u0461": "o\u0304\u0324"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0463": "i\uFE20e\uFE21"
-    "\u0464": "I\uFE20E\uFE21\u0304"
-    "\u0465": "i\uFE20e\uFE21\u0304"
-    "\u0466": "E\u0328"
-    "\u0467": "e\u0328"
-    "\u0468": "I\uFE20E\uFE21\u0328"
-    "\u0469": "i\uFE20e\uFE21\u0328"
-    "\u046A": "O\u0328"
-    "\u046B": "o\u0328"
-    "\u046C": "I\uFE20O\uFE21\u0328"
-    "\u046D": "i\uFE20o\uFE21\u0328"
-    "\u046E": "K\uFE20S\uFE21"
-    "\u046F": "k\uFE20s\uFE21"
-    "\u0470": "P\uFE20S\uFE21"
-    "\u0471": "p\uFE20s\uFE21"
-    "\u0472": "F\u0307"
-    "\u0473": "f\u0307"
-    "\u0474": "V\u0307"
-    "\u0475": "v\u0307"
-    "\u0476": "V\u0308"
-    "\u0477": "v\u0308"
-    "\u0478": "O\uFE20u\uFE21"
-    "\u0479": "o\uFE20u\uFE21"
-    "\u047A": "O\u0304\u0323"
-    "\u047B": "o\u0304\u0323"
-    "\u047C": "O\u0303"
-    "\u047D": "o\u0303"
-    "\u047E": "O\u0304\uFE20T\uFE21"
-    "\u047F": "o\u0304\uFE20t\uFE21"
-    "\u0480": "Q\u0302"
-    "\u0481": "q\u0302"
-    "\u0482": "(|)"
-    "\u0483": "\u007E"
-    "\u0484": "\u0311"
-    "\u0485": "\u0314"
-    "\u0486": "\u0313"
-    "\u0487": "\u0303"
-    "\u0488": "(^)"
-    "\u0489": "(')"
-    "\u048A": "I\u0306\u0323"
-    "\u048B": "i\u0306\u0323"
-    "\u048C": "E\u0306\u0323"
-    "\u048D": "e\u0306\u0323"
-    "\u048E": "R\u0306"
-    "\u048F": "r\u0306"
-    "\u0490": "G\u0306"
-    "\u0491": "g\u0306"
-    "\u0492": "Gh"
-    "\u0493": "gh"
-    "\u0494": "G\u0327"
-    "\u0495": "g\u0327"
-    "\u0496": "J"
-    "\u0497": "j"
-    "\u0498": "T\uFE20H\uFE21"
-    "\u0499": "t\uFE20h\uFE21"
-    "\u049A": "Q"
-    "\u049B": "q"
-    "\u049C": "G\u0307"
-    "\u049D": "g\u0307"
-    "\u049E": "Q\u0304"
-    "\u049F": "q\u0304"
-    "\u04A0": "Q\u0300"
-    "\u04A1": "q\u0300"
-    "\u04A2": "N\uFE20G\uFE21"
-    "\u04A3": "n\uFE20g\uFE21"
-    "\u04A4": "N\u0301G\u0300"
-    "\u04A5": "n\u0301g\u0300"
-    "\u04A6": "Ph"
-    "\u04A7": "ph"
-    "\u04A8": "W"
-    "\u04A9": "w"
-    "\u04AA": "Th"
-    "\u04AB": "th"
-    "\u04AC": "T\u0327"
-    "\u04AD": "t\u0327"
-    "\u04AE": "U\u0307"
-    "\u04AF": "u\u0307"
-    "\u04B0": "U\u0302"
-    "\u04B1": "u\u0302"
-    "\u04B2": "K\uFE20H\uFE21"
-    "\u04B3": "k\uFE20h\uFE21"
-    "\u04B4": "T\uFE20S\uFE21\u0307"
-    "\u04B5": "t\uFE20s\uFE21\u0307"
-    "\u04B6": "J\u0302"
-    "\u04B7": "j\u0302"
-    "\u04B8": "J\u0304"
-    "\u04B9": "j\u0304"
-    "\u04BA": "H\u0307"
-    "\u04BB": "h\u0307"
-    "\u04BC": "C\u0301h"
-    "\u04BD": "c\u0301h"
-    "\u04BE": "C\u0301h\u0301"
-    "\u04BF": "c\u0301h\u0301"
-    "\u04C0": "H\u0308"
-    "\u04C1": "J\u0306"
-    "\u04C2": "j\u0306"
-    "\u04C3": "Q\u0307"
-    "\u04C4": "q\u0307"
-    "\u04C5": "L\u0326"
-    "\u04C6": "l\u0326"
-    "\u04C7": "N\uFE20\u0327G\uFE21"
-    "\u04C8": "n\uFE20\u0327g\uFE21"
-    "\u04C9": "N\uFE20\u0323G\uFE21"
-    "\u04CA": "n\uFE20\u0323g\uFE21"
-    "\u04CB": "C\u0323h"
-    "\u04CC": "c\u0323h"
-    "\u04CD": "M\u0323"
-    "\u04CE": "m\u0323"
-    "\u04CF": "h\u0308"
-    "\u04D0": "A\u0306\u0323"
-    "\u04D1": "a\u0306\u0323"
-    "\u04D2": "A\u0308"
-    "\u04D3": "a\u0308"
-    "\u04D4": "\u00C6"
-    "\u04D5": "\u00E6"
-    "\u04D6": "E\u0306"
-    "\u04D7": "e\u0306"
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
-    "\u04DA": "A\u030B"
-    "\u04DB": "a\u030B"
-    "\u04DC": "J\u0308"
-    "\u04DD": "j\u0308"
-    "\u04DE": "Z\u0308"
-    "\u04DF": "z\u0308"
-    "\u04E0": "D\uFE20Z\uFE21"
-    "\u04E1": "d\uFE20z\uFE21"
-    "\u04E2": "I\u0304\u0323"
-    "\u04E3": "i\u0304\u0323"
-    "\u04E4": "I\u0308\u0323"
-    "\u04E5": "i\u0308\u0323"
-    "\u04E6": "O\u0308"
-    "\u04E7": "o\u0308"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
-    "\u04EA": "O\u0304"
-    "\u04EB": "o\u0304"
-    "\u04EC": "E\u0308\u0323"
-    "\u04ED": "e\u0308\u0323"
-    "\u04EE": "U\u0304"
-    "\u04EF": "u\u0304"
-    "\u04F0": "U\u0308"
-    "\u04F1": "u\u0308"
-    "\u04F2": "U\u030B"
-    "\u04F3": "u\u030B"
-    "\u04F4": "C\u0308h"
-    "\u04F5": "c\u0308h"
-    "\u04F6": "G\u0323"
-    "\u04F7": "g\u0323"
-    "\u04F8": "Y\u0308"
-    "\u04F9": "y\u0308"
-    "\u04FA": "Gh\u0327"
-    "\u04FB": "gh\u0327"
-    "\u04FC": "H\u0327"
-    "\u04FD": "h\u0327"
-    "\u04FE": "H\u0304"
-    "\u04FF": "h\u0304"
-    "\u0500": "D\u0307"
-    "\u0501": "d\u0307"
-    "\u0502": "D\u0301"
-    "\u0503": "d\u0301"
-    "\u0504": "Z\u0301"
-    "\u0505": "z\u0301"
-    "\u0506": "Z\u0327"
-    "\u0507": "z\u0327"
-    "\u0508": "L\u0301"
-    "\u0509": "l\u0301"
-    "\u050A": "N\u0301"
-    "\u050B": "n\u0301"
-    "\u050C": "S\u0301"
-    "\u050D": "s\u0301"
-    "\u050E": "T\u0301"
-    "\u050F": "t\u0301"
-    "\u0510": "Z\u0307"
-    "\u0511": "z\u0307"
-    "\u0512": "L\u0327"
-    "\u0513": "l\u0327"
-    "\u0514": "Lkh\u0307"
-    "\u0515": "lkh\u0307"
-    "\u0516": "Rkh\u0307"
-    "\u0517": "rkh\u0307"
-    "\u0518": "A\u0310"
-    "\u0519": "a\u0310"
-    "\u051A": "Q\u0308"
-    "\u051B": "q\u0308"
-    "\u051C": "W\u0308"
-    "\u051D": "w\u0308"
-    "\u051E": "K\u0300"
-    "\u051F": "k\u0300"
-    "\u0520": "L\u0324"
-    "\u0521": "l\u0324"
-    "\u0522": "N\u0327"
-    "\u0523": "n\u0327"
-    "\u0524": "P\u0323"
-    "\u0525": "p\u0323"
-    "\u0526": "Sh\u0323"
-    "\u0527": "sh\u0323"
-    "\u0528": "N\u0326"
-    "\u0529": "n\u0326"
-    "\u052A": "D\uFE20z\uFE21h"
-    "\u052B": "d\uFE20z\uFE21h"
-    "\u052C": "D\uFE20c\uFE21h"
-    "\u052D": "d\uFE20c\uFE21h"
-    "\u052E": "L\u0323"
-    "\u052F": "l\u0323"
+---
+general:
+  name: Cyrillic (Generic)
+  parents:
+    - _ignore_base
+
+# COMMON COMBINING CHARACTERS (always follow a base letter):
+# combining grave \u0300
+# combining acute \u0301
+# combining circumflex \u0302
+# combining tilde \u0303
+# combining macron \u0304
+# combining breve \u0306
+# combining dot above \u0307
+# combining diaeresis \u0308
+# combining ring above \u030A
+# combining double acute \u030B
+# combining caron (hachek) \u030C
+# combining candrabindu \u0310
+# combining dot below \u0323
+# combining diaeresis below \u0324
+# combining comma below \u0326 (Romanian, Latvian, Livonian)
+# combining cedilla \u0327 (French, Turkish, Azeri)
+# combining ogonek (hook) \u0328 (Polish, Lithuanian)
+# combining low line \u0332
+# combining double low line \u0333
+# combining left ligature \uFE20 (Cyrillic transliteration)
+# combining right ligature \uFE21 (Cyrillic transliteration)
+# soft sign/prime (spacing) \u02B9 (Cyrillic transliteration)
+# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
+# ayn (spacing) \u02BB (Semitic and Caucasian languages)
+# alif (spacing) \u02BC (Semitic languages)
+# middle dot (spacing) \u00B7 (Catalan)
+
+roman_to_script:
+  map:
+    "A\uFE20E\uFE21": "\u04D4"
+    "A\uFE20e\uFE21": "\u04D4"
+    "a\uFE20e\uFE21": "\u04D5"
+    "A\u0306\u0323": "\u04D0"
+    "a\u0306\u0323": "\u04D1"
+    "\u00C6": "\u04D4"
+    "\u00E6": "\u04D5"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "A\u030B": "\u04DA"
+    "a\u030B": "\u04DB"
+    "A\u0308": "\u04D2"
+    "a\u0308": "\u04D3"
+    "A\u0310": "\u0518"
+    "a\u0310": "\u0519"
+
+    "B": "\u0411"
+    "b": "\u0431"
+
+    "C\u0301h\u0301": "\u04BE"
+    "c\u0301h\u0301": "\u04BF"
+    "C\u0301h": "\u04BC"
+    "c\u0301h": "\u04BD"
+    "C\u0301": "\u040B"
+    "c\u0301": "\u045B"
+    "C\u0308h": "\u04F4"
+    "c\u0308h": "\u04F5"
+    "C\u0323h": "\u04CB"
+    "c\u0323h": "\u04CC"
+
+    "D\u0301": "\u0502"
+    "d\u0301": "\u0503"
+    "D\u0307": "\u0500"
+    "d\u0307": "\u0501"
+    "D\uFE20c\uFE21h": "\u052C"
+    "d\uFE20c\uFE21h": "\u052D"
+    "D\uFE20z\uFE21h": "\u052A"
+    "d\uFE20z\uFE21h": "\u052B"
+    "D\uFE20Z\uFE21": "\u04E0"
+    "d\uFE20z\uFE21": "\u04E1"
+    "Dz\u030C": "\u040F"
+    "dz\u030C": "\u045F"
+    "D": "\u0414"
+    "d": "\u0434"
+
+    "E\u0300": "\u0400"
+    "e\u0300": "\u0450"
+    "E\u0304": "\u0404"
+    "e\u0304": "\u0454"
+    "E\u0306": "\u04D6"
+    "e\u0306": "\u04D7"
+    "E\u0306\u0323": "\u048C"
+    "e\u0306\u0323": "\u048D"
+    "E\u0307": "\u042D"
+    "e\u0307": "\u044D"
+    "E\u0308\u0323": "\u04EC"
+    "e\u0308\u0323": "\u04ED"
+    "E\u0308": "\u0401"
+    "e\u0308": "\u0451"
+    "E\u0328": "\u0466"
+    "e\u0328": "\u0467"
+
+    "F\u0307": "\u0472"
+    "f\u0307": "\u0473"
+    "F": "\u0424"
+    "f": "\u0444"
+
+    "Gh\u0327": "\u04FA"
+    "gh\u0327": "\u04FB"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "G\u0301": "\u0403"
+    "g\u0301": "\u0453"
+    "G\u0306": "\u0490"
+    "g\u0306": "\u0491"
+    "G\u0307": "\u049C"
+    "g\u0307": "\u049D"
+    "G\u0323": "\u04F6"
+    "g\u0323": "\u04F7"
+    "G\u0327": "\u0494"
+    "g\u0327": "\u0495"
+
+    "H\u0304": "\u04FE"
+    "h\u0304": "\u04FF"
+    "H\u0327": "\u04FC"
+    "h\u0327": "\u04FD"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "H\u0308": "\u04C0"
+    "h\u0308": "\u04CF"
+
+    "I\u0300": "\u040D"
+    "i\u0300": "\u045D"
+    "I\u0304\u0323": "\u04E2"
+    "i\u0304\u0323": "\u04E3"
+    "I\u0304": "\u0406"
+    "i\u0304": "\u0456"
+    "I\u0306\u0323": "\u048A"
+    "i\u0306\u0323": "\u048B"
+    "I\u0306": "\u0419"
+    "i\u0306": "\u0439"
+    "I\u0308\u0323": "\u04E4"
+    "i\u0308\u0323": "\u04E5"
+    "I\u0308": "\u0407"
+    "i\u0308": "\u0457"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+
+    "I\uFE20A\uFE21": "\u042F"
+    "i\uFE20a\uFE21": "\u044F"
+    "A": "\u0410"
+    "a": "\u0430"
+
+    "I\uFE20E\uFE21\u0304": "\u0464"
+    "i\uFE20e\uFE21\u0304": "\u0465"
+    "I\uFE20E\uFE21\u0328": "\u0468"
+    "i\uFE20e\uFE21\u0328": "\u0469"
+    "I\uFE20E\uFE21": "\u0462"
+    "i\uFE20e\uFE21": "\u0463"
+    "E": "\u0415"
+    "e": "\u0435"
+
+    "I\uFE20O\uFE21\u0328": "\u046C"
+    "i\uFE20o\uFE21\u0328": "\u046D"
+    "I\uFE20U\uFE21": "\u042E"
+    "i\uFE20u\uFE21": "\u044E"
+    "I": "\u0418"
+    "i": "\u0438"
+
+    "J\u0304": "\u04B8"
+    "j\u0304": "\u04B9"
+    "J\u0306": "\u04C1"
+    "j\u0306": "\u04C2"
+    "J\u0302": "\u04B6"
+    "j\u0302": "\u04B7"
+    "J\u0308": "\u04DC"
+    "j\u0308": "\u04DD"
+
+    "K\u0300": "\u051E"
+    "k\u0300": "\u051F"
+    "K\u0301": "\u040C"
+    "k\u0301": "\u045C"
+    "K\uFE20H\uFE21": "\u04B2"
+    "k\uFE20h\uFE21": "\u04B3"
+    "Kh": "\u0425"
+    "kh": "\u0445"
+    "K\uFE20S\uFE21": "\u046E"
+    "k\uFE20s\uFE21": "\u046F"
+    "K": "\u041A"
+    "k": "\u043A"
+
+    "Lj": "\u0409"
+    "lj": "\u0459"
+    "Lkh\u0307": "\u0514"
+    "lkh\u0307": "\u0515"
+    "L\u0301": "\u0508"
+    "l\u0301": "\u0509"
+    "L\u0326": "\u04C5"
+    "l\u0326": "\u04C6"
+    "L\u0323": "\u052E"
+    "l\u0323": "\u052F"
+    "L\u0327": "\u0512"
+    "l\u0327": "\u0513"
+    "L\u0324": "\u0520"
+    "l\u0324": "\u0521"
+    "L": "\u041B"
+    "l": "\u043B"
+
+    "M\u0323": "\u04CD"
+    "m\u0323": "\u04CE"
+    "M": "\u041C"
+    "m": "\u043C"
+
+    "Nj": "\u040A"
+    "nj": "\u045A"
+    "N\u0301G\u0300": "\u04A4"
+    "n\u0301g\u0300": "\u04A5"
+    "N\u0301": "\u050A"
+    "n\u0301": "\u050B"
+    "N\u0326": "\u0528"
+    "n\u0326": "\u0529"
+    "N\u0327": "\u0522"
+    "n\u0327": "\u0523"
+    "N\uFE20\u0323G\uFE21": "\u04C9"
+    "n\uFE20\u0323g\uFE21": "\u04CA"
+    "N\uFE20\u0327G\uFE21": "\u04C7"
+    "n\uFE20\u0327g\uFE21": "\u04C8"
+    "N\uFE20G\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "No\u0332": "\u2116"
+    "N": "\u041D"
+    "n": "\u043D"
+
+    "G": "\u0413"
+    "g": "\u0433"
+
+    "J": "\u0496"
+    "j": "\u0497"
+
+    "O\u0303": "\u047C"
+    "o\u0303": "\u047D"
+    "O\u0304\u0323": "\u047A"
+    "o\u0304\u0323": "\u047B"
+    "O\u0304\uFE20T\uFE21": "\u047E"
+    "o\u0304\uFE20t\uFE21": "\u047F"
+    "O\u0304\u0324": "\u0460"
+    "o\u0304\u0324": "\u0461"
+    "O\u0304": "\u04EA"
+    "o\u0304": "\u04EB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "O\u0308": "\u04E6"
+    "o\u0308": "\u04E7"
+    "O\u0328": "\u046A"
+    "o\u0328": "\u046B"
+    "O\uFE20u\uFE21": "\u0478"
+    "o\uFE20u\uFE21": "\u0479"
+    "O": "\u041E"
+    "o": "\u043E"
+
+    "Ph": "\u04A6"
+    "ph": "\u04A7"
+    "P\u0323": "\u0524"
+    "p\u0323": "\u0525"
+    "P\uFE20S\uFE21": "\u0470"
+    "p\uFE20s\uFE21": "\u0471"
+    "P": "\u041F"
+    "p": "\u043F"
+
+    "Q\u0300": "\u04A0"
+    "q\u0300": "\u04A1"
+    "Q\u0302": "\u0480"
+    "q\u0302": "\u0481"
+    "Q\u0304": "\u049E"
+    "q\u0304": "\u049F"
+    "Q\u0307": "\u04C3"
+    "q\u0307": "\u04C4"
+    "Q\u0308": "\u051A"
+    "q\u0308": "\u051B"
+    "Q": "\u049A"
+    "q": "\u049B"
+
+    "Rkh\u0307": "\u0516"
+    "rkh\u0307": "\u0517"
+    "R\u0306": "\u048E"
+    "r\u0306": "\u048F"
+    "R": "\u0420"
+    "r": "\u0440"
+
+    "Shch": "\u0429"
+    "shch": "\u0449"
+    "Sh\u0323": "\u0526"
+    "sh\u0323": "\u0527"
+    "Sh": "\u0428"
+    "sh": "\u0448"
+    "S\u0301": "\u050C"
+    "s\u0301": "\u050D"
+    "S\u0307": "\u0405"
+    "s\u0307": "\u0455"
+
+    "Ch": "\u0427"
+    "ch": "\u0447"
+    "C": "\u0426"
+    "c": "\u0446"
+
+    "Th": "\u04AA"
+    "th": "\u04AB"
+    "T\u0301": "\u050E"
+    "t\u0301": "\u050F"
+    "T\u0327": "\u04AC"
+    "t\u0327": "\u04AD"
+    "T\uFE20H\uFE21": "\u0498"
+    "t\uFE20h\uFE21": "\u0499"
+    "T\uFE20S\uFE21": "\u0426"
+    "t\uFE20s\uFE21": "\u0446"
+    "T\uFE20S\uFE21\u0307": "\u04B4"
+    "t\uFE20s\uFE21\u0307": "\u04B5"
+
+    "S": "\u0421"
+    "s": "\u0441"
+
+    "T": "\u0422"
+    "t": "\u0442"
+
+    "U\u0302": "\u04B0"
+    "u\u0302": "\u04B1"
+    "U\u0304": "\u04EE"
+    "u\u0304": "\u04EF"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "U\u0308": "\u04F0"
+    "u\u0308": "\u04F1"
+    "U\u030B": "\u04F2"
+    "u\u030B": "\u04F3"
+    "U": "\u0423"
+    "u": "\u0443"
+
+    "V\u0307": "\u0474"
+    "v\u0307": "\u0475"
+    "V\u0308": "\u0476"
+    "v\u0308": "\u0477"
+    "V": "\u0412"
+    "v": "\u0432"
+
+    "W\u0308": "\u051C"
+    "w\u0308": "\u051D"
+    "W": "\u04A8"
+    "w": "\u04A9"
+
+    "X": "\u0058"
+    "x": "\u0078"
+
+    "Y\u0307": "\u0474"
+    "y\u0307": "\u0475"
+    "Y\u0308": "\u04F8"
+    "y\u0308": "\u04F9"
+    "Y": "\u042B"
+    "y": "\u044B"
+
+    "Zh": "\u0416"
+    "zh": "\u0436"
+    "Z\u0301": "\u0504"
+    "z\u0301": "\u0505"
+    "Z\u0307": "\u0510"
+    "z\u0307": "\u0511"
+    "Z\u0308": "\u04DE"
+    "z\u0308": "\u04DF"
+    "Z\u0327": "\u0506"
+    "z\u0327": "\u0507"
+    "Z": "\u0417"
+    "z": "\u0437"
+
+    "H": "\u0413"
+    "h": "\u0433"
+
+    "\u0110": "\u0402"
+    "\u0111": "\u0452"
+    "\u02B9\u0333": "\u042C"
+    "\u02B9": "\u044C"
+    "\u02BA\u0333": "\u042A"
+    "\u02BA": "\u044A"
+    "\u0303": "\u0487"
+    "\u0311": "\u0484"
+    "\u0313": "\u0486"
+    "\u0314": "\u0485"
+    "\u007E": "\u0483"
+    "(|)": "\u0482"
+    "(^)": "\u0488"
+    "(')": "\u0489"
+
+    # Two Less-than signs mapped to Left-pointing double angle quotation mark
+    "\u003C\u003C": "\u00AB"
+    # Two Greater-than signs mapped to Right-pointing double angle quotation mark
+    "\u003E\u003E": "\u00BB"
+
+script_to_roman:
+  map:
+
+    # Left-pointing double angle quotation mark mapped to Two Less-than signs
+    "\u00AB": "\u003C\u003C"
+    # Right-pointing double angle quotation mark mapped to Two Greater-than signs
+    "\u00BB": "\u003E\u003E"
+    "\u2116": "No\u0332"
+    "\u0400": "E\u0300"
+    "\u0401": "E\u0308"
+    "\u0402": "\u0110"
+    "\u0403": "G\u0301"
+    "\u0404": "E\u0304"
+    "\u0405": "S\u0307"
+    "\u0406": "I\u0304"
+    "\u0407": "I\u0308"
+    "\u0408": "I\u0310"
+    "\u0409": "Lj"
+    "\u040A": "Nj"
+    "\u040B": "C\u0301"
+    "\u040C": "K\u0301"
+    "\u040D": "I\u0300"
+    "\u040E": "U\u0306"
+    "\u040F": "Dz\u030C"
+    "\u0410": "A"
+    "\u0411": "B"
+    "\u0412": "V"
+    "\u0413": "G"
+    "\u0414": "D"
+    "\u0415": "E"
+    "\u0416": "Zh"
+    "\u0417": "Z"
+    "\u0418": "I"
+    "\u0419": "I\u0306"
+    "\u041A": "K"
+    "\u041B": "L"
+    "\u041C": "M"
+    "\u041D": "N"
+    "\u041E": "O"
+    "\u041F": "P"
+    "\u0420": "R"
+    "\u0421": "S"
+    "\u0422": "T"
+    "\u0423": "U"
+    "\u0424": "F"
+    "\u0425": "Kh"
+    "\u0426": "T\uFE20S\uFE21"
+    "\u0427": "Ch"
+    "\u0428": "Sh"
+    "\u0429": "Shch"
+    "\u042A": "\u02BA\u0333"
+    "\u042B": "Y"
+    "\u042C": "\u02B9\u0333"
+    "\u042D": "E\u0307"
+    "\u042E": "I\uFE20U\uFE21"
+    "\u042F": "I\uFE20A\uFE21"
+    "\u0430": "a"
+    "\u0431": "b"
+    "\u0432": "v"
+    "\u0433": "g"
+    "\u0434": "d"
+    "\u0435": "e"
+    "\u0436": "zh"
+    "\u0437": "z"
+    "\u0438": "i"
+    "\u0439": "i\u0306"
+    "\u043A": "k"
+    "\u043B": "l"
+    "\u043C": "m"
+    "\u043D": "n"
+    "\u043E": "o"
+    "\u043F": "p"
+    "\u0440": "r"
+    "\u0441": "s"
+    "\u0442": "t"
+    "\u0443": "u"
+    "\u0444": "f"
+    "\u0445": "kh"
+    "\u0446": "t\uFE20s\uFE21"
+    "\u0447": "ch"
+    "\u0448": "sh"
+    "\u0449": "shch"
+    "\u044A": "\u02BA"
+    "\u044B": "y"
+    "\u044C": "\u02B9"
+    "\u044D": "e\u0307"
+    "\u044E": "i\uFE20u\uFE21"
+    "\u044F": "i\uFE20a\uFE21"
+    "\u0450": "e\u0300"
+    "\u0451": "e\u0308"
+    "\u0452": "\u0111"
+    "\u0453": "g\u0301"
+    "\u0454": "e\u0304"
+    "\u0455": "s\u0307"
+    "\u0456": "i\u0304"
+    "\u0457": "i\u0308"
+    "\u0458": "i\u0310"
+    "\u0459": "lj"
+    "\u045A": "nj"
+    "\u045B": "c\u0301"
+    "\u045C": "k\u0301"
+    "\u045D": "i\u0300"
+    "\u045E": "u\u0306"
+    "\u045F": "dz\u030C"
+    "\u0460": "O\u0304\u0324"
+    "\u0461": "o\u0304\u0324"
+    "\u0462": "I\uFE20E\uFE21"
+    "\u0463": "i\uFE20e\uFE21"
+    "\u0464": "I\uFE20E\uFE21\u0304"
+    "\u0465": "i\uFE20e\uFE21\u0304"
+    "\u0466": "E\u0328"
+    "\u0467": "e\u0328"
+    "\u0468": "I\uFE20E\uFE21\u0328"
+    "\u0469": "i\uFE20e\uFE21\u0328"
+    "\u046A": "O\u0328"
+    "\u046B": "o\u0328"
+    "\u046C": "I\uFE20O\uFE21\u0328"
+    "\u046D": "i\uFE20o\uFE21\u0328"
+    "\u046E": "K\uFE20S\uFE21"
+    "\u046F": "k\uFE20s\uFE21"
+    "\u0470": "P\uFE20S\uFE21"
+    "\u0471": "p\uFE20s\uFE21"
+    "\u0472": "F\u0307"
+    "\u0473": "f\u0307"
+    "\u0474": "V\u0307"
+    "\u0475": "v\u0307"
+    "\u0476": "V\u0308"
+    "\u0477": "v\u0308"
+    "\u0478": "O\uFE20u\uFE21"
+    "\u0479": "o\uFE20u\uFE21"
+    "\u047A": "O\u0304\u0323"
+    "\u047B": "o\u0304\u0323"
+    "\u047C": "O\u0303"
+    "\u047D": "o\u0303"
+    "\u047E": "O\u0304\uFE20T\uFE21"
+    "\u047F": "o\u0304\uFE20t\uFE21"
+    "\u0480": "Q\u0302"
+    "\u0481": "q\u0302"
+    "\u0482": "(|)"
+    "\u0483": "\u007E"
+    "\u0484": "\u0311"
+    "\u0485": "\u0314"
+    "\u0486": "\u0313"
+    "\u0487": "\u0303"
+    "\u0488": "(^)"
+    "\u0489": "(')"
+    "\u048A": "I\u0306\u0323"
+    "\u048B": "i\u0306\u0323"
+    "\u048C": "E\u0306\u0323"
+    "\u048D": "e\u0306\u0323"
+    "\u048E": "R\u0306"
+    "\u048F": "r\u0306"
+    "\u0490": "G\u0306"
+    "\u0491": "g\u0306"
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u0494": "G\u0327"
+    "\u0495": "g\u0327"
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u0498": "T\uFE20H\uFE21"
+    "\u0499": "t\uFE20h\uFE21"
+    "\u049A": "Q"
+    "\u049B": "q"
+    "\u049C": "G\u0307"
+    "\u049D": "g\u0307"
+    "\u049E": "Q\u0304"
+    "\u049F": "q\u0304"
+    "\u04A0": "Q\u0300"
+    "\u04A1": "q\u0300"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04A4": "N\u0301G\u0300"
+    "\u04A5": "n\u0301g\u0300"
+    "\u04A6": "Ph"
+    "\u04A7": "ph"
+    "\u04A8": "W"
+    "\u04A9": "w"
+    "\u04AA": "Th"
+    "\u04AB": "th"
+    "\u04AC": "T\u0327"
+    "\u04AD": "t\u0327"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04B0": "U\u0302"
+    "\u04B1": "u\u0302"
+    "\u04B2": "K\uFE20H\uFE21"
+    "\u04B3": "k\uFE20h\uFE21"
+    "\u04B4": "T\uFE20S\uFE21\u0307"
+    "\u04B5": "t\uFE20s\uFE21\u0307"
+    "\u04B6": "J\u0302"
+    "\u04B7": "j\u0302"
+    "\u04B8": "J\u0304"
+    "\u04B9": "j\u0304"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04BC": "C\u0301h"
+    "\u04BD": "c\u0301h"
+    "\u04BE": "C\u0301h\u0301"
+    "\u04BF": "c\u0301h\u0301"
+    "\u04C0": "H\u0308"
+    "\u04C1": "J\u0306"
+    "\u04C2": "j\u0306"
+    "\u04C3": "Q\u0307"
+    "\u04C4": "q\u0307"
+    "\u04C5": "L\u0326"
+    "\u04C6": "l\u0326"
+    "\u04C7": "N\uFE20\u0327G\uFE21"
+    "\u04C8": "n\uFE20\u0327g\uFE21"
+    "\u04C9": "N\uFE20\u0323G\uFE21"
+    "\u04CA": "n\uFE20\u0323g\uFE21"
+    "\u04CB": "C\u0323h"
+    "\u04CC": "c\u0323h"
+    "\u04CD": "M\u0323"
+    "\u04CE": "m\u0323"
+    "\u04CF": "h\u0308"
+    "\u04D0": "A\u0306\u0323"
+    "\u04D1": "a\u0306\u0323"
+    "\u04D2": "A\u0308"
+    "\u04D3": "a\u0308"
+    "\u04D4": "\u00C6"
+    "\u04D5": "\u00E6"
+    "\u04D6": "E\u0306"
+    "\u04D7": "e\u0306"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04DA": "A\u030B"
+    "\u04DB": "a\u030B"
+    "\u04DC": "J\u0308"
+    "\u04DD": "j\u0308"
+    "\u04DE": "Z\u0308"
+    "\u04DF": "z\u0308"
+    "\u04E0": "D\uFE20Z\uFE21"
+    "\u04E1": "d\uFE20z\uFE21"
+    "\u04E2": "I\u0304\u0323"
+    "\u04E3": "i\u0304\u0323"
+    "\u04E4": "I\u0308\u0323"
+    "\u04E5": "i\u0308\u0323"
+    "\u04E6": "O\u0308"
+    "\u04E7": "o\u0308"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04EA": "O\u0304"
+    "\u04EB": "o\u0304"
+    "\u04EC": "E\u0308\u0323"
+    "\u04ED": "e\u0308\u0323"
+    "\u04EE": "U\u0304"
+    "\u04EF": "u\u0304"
+    "\u04F0": "U\u0308"
+    "\u04F1": "u\u0308"
+    "\u04F2": "U\u030B"
+    "\u04F3": "u\u030B"
+    "\u04F4": "C\u0308h"
+    "\u04F5": "c\u0308h"
+    "\u04F6": "G\u0323"
+    "\u04F7": "g\u0323"
+    "\u04F8": "Y\u0308"
+    "\u04F9": "y\u0308"
+    "\u04FA": "Gh\u0327"
+    "\u04FB": "gh\u0327"
+    "\u04FC": "H\u0327"
+    "\u04FD": "h\u0327"
+    "\u04FE": "H\u0304"
+    "\u04FF": "h\u0304"
+    "\u0500": "D\u0307"
+    "\u0501": "d\u0307"
+    "\u0502": "D\u0301"
+    "\u0503": "d\u0301"
+    "\u0504": "Z\u0301"
+    "\u0505": "z\u0301"
+    "\u0506": "Z\u0327"
+    "\u0507": "z\u0327"
+    "\u0508": "L\u0301"
+    "\u0509": "l\u0301"
+    "\u050A": "N\u0301"
+    "\u050B": "n\u0301"
+    "\u050C": "S\u0301"
+    "\u050D": "s\u0301"
+    "\u050E": "T\u0301"
+    "\u050F": "t\u0301"
+    "\u0510": "Z\u0307"
+    "\u0511": "z\u0307"
+    "\u0512": "L\u0327"
+    "\u0513": "l\u0327"
+    "\u0514": "Lkh\u0307"
+    "\u0515": "lkh\u0307"
+    "\u0516": "Rkh\u0307"
+    "\u0517": "rkh\u0307"
+    "\u0518": "A\u0310"
+    "\u0519": "a\u0310"
+    "\u051A": "Q\u0308"
+    "\u051B": "q\u0308"
+    "\u051C": "W\u0308"
+    "\u051D": "w\u0308"
+    "\u051E": "K\u0300"
+    "\u051F": "k\u0300"
+    "\u0520": "L\u0324"
+    "\u0521": "l\u0324"
+    "\u0522": "N\u0327"
+    "\u0523": "n\u0327"
+    "\u0524": "P\u0323"
+    "\u0525": "p\u0323"
+    "\u0526": "Sh\u0323"
+    "\u0527": "sh\u0323"
+    "\u0528": "N\u0326"
+    "\u0529": "n\u0326"
+    "\u052A": "D\uFE20z\uFE21h"
+    "\u052B": "d\uFE20z\uFE21h"
+    "\u052C": "D\uFE20c\uFE21h"
+    "\u052D": "d\uFE20c\uFE21h"
+    "\u052E": "L\u0323"
+    "\u052F": "l\u0323"

+ 1 - 1
scriptshifter/tables/data/dungan_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Dungan (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/even-evenki_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Even/Evenki (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/gagauz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Gagauz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 207 - 209
scriptshifter/tables/data/greek_classical.yml

@@ -52,7 +52,7 @@ script_to_roman:
       - "\u1F8E"  # ᾎ 	Greek Capital Letter Alpha With Psili And Perispomeni And Prosgegrammeni
     # Rough alpha
     "\u1F01":  # ἁ 	Greek Small Letter Alpha With Dasia
-      - "\u03B1\u0314" # Small alpha + combining reversed comma above
+      - "\u03B1\u0314"  # Small alpha + combining reversed comma above
       - "\u1F81"  # ᾁ 	Greek Small Letter Alpha With Dasia And Ypogegrammeni
     "\u1F03":  # ἃ 	Greek Small Letter Alpha With Dasia And Varia
       - "\u1F05"  # ἅ 	Greek Small Letter Alpha With Dasia And Oxia
@@ -61,7 +61,7 @@ script_to_roman:
       - "\u1F85"  # ᾅ 	Greek Small Letter Alpha With Dasia And Oxia And Ypogegrammeni
       - "\u1F87"  # ᾇ 	Greek Small Letter Alpha With Dasia And Perispomeni And Ypogegrammeni
     "\u1F09":  # Ἁ 	Greek Capital Letter Alpha With Dasia
-      - "\u0391\u0314" # Capital alpha + combining reversed comma above
+      - "\u0391\u0314"  # Capital alpha + combining reversed comma above
       - "\u1F89"  # ᾉ 	Greek Capital Letter Alpha With Dasia And Prosgegrammeni
     "\u1F0B":  # Ἃ 	Greek Capital Letter Alpha With Dasia And Varia
       - "\u1F8B"  # ᾋ 	Greek Capital Letter Alpha With Dasia And Varia And Prosgegrammeni
@@ -226,7 +226,7 @@ script_to_roman:
       - "\u1FE3"  # ΰ 	Greek Small Letter Upsilon With Dialytika And Oxia
       - "\u1FE7"  # ῧ 	Greek Small Letter Upsilon With Dialytika And Perispomeni
     "\u03A5":
-        # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
+      # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
       - "\u03AB"  # Ϋ 	Greek Capital Letter Upsilon With Dialytika
       - "\u1F59"  # Ὑ 	Greek Capital Letter Upsilon With Dasia
       - "\u1FE8"  # Ῠ 	Greek Capital Letter Upsilon With Vrachy
@@ -297,7 +297,7 @@ script_to_roman:
       - "\u1FAD"  # ᾭ 	Greek Capital Letter Omega With Dasia And Oxia And Prosgegrammeni
       - "\u1FAF"  # ᾯ 	Greek Capital Letter Omega With Dasia And Perispomeni And Prosgegrammeni
 
-    # Remove all combining diacritics.
+    # Remove combining diacritics irrelevant to transliteration.
     "":
       - "\u0314"
       - "\u0342"
@@ -323,7 +323,7 @@ script_to_roman:
     "\u201C": "\"\u0332"
     "\u201D": "\"\u0333"
     "\u2018": "'\u0332"
-    #"\u2019": "'\u0333"
+    # "\u2019": "'\u0333"
     "\u2116": "No\u0332"
     # "\u0300": ""
     # "\u0301": ""
@@ -421,7 +421,6 @@ script_to_roman:
     # \u03A2 reserved
     "\u03A3": "S"
     "\u03A4": "T"
-    "\u1F59": "Hy"
     "\u03A5": "Y"
     "\u03A5\u03B9": "Ui"
     "\u03A5\u1F31": "Hui"
@@ -518,7 +517,6 @@ script_to_roman:
     "\u1F51": "hy"
     "\u1F59": "Hy"
     "\u03C5": "y"
-    "\u03CB": "y"
     "\u03C5\u03B9": "ui"
     "\u03C5\u1F31": "hui"
     "\u03C6": "ph"
@@ -551,9 +549,9 @@ script_to_roman:
     "\u03D9": "ḳ"
     "\u03DA": "6\u0333"
     "\u03DB": "6\u0332"
-    #"\u03DC": "G\u0332"
+    # "\u03DC": "G\u0332"
     "\u03DC": "W"
-    #"\u03DD": "g\u0332"
+    # "\u03DD": "g\u0332"
     "\u03DD": "w"
     "\u03DE": "K\u0324"
     "\u03DF": "k\u0324"
@@ -591,203 +589,203 @@ script_to_roman:
     "\u03FF": ".)\u0333"
 
 roman_to_script:
-    map:
-      ".)\u0333": "\u03FF"
-      ".)": "\u037D"
-      "?\u0333": "\u037E"
-      "?": "\u037E"
-      "\"\u0332": "\u201C"
-      "\"\u0333": "\u201D"
-      "'\u0332": "\u2018"
-      "'\u0333": "\u2019"
-      "(.\u0333": "\u03FE"
-      "(.": "\u037C"
-      "|)\u0333": "\u03FD"
-      "|)": "\u037B"
-      # Left pointing double angle quotation mark
-      "\u003C\u003C": "\u00AB"
-      # Right pointing double angle quotation mark
-      "\u003E\u003E": "\u00BB"
-      "6\u0332": "\u03DB"
-      "6\u0333": "\u03DA"
-      "Au": "\u0391\u03C5"
-      "au": "\u03B1\u03C5"
-      "a\u0301": "\u03AC"
-      "B": "\u0392"
-      "b": "\u03B2"
-      "b\u0333": "\u03D0"
-      "Ch": "\u03A7"
-      "ch": "\u03C7"
-      "c\u030C": "\u03EB"
-      "\u1E0E": "\u039D\u03C4"
-      "\u1E0F": "\u03BD\u03C4"
-      "D": "\u0394"
-      "d": "\u03B4"
-      "Eu": "\u0395\u03C5"
-      "eu": "\u03B5\u03C5"
-      "E\u0301": "\u0388"
-      "e\u0301": "\u03AD"
-      "\u0112\u0301": "\u0389\u0314"
-      "\u0113\u0301": "\u03AE"
-      "\u0112\u0301": "\u0389"
-      "\u0112": "\u0397"
-      "\u0112u": "\u0397\u03C5"
-      "\u0113": "\u03B7"
-      "\u0113u": "\u03B7\u03C5"
-      "h\u0113u": "\u03B7\u1F51"
-      "e\u0332": "\u03F5"
-      "e\u0333": "\u03F6"
-      "F": "\u03E4"
-      "f": "\u03E5"
-      #"G\u0332": "\u03DC"
-      "W": "\u03DC"
-      #"g\u0332": "\u03DD"
-      "w": "\u03DD"
-      "Ha\u0301": "\u0386\u0314"
-      "ha\u0301": "\u03AC\u0314"
-      "Ha": "\u1F09"
-      "ha": "\u03B1\u0314"
-      "A": "\u0391"
-      "a": "\u03B1"
-      "h\u0113\u0301": "\u03AE\u0314"
-      "He\u0301": "\u0388\u0314"
-      "he\u0301": "\u03AD\u0314"
-      "H\u0113": "\u1F29"
-      "H\u0113u": "\u1F29\u03C5"
-      "h\u0113": "\u1F21"
-      "h\u0113u": "\u1F21\u13C5"
-      "He": "\u1F19"
-      "he": "\u1F11"
-      "E": "\u0395"
-      "e": "\u03B5"
-      "Hi\u0301": "\u038A\u0314"
-      "hi\u0301": "\u03AF\u0314"
-      "Hi": "\u1F39"
-      "hi": "\u1F31"
-      "Ho\u0301": "\u038F\u0314"
-      "Ho\u0301": "\u038C\u0314"
-      "h\u014D": "\u1F61"
-      "H\u014D": "\u1F69"
-      "Ho": "\u1F49"
-      "ho": "\u1F41"
-      "H\u0307": "\u03E8"
-      "h\u0307": "\u03E9"
-      "H\u0323": "\u0370"
-      "h\u0323": "\u0371"
-      "H\u0332": "\u03E6"
-      "h\u0332": "\u03E7"
-      "Hy\u0301": "\u038E\u0314"
-      "Hy": "\u1F59"
-      "hy": "\u1F51"
-      "Iu": "\u0399\u03C5"
-      "iu": "\u03B9\u03C5"
-      "I\u0301": "\u038A"
-      "i\u0301": "\u03AF"
-      "I\u0308": "\u03AA"
-      "i\u0308\u0301": "\u0390"
-      "i\u0308": "\u03CA"
-      "J": "\u037F"
-      "j": "\u03F3"
-      "K\u0323y": "\u03EC"
-      "k\u0323y": "\u03ED"
-      "K\u0332": "\u03DE"
-      "k\u0332": "\u03DF"
-      "K\u0326": "\u03CF"
-      "k\u0326": "\u03D7"
-      "k\u0332": "\u03F0"
-      "L": "\u039B"
-      "l": "\u03BB"
-      "M": "\u039C"
-      "m": "\u03BC"
-      "nch": "\u03B3\u03C7"
-      "ng": "\u03B3\u03B3"
-      "%nk%": "\u03B3\u03BA"
-      "nx": "\u03B3\u03BE"
-      "No\u0332": "\u2116"
-      "N": "\u039D"
-      "n": "\u03BD"
-      "K": "\u039A"
-      "k": "\u03BA"
-      "G": "\u0393"
-      "g": "\u03B3"
-      "Ou": "\u039F\u03C5"
-      "ou": "\u03BF\u03C5"
-      "O\u0301": "\u038C"
-      "o\u0301": "\u03CC"
-      "\u014C\u0301": "\u038F"
-      "\u014D\u0301": "\u03CE"
-      "\u014C": "\u03A9"
-      "\u014Cu": "\u03A9\u03C5"
-      "\u014D": "\u03C9"
-      "\u014Du": "\u03D9\u03C5"
-      "O": "\u039F"
-      "o": "\u03BF"
-      "Ph": "\u03A6"
-      "ph": "\u03C6"
-      "Ps": "\u03A8"
-      "ps": "\u03C8"
-      "p\u0333h\u0333": "\u03D5"
-      "p\u0333": "\u03D6"
-      "P": "\u03A0"
-      "p": "\u03C0"
-      "Ḳ": "\u03D8"
-      "ḳ": "\u03D9"
-      "Rh": "\u1FEC"
-      "rh": "\u1FE5"
-      "r\u0332": "\u03F1"
-      "r\u0333": "\u03FC"
-      "R": "\u03A1"
-      "r": "\u03C1"
-      "S\uFE20\u0332S\uFE21\u0332": "\u0372"
-      "s\uFE20\u0332s\uFE21\u0332": "\u0373"
-      "S\uFE20H\uFE21": "\u03F7"
-      "s\uFE20h\uFE21": "\u03F8"
-      "S\uFE20S\uFE21": "\u03E1"
-      "s\uFE20s\uFE21": "\u03E0"
-      "S\u030C": "\u03E2"
-      "s\u030C": "\u03E3"
-      "S\u0323": "\u03FA"
-      "s\u0323": "\u03FB"
-      "S": "\u03F9"
-      "s": "\u03F2"
-      "S": "\u03A3"
-      "%s": "\u03C2"
-      "s": "\u03C3"
-      "T\u0333H\u0333": "\u03F4"
-      "t\u0333h\u0333": "\u03D1"
-      "Th": "\u0398"
-      "th": "\u03B8"
-      "T\u0323i": "\u03EE"
-      "t\u0323i": "\u03EF"
-      "T": "\u03A4"
-      "t": "\u03C4"
-      "I": "\u0399"
-      "i": "\u03B9"
-      "\u0020\u0301": "\u0384"
-      "\u0020\u0308\u0301": "\u0385"
-      ";\u0333": "\u0387"
-      "\u02B9": "\u0374"
-      "\u0326": "\u0375"
-      "\u0328": "\u037A"
-      "V": "\u0392"
-      "v": "\u03B2"
-      "W\u0323": "\u0376"
-      "w\u0323": "\u0377"
-      "X": "\u039E"
-      "x": "\u03BE"
-      "Y\u0301\u0333": "\u03D3"
-      "Y\u0301": "\u038E"
-      "y\u0301": "\u03CD"
-      "Y\u0308\u0333": "\u03D4"
-      "y\u0308\u0301": "\u03B0"
-      "Y\u0308": "\u03AB"
-      "y\u0308": "\u03CB"
-      "Y\u0333": "\u03D2"
-      "Y": "\u03A5"
-      "Ui": "\u03A5\u03B9"
-      "Hui": "\u03A5\u1F31"
-      "y": "\u03C5"
-      "ui": "\u03C5\u03B9"
-      "hui": "\u03C5\u1F31"
-      "Z": "\u0396"
-      "z": "\u03B6"
+  map:
+    ".)\u0333": "\u03FF"
+    ".)": "\u037D"
+    "?\u0333": "\u037E"
+    "?": "\u037E"
+    "\"\u0332": "\u201C"
+    "\"\u0333": "\u201D"
+    "'\u0332": "\u2018"
+    "'\u0333": "\u2019"
+    "(.\u0333": "\u03FE"
+    "(.": "\u037C"
+    "|)\u0333": "\u03FD"
+    "|)": "\u037B"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u00AB"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u00BB"
+    "6\u0332": "\u03DB"
+    "6\u0333": "\u03DA"
+    "Au": "\u0391\u03C5"
+    "au": "\u03B1\u03C5"
+    "a\u0301": "\u03AC"
+    "B": "\u0392"
+    "b": "\u03B2"
+    "b\u0333": "\u03D0"
+    "Ch": "\u03A7"
+    "ch": "\u03C7"
+    "c\u030C": "\u03EB"
+    "\u1E0E": "\u039D\u03C4"
+    "\u1E0F": "\u03BD\u03C4"
+    "D": "\u0394"
+    "d": "\u03B4"
+    "Eu": "\u0395\u03C5"
+    "eu": "\u03B5\u03C5"
+    "E\u0301": "\u0388"
+    "e\u0301": "\u03AD"
+    "\u0113\u0301": "\u03AE"
+    # "\u0112\u0301": "\u0389\u0314"
+    "\u0112\u0301": "\u0389"
+    "\u0112": "\u0397"
+    "\u0112u": "\u0397\u03C5"
+    "\u0113": "\u03B7"
+    "\u0113u": "\u03B7\u03C5"
+    "e\u0332": "\u03F5"
+    "e\u0333": "\u03F6"
+    "F": "\u03E4"
+    "f": "\u03E5"
+    # "G\u0332": "\u03DC"
+    "W": "\u03DC"
+    # "g\u0332": "\u03DD"
+    "w": "\u03DD"
+    "Ha\u0301": "\u0386\u0314"
+    "ha\u0301": "\u03AC\u0314"
+    "Ha": "\u1F09"
+    "ha": "\u03B1\u0314"
+    "A": "\u0391"
+    "a": "\u03B1"
+    "h\u0113\u0301": "\u03AE\u0314"
+    "He\u0301": "\u0388\u0314"
+    "he\u0301": "\u03AD\u0314"
+    "H\u0113": "\u1F29"
+    "H\u0113u": "\u1F29\u03C5"
+    "h\u0113": "\u1F21"
+    "h\u0113u": "\u03B7\u1F51"
+    # "h\u0113u": "\u1F21\u13C5"  # FIXME this looks wrong.
+    "He": "\u1F19"
+    "he": "\u1F11"
+    "E": "\u0395"
+    "e": "\u03B5"
+    "Hi\u0301": "\u038A\u0314"
+    "hi\u0301": "\u03AF\u0314"
+    "Hi": "\u1F39"
+    "hi": "\u1F31"
+    # "Ho\u0301": "\u038F\u0314"
+    "Ho\u0301": "\u038C\u0314"
+    "h\u014D": "\u1F61"
+    "H\u014D": "\u1F69"
+    "Ho": "\u1F49"
+    "ho": "\u1F41"
+    "H\u0307": "\u03E8"
+    "h\u0307": "\u03E9"
+    "H\u0323": "\u0370"
+    "h\u0323": "\u0371"
+    "H\u0332": "\u03E6"
+    "h\u0332": "\u03E7"
+    "Hy\u0301": "\u038E\u0314"
+    "Hy": "\u1F59"
+    "hy": "\u1F51"
+    "Iu": "\u0399\u03C5"
+    "iu": "\u03B9\u03C5"
+    "I\u0301": "\u038A"
+    "i\u0301": "\u03AF"
+    "I\u0308": "\u03AA"
+    "i\u0308\u0301": "\u0390"
+    "i\u0308": "\u03CA"
+    "J": "\u037F"
+    "j": "\u03F3"
+    "K\u0323y": "\u03EC"
+    "k\u0323y": "\u03ED"
+    "K\u0326": "\u03CF"
+    "k\u0326": "\u03D7"
+    "K\u0332": "\u03DE"
+    # "k\u0332": "\u03DF"  # FIXME ambiguous.
+    "k\u0332": "\u03F0"
+    "L": "\u039B"
+    "l": "\u03BB"
+    "M": "\u039C"
+    "m": "\u03BC"
+    "nch": "\u03B3\u03C7"
+    "ng": "\u03B3\u03B3"
+    "%nk%": "\u03B3\u03BA"
+    "nx": "\u03B3\u03BE"
+    "No\u0332": "\u2116"
+    "N": "\u039D"
+    "n": "\u03BD"
+    "K": "\u039A"
+    "k": "\u03BA"
+    "G": "\u0393"
+    "g": "\u03B3"
+    "Ou": "\u039F\u03C5"
+    "ou": "\u03BF\u03C5"
+    "O\u0301": "\u038C"
+    "o\u0301": "\u03CC"
+    "\u014C\u0301": "\u038F"
+    "\u014D\u0301": "\u03CE"
+    "\u014C": "\u03A9"
+    "\u014Cu": "\u03A9\u03C5"
+    "\u014D": "\u03C9"
+    "\u014Du": "\u03C9\u03C5"
+    "O": "\u039F"
+    "o": "\u03BF"
+    "Ph": "\u03A6"
+    "ph": "\u03C6"
+    "Ps": "\u03A8"
+    "ps": "\u03C8"
+    "p\u0333h\u0333": "\u03D5"
+    "p\u0333": "\u03D6"
+    "P": "\u03A0"
+    "p": "\u03C0"
+    "Ḳ": "\u03D8"
+    "ḳ": "\u03D9"
+    "Rh": "\u1FEC"
+    "rh": "\u1FE5"
+    "r\u0332": "\u03F1"
+    "r\u0333": "\u03FC"
+    "R": "\u03A1"
+    "r": "\u03C1"
+    "S\uFE20\u0332S\uFE21\u0332": "\u0372"
+    "s\uFE20\u0332s\uFE21\u0332": "\u0373"
+    "S\uFE20H\uFE21": "\u03F7"
+    "s\uFE20h\uFE21": "\u03F8"
+    "S\uFE20S\uFE21": "\u03E1"
+    "s\uFE20s\uFE21": "\u03E0"
+    "S\u030C": "\u03E2"
+    "s\u030C": "\u03E3"
+    "S\u0323": "\u03FA"
+    "s\u0323": "\u03FB"
+    # "S": "\u03F9"  # FIXME ambiguous.
+    "S": "\u03A3"
+    # "s": "\u03F2"  # FIXME ambiguous.
+    "%s": "\u03C2"
+    "s": "\u03C3"
+    "T\u0333H\u0333": "\u03F4"
+    "t\u0333h\u0333": "\u03D1"
+    "Th": "\u0398"
+    "th": "\u03B8"
+    "T\u0323i": "\u03EE"
+    "t\u0323i": "\u03EF"
+    "T": "\u03A4"
+    "t": "\u03C4"
+    "I": "\u0399"
+    "i": "\u03B9"
+    "\u0020\u0301": "\u0384"
+    "\u0020\u0308\u0301": "\u0385"
+    ";\u0333": "\u0387"
+    "\u02B9": "\u0374"
+    "\u0326": "\u0375"
+    "\u0328": "\u037A"
+    "V": "\u0392"
+    "v": "\u03B2"
+    "W\u0323": "\u0376"
+    "w\u0323": "\u0377"
+    "X": "\u039E"
+    "x": "\u03BE"
+    "Y\u0301\u0333": "\u03D3"
+    "Y\u0301": "\u038E"
+    "y\u0301": "\u03CD"
+    "Y\u0308\u0333": "\u03D4"
+    "y\u0308\u0301": "\u03B0"
+    "Y\u0308": "\u03AB"
+    "y\u0308": "\u03CB"
+    "Y\u0333": "\u03D2"
+    "Y": "\u03A5"
+    "Ui": "\u03A5\u03B9"
+    "Hui": "\u03A5\u1F31"
+    "y": "\u03C5"
+    "ui": "\u03C5\u03B9"
+    "hui": "\u03C5\u1F31"
+    "Z": "\u0396"
+    "z": "\u03B6"

+ 18 - 0
scriptshifter/tables/data/greek_modern.yml

@@ -12,3 +12,21 @@ roman_to_script:
   map:
     "V": "\u0392"
     "v": "\u03B2"
+    "Ha": "\u0391"
+    "ha": "\u03B1"
+    "He": "\u0395"
+    "he": "\u03B5"
+    "H\u0113": "\u0397"
+    "h\u0113": "\u03B7"
+    "Hi": "\u0399"
+    "hi": "\u03B9"
+    "Ho": "\u039F"
+    "ho": "\u03BF"
+    "Hou": "\u039F\u03C5"
+    "hou": "\u03BF\u03C5"
+    "H\u014D": "\u03A9"
+    "h\u014D": "\u03C9"
+    "Hy": "\u03A5"
+    "Hui": "\u03A5\u03B9"
+    "hy": "\u03C5"
+    "hui": "\u03C5\u03B9"

+ 1 - 1
scriptshifter/tables/data/kalmyk_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kalmyk (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kara-kalpak_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kara-Kalpak (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/karachay-balkar_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Karachay-Balkar (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/karelian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Karelian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kazakh_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kazakh (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/khakass_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Khakass (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/khanty_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Khanty (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/komi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Komi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/koryak_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Koryak (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kyrgyz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kyrgyz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/lithuanian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Lithuanian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 62 - 16
scriptshifter/tables/data/macedonian.yml

@@ -1,50 +1,99 @@
 general:
   name: Macedonian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:
     "G\u0301": "\u0403"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F4": "\u0403"
     "G": "\u0413"
     "g\u0301": "\u0453"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F5": "\u0453"
     "g": "\u0433"
     "\u0110": "\u0402"
-    # this conversion shouldn't be needed, but does no harm
-    "DZ\u030C": "\u040F"
-    # this conversion shouldn't be needed, but does no harm
-    "DZ": "\u0405"
-    "Dz\u030C": "\u040F"
-    "Dz": "\u0405"
+    "D\uFE20Z\u030C\uFE21": "\u040F"
+    "D\uFE20z\u030C\uFE21": "\u040F"
+    "d\uFE20Z\u030C\uFE21": "\u040F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C4": "\u040F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C5": "\u040F"
+    "d\uFE20z\u030C\uFE21": "\u045F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C6": "\u045F"
+    "D\uFE20Z\uFE21": "\u0405"
+    "D\uFE20z\uFE21": "\u0405"
+    "d\uFE20Z\uFE21": "\u0405"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F1": "\u0405"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F2": "\u0405"
+    "d\uFE20z\uFE21": "\u0455"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F3": "\u0455"
     "\u0111": "\u0452"
     "dz\u030C": "\u045F"
     "dz": "\u0455"
     "Z\u030C": "\u0416"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u017D": "\u0416"
     "z\u030C": "\u0436"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u017E": "\u0436"
     "z": "\u0437"
     "I": "\u0418"
     "i": "\u0438"
     "J": "\u0408"
     "j": "\u0458"
     "K\u0301": "\u040C"
-    "H": "\u0425"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u1E30": "\u040C"
     "k\u0301": "\u045C"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u1E31": "\u045C"
+    "H": "\u0425"
     "h": "\u0445"
-    # this conversion shouldn't be needed, but does no harm
     "LJ": "\u0409"
     "Lj": "\u0409"
+    "lJ": "\u0409"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C7": "\u0409"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C8": "\u0409"
     "lj": "\u0459"
-    # this conversion shouldn't be needed, but does no harm
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C9": "\u0459"
     "NJ": "\u040A"
     "Nj": "\u040A"
+    "nJ": "\u040A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CA": "\u040A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CB": "\u040A"
     "nj": "\u045A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CC": "\u045A"
     "S\u030C": "\u0428"
+    "\u0160": "\u0428"
     "s\u030C": "\u0448"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0161": "\u0448"
     "C\u0301": "\u040B"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0106": "\u040B"
     "C\u030C": "\u0427"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u010C": "\u0427"
     "C": "\u0426"
     "c\u0301": "\u045B"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0107": "\u045B"
     "c\u030C": "\u0447"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u010D": "\u0447"
     "c": "\u0446"
 
 script_to_roman:
@@ -57,8 +106,8 @@ script_to_roman:
     "\u0452": "\u0111"
     "\u0416": "Z\u030C"
     "\u0436": "z\u030C"
-    "\u0405": "Dz"
-    "\u0455": "dz"
+    "\u0405": "D\uFE20Z\uFE21"
+    "\u0455": "d\uFE20z\uFE21"
     "\u0418": "I"
     "\u0438": "i"
     "\u0408": "J"
@@ -79,9 +128,6 @@ script_to_roman:
     "\u0446": "c"
     "\u0427": "C\u030C"
     "\u0447": "c\u030C"
-    "\u040F": "Dz\u030C"
-    "\u045F": "dz\u030C"
-    "\u1029": "D\uFE20Z\uFE21"
-    "\u0455": "d\uFE20z\uFE21"
     "\u040F": "D\uFE20Z\u030C\uFE21"
     "\u045F": "d\uFE20z\u030C\uFE21"
+

+ 191 - 0
scriptshifter/tables/data/manchu.yml

@@ -0,0 +1,191 @@
+---
+general:
+  name: Manchu
+  parents:
+    - _ignore_base
+  case_sensitive: false
+  
+roman_to_script:
+
+  map:
+    # Generates Narrow No-Break Space
+    "\u002D": "\u202F"
+    "Ai": "\u1820\u1873"
+    "ai": "\u1820\u1873"
+    "A": "\u1820"
+    "a": "\u1820"
+    "E": "\u185D"
+    "e": "\u185D"
+    "O": "\u1823"
+    "o": "\u1823"
+    "U\u0304": "\u1861"
+    "u\u0304": "\u1861"
+    "U\u0308": "\u1861"
+    "u\u0308": "\u1861"
+    "U": "\u1860"
+    "u": "\u1860"
+    "I": "\u1873"
+    "i": "\u1873"
+    "B": "\u182A"
+    "b": "\u182A"
+    "Cy": "\u1871\u1873"
+    "cy": "\u1871\u1873"
+    "C": "\u1834"
+    "c": "\u1834"
+    "DZ": "\u186F"
+    "Dz": "\u186F"
+    "dz": "\u186F"
+    "D": "\u1869"
+    "d": "\u1869"
+    "Fa": "\u1876\u1820"
+    "fa": "\u1876\u1820"
+    "Fe": "\u1876\u1850"
+    "fe": "\u1876\u1850"
+    "Fi": "\u1838\u185E"
+    "fi": "\u1838\u185E"
+    "Fo": "\u1838\u1823"
+    "fo": "\u1838\u1823"
+    "Fu\u0304": "\u1838\u1861"
+    "fu\u0304": "\u1838\u1861"
+    "Fu\u0308": "\u1838\u1861"
+    "fu\u0308": "\u1838\u1861"
+    "Fu": "\u1838\u1860"
+    "fu": "\u1838\u1860"
+    "F": "\u1838"
+    "f": "\u1838"
+    "G\u0027": "\u186C"
+    "g\u0027": "\u186C"
+    "G": "\u1864"
+    "g": "\u1864"
+    "H\u0027": "\u186D"
+    "h\u0027": "\u186D"
+    "H": "\u1865"
+    "h": "\u1865"
+    "Jy": "\u1877\u1873"
+    "jy": "\u1877\u1873"
+    "J": "\u1835"
+    "j": "\u1835"
+    "K\u0027": "\u183A"
+    "k\u0027": "\u183A"
+    "K": "\u1874"
+    "k": "\u1874"
+    "L": "\u182F"
+    "l": "\u182F"
+    "M": "\u182E"
+    "m": "\u182E"
+    "NG": "\u1829"
+    "ng": "\u1829"
+    "N": "\u1828"
+    "n": "\u1828"
+    "P": "\u1866"
+    "p": "\u1866"
+    "R": "\u1875"
+    "r": "\u1875"
+    "Sy": "\u186E\u185F"
+    "sy": "\u186E\u185F"
+    "S\u030C": "\u1867"
+    "s\u030C": "\u1867"
+    "S": "\u1830"
+    "s": "\u1830"
+    "TS": "\u186E"
+    "Ts": "\u186E"
+    "ts": "\u186E"
+    "T": "\u1868"
+    "t": "\u1868"
+    "W": "\u1838"
+    "w": "\u1838"
+    "Y": "\u1836"
+    "y": "\u1836"
+    "ZH": "\u1877"
+    "Zh": "\u1877"
+    "zh": "\u1877"
+    "Z\u030C": "\u1870"
+    "z\u030C": "\u1870"
+    "Z": "\u1853"
+    "z": "\u1853"
+    "...": "\u1801"
+    "..": "\u1803"
+    ".": "\u180A"
+    ",": "\u1802"
+    ":": "\u1804"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u300A"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u300B"
+    "0": "\u1810"
+    "1": "\u1811"
+    "2": "\u1812"
+    "3": "\u1813"
+    "4": "\u1814"
+    "5": "\u1815"
+    "6": "\u1816"
+    "7": "\u1817"
+    "8": "\u1818"
+    "9": "\u1819"
+    
+script_to_roman:
+
+  map:
+    # Generates Narrow No-Break Space
+    "\u202F": "\u002D"
+    "\u1801": "..."
+    "\u1802": ","
+    "\u1803": ".."
+    "\u1804": ":"
+    "\u180A": "."
+    "\u180E": "-"
+    "\u1810": "0"
+    "\u1811": "1"
+    "\u1812": "2"
+    "\u1813": "3"
+    "\u1814": "4"
+    "\u1815": "5"
+    "\u1816": "6"
+    "\u1817": "7"
+    "\u1818": "8"
+    "\u1819": "9"
+    "\u1820": "a"
+    "\u1822\u1822": "i"
+    "\u1822": "i"
+    "\u1823": "o"
+    "\u1828": "n"
+    "\u1829": "ng"
+    "\u182A": "b"
+    "\u182E": "m"
+    "\u182F": "l"
+    "\u1830": "s"
+    "\u1834": "c"
+    "\u1835": "j"
+    "\u1836": "y"
+    "\u1838": "w"
+    "\u183A": "k\u0027"
+    "\u1853": "z"
+    "\u1856": "v"
+    "\u185D": "e"
+    "\u1860": "u"
+    "\u1861": "u\u0304"
+    "\u1862": "ng"
+    "\u1864": "g"
+    "\u1865": "h"
+    "\u1866": "p"
+    "\u1867": "s\u030C"
+    "\u1868": "t"
+    "\u1869": "d"
+    "\u186A": "j"
+    "\u186C": "g\u0027"
+    "\u186D": "h\u0027"
+    "\u186E\u185F": "sy"
+    "\u186E": "ts"
+    "\u186F": "dz"
+    "\u1870": "z\u030C"
+    "\u1871\u1873": "cy"
+    "\u1877\u1873": "jy"
+    "\u1873": "i"
+    "\u1874": "k"
+    "\u1875": "r"
+    "\u1876": "f"
+    "\u202F": "\u002D"
+    # Left pointing double angle quotation mark
+    "\u300A": "\u003C\u003C"
+    # Right pointing double angle quotation mark
+    "\u300B": "\u003E\u003E"

+ 1 - 1
scriptshifter/tables/data/mansi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mansi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mari_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mari (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/moldovan_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Moldovan (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mongolian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mongolian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mordvin_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mordvin (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/nenets_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Nenets (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/ossetic_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Ossetic (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/romani_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Romani (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/russian.yml

@@ -1,7 +1,7 @@
 general:
   name: Russian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/serbian.yml

@@ -1,7 +1,7 @@
 general:
   name: Serbian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/shor_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Shor (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/syriac_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Syriac (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tajik_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tajik (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tatar-kryashen_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tatar-Kryashen (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tatar_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tatar (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 275 - 0
scriptshifter/tables/data/tod_mongolian.yml

@@ -0,0 +1,275 @@
+---
+general:
+  name: Tod (Mongolian)
+  parents:
+    - _ignore_base
+  case_sensitive: false
+  
+roman_to_script:
+
+  map:
+    # Generates Narrow No-Break Space
+    "\u002Daca": "\u202F\u1820\u1854\u1820"
+    "\u002DA": "\u180E\u1820"
+    "\u002Da": "\u180E\u1820"
+    "A": "\u1820"
+    "a": "\u1820"
+    "\u002Dece": "\u202F\u1844\u1854\u1844"
+    "\u002DE": "\u180E\u1844"
+    "\u002De": "\u180E\u1844"
+    # Generates Narrow No-Break Space
+    "\u002D": "\u202F"
+    "E": "\u1844"
+    "e": "\u1844"
+    "I": "\u1845"
+    "i": "\u1845"
+    "O\u0308": "\u1848"
+    "o\u0308": "\u1848"
+    "O": "\u1846"
+    "o": "\u1846"
+    "U\u0308": "\u1849"
+    "u\u0308": "\u1849"
+    "U": "\u1847"
+    "u": "\u1847"
+    # n followed by a g with dot
+    "ng\u0307": "\u1828\u184E"
+    # this conversion shouldn't be needed, but does no harm
+    "nG": "\u184A"
+    "ng": "\u184A"
+    "N\u0303": "\u185B"
+    "n\u0303": "\u185B"
+    "N": "\u1828"
+    "n": "\u1828"
+    "B": "\u184B"
+    "b": "\u184B"
+    "P": "\u184C"
+    "p": "\u184C"
+    "Q": "\u184E"
+    "q": "\u184E"
+    "KH": "\u183B"
+    "Kh": "\u183B"
+    # this conversion shouldn't be needed, but does no harm
+    "kH": "\u183B"
+    "kh": "\u183B"
+    "K\u0307": "\u1857"
+    "k\u0307": "\u1857"
+    "Ka": "\u1857\u1820"
+    "ka": "\u1857\u1820"
+    "Ke": "\u184D\u1844"
+    "ke": "\u184D\u1844"
+    "Ki": "\u184D\u1845"
+    "ki": "\u184D\u1845"
+    "Ko\u0308": "\u184D\u1848"
+    "ko\u0308": "\u184D\u1848"
+    "Ko": "\u1857\u1846"
+    "ko": "\u1857\u1846"
+    "Ku\u0308": "\u184D\u1849"
+    "ku\u0308": "\u184D\u1849"
+    "Ku": "\u1857\u1847"
+    "ku": "\u1857\u1847"
+    "K": "\u1857"
+    "k": "\u1857"
+    "G\u0307": "\u184E"
+    "g\u0307": "\u184E"
+    "G": "\u184E"
+    "g": "\u184E"
+    "M": "\u184F"
+    "m": "\u184F"
+    "LH": "\u1840"
+    "Lh": "\u1840"
+    # this conversion shouldn't be needed, but does no harm
+    "lH": "\u1840"
+    "lh": "\u1840"
+    "L": "\u182F"
+    "l": "\u182F"
+    "TS\u0307": "\u1854"
+    # this conversion shouldn't be needed, but does no harm
+    "Ts\u0307": "\u1854"
+    # this conversion shouldn't be needed, but does no harm
+    "tS\u0307": "\u1854"
+    "ts\u0307": "\u1854"
+    "S\u0301": "\u1831"
+    "s\u0301": "\u1831"
+    "S": "\u1830"
+    "s": "\u1830"
+    "T": "\u1850"
+    "t": "\u1850"
+    "D": "\u1851"
+    "d": "\u1851"
+    "J\u0301": "\u185A"
+    "j\u0301": "\u185A"
+    "J": "\u1853"
+    "j": "\u1853"
+    "Y": "\u1855"
+    "y": "\u1855"
+    "V": "\u1856"
+    "v": "\u1856"
+    "W": "\u1856"
+    "w": "\u1856"
+    "F": "\u1839"
+    "f": "\u1839"
+    "Xa": "\u184D\u1820"
+    "xa": "\u184D\u1820"
+    "Xe": "\u184D\u1844"
+    "xe": "\u184D\u1844"
+    "Xi": "\u184D\u1845"
+    "xi": "\u184D\u1845"
+    "Xo\u0308": "\u184D\u1848"
+    "xo\u0308": "\u184D\u1848"
+    "Xo": "\u184D\u1846"
+    "xo": "\u184D\u1846"
+    "Xu\u0308": "\u184D\u1849"
+    "xu\u0308": "\u184D\u1849"
+    "Xu": "\u184D\u1847"
+    "xu": "\u184D\u1847"
+    "X": "\u184D"
+    "x": "\u184D"
+    "Z\u0301": "\u183F"
+    "z\u0301": "\u183F"
+    "ZR": "\u183F"
+    # this conversion shouldn't be needed, but does no harm
+    "Zr": "\u183F"
+    # this conversion shouldn't be needed, but does no harm
+    "zR": "\u183F"
+    "zr": "\u183F"
+    "R": "\u1837"
+    "r": "\u1837"
+    "ZH": "\u1841"
+    "Zh": "\u1841"
+    # this conversion shouldn't be needed, but does no harm
+    "zH": "\u1841"
+    "zh": "\u1841"
+    "CH": "\u1842"
+    "Ch": "\u1842"
+    # this conversion shouldn't be needed, but does no harm
+    "cH": "\u1842"
+    "ch": "\u1842"
+    "C\u0307": "\u1878"
+    "c\u0307": "\u1878"
+    "C\u0301": "\u183C"
+    "c\u0301": "\u183C"
+    "C": "\u1852"
+    "c": "\u1852"
+    "H": "\u183E"
+    "h": "\u183E"
+    "Z": "\u1834"
+    "z": "\u1834"
+    "...": "\u1801"
+    "..": "\u1803"
+    ".": "\u180A"
+    ",": "\u1802"
+    ":": "\u1804"
+    # Left pointing double angle quotation mark
+    "\u003C\u003C": "\u300A"
+    # Right pointing double angle quotation mark
+    "\u003E\u003E": "\u300B"
+    "0": "\u1810"
+    "1": "\u1811"
+    "2": "\u1812"
+    "3": "\u1813"
+    "4": "\u1814"
+    "5": "\u1815"
+    "6": "\u1816"
+    "7": "\u1817"
+    "8": "\u1818"
+    "9": "\u1819"
+    "\u0304": "\u1843"
+
+script_to_roman:
+
+  map:
+    "\u184E\u1820": "g\u0307a"
+    "\u184E\u1846": "g\u0307o"
+    "\u184E\u1847": "g\u0307u"
+    "\u1820\u184E": "aq"
+    "\u1846\u184E": "oq"
+    "\u1847\u184E": "uq"
+    "\u184E\u1844": "ge"
+    "\u184E\u1845": "gi"
+    "\u184E\u1848": "go\u0308"
+    "\u184E\u1849": "gu\u0308"
+    "\u1844\u184E": "eq"
+    "\u1845\u184E": "iq"
+    "\u1848\u184E": "o\u0308q"
+    "\u1849\u184E": "u\u0308q"
+    "\u184D\u1820": "xa"
+    "\u184D\u1846": "xo"
+    "\u184D\u1847": "xu"
+    "\u184D\u1844": "ke"
+    "\u184D\u1845": "ki"
+    "\u184D\u1848": "ko\u0308"
+    "\u184D\u1849": "ku\u0308"
+    "\u180E\u1820": "\u002Da"
+    "\u180E\u1844": "\u002De"
+    "\u180E\u1845": "\u002Di"
+    "\u180E": "\u002D"
+    "\u202F": "\u002D"
+    "\u1801": "..."
+    "\u1803": "."
+    "\u1802": ","
+    "\u1804": ":"
+    "\u1810": "0"
+    "\u1811": "1"
+    "\u1812": "2"
+    "\u1813": "3"
+    "\u1814": "4"
+    "\u1815": "5"
+    "\u1816": "6"
+    "\u1817": "7"
+    "\u1818": "8"
+    "\u1819": "9"
+    "\u1820": "a"
+    "\u1828": "n"
+    "\u182F": "l"
+    "\u1830": "s"
+    "\u1831": "s\u0301"
+    "\u1834": "z"
+    "\u1837": "r"
+    "\u1839": "f"
+    "\u183C": "c\u0301"
+    "\u183E": "h"
+    "\u183F": "z\u0301"
+    "\u1843": "\u0304"
+    "\u1844": "e"
+    "\u1845": "i"
+    "\u1846": "o"
+    "\u1847": "u"
+    "\u1848": "o\u0308"
+    "\u1849": "u\u0308"
+    "\u184A": "ng"
+    "\u184B": "b"
+    "\u184C": "p"
+    "\u184D\u1820": "xa"
+    "\u184D\u1844": "xe"
+    "\u184D\u1845": "xi"
+    "\u184D\u1848": "xo\u0308"
+    "\u184D\u1846": "xo"
+    "\u184D\u1849": "xu\u0308"
+    "\u184D\u1847": "xu"
+    "\u184D": "q"
+    "\u184E": "g"
+    "\u184F": "m"
+    "\u1850": "t"
+    "\u1851": "d"
+    "\u1852": "c"
+    "\u1853": "j"
+    "\u1854": "ts"
+    "\u1855": "y"
+    "\u1856": "v"
+    "\u1857\u1820": "ka"
+    "\u1857\u1844": "ke"
+    "\u1857\u1845": "ki"
+    "\u1857\u1846": "ko"
+    "\u1857\u1847": "ku"
+    "\u1857\u1848": "ko\u0308"
+    "\u1857\u1849": "ku\u0308"
+    "\u1857": "k"
+    "\u1858": "g"
+    "\u1859": "h"
+    "\u185A": "j\u0301"
+    "\u185B": "k\u0307"
+    "\u185C": "j"
+    "\u00AB": "\u003C\u003C"
+    "\u00BB": "\u003E\u003E"
+    "\u300A": "\u0022"
+    "\u300B": "\u0022"

+ 1 - 1
scriptshifter/tables/data/turkmen_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Turkmen (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tuvinian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tuvinian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/udmurt_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Udmurt (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/uighur_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Uighur (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/ukrainian.yml

@@ -1,7 +1,7 @@
 general:
   name: Ukrainian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/uzbek_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Uzbek (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/yakut_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Yakut (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/yuit_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Yuit (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 16 - 0
scriptshifter/tables/index.yml

@@ -23,6 +23,22 @@ arabic:
 armenian:
   marc_code: arm
   name: Armenian
+# asian_cyrillic:
+#   description: >
+#     Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza,
+#     Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat,
+#     Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz,
+#     Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak,
+#     Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp,
+#     Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodtsov, Mordvin, Nanai,
+#     Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor,
+#     Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
+#   marc_code: >
+#     abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa,
+#     krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah
+#   name: Asian Cyrillic
+assamese:
+  name: Assamese
 azerbaijani_cyrillic:
   marc_code: aze
   name: Azerbaijani (Cyrillic)

+ 13 - 1
scriptshifter/trans.py

@@ -2,10 +2,11 @@ import logging
 
 from importlib import import_module
 from re import Pattern, compile
+from unicodedata import normalize as precomp_normalize
 
 from scriptshifter.exceptions import BREAK, CONT
 from scriptshifter.tables import (
-        BOW, EOW, WORD_BOUNDARY, FEAT_CASEI, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
+        BOW, EOW, WORD_BOUNDARY, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
         get_connection, get_lang_dcap, get_lang_general, get_lang_hooks,
         get_lang_ignore, get_lang_map, get_lang_normalize)
 
@@ -345,6 +346,17 @@ def _normalize_src(ctx, norm_rules):
     NOTE: this manipulates the protected source attribute so it may not
     correspond to the originally provided source.
     """
+    # Normalize precomposed Unicode characters.
+    #
+    # In using diacritics, LC standards prefer the decomposed form (combining
+    # diacritic + base character) to the pre-composed form (single Unicode
+    # symbol for the letter with diacritic).
+    #
+    # Note: only safe for R2S.
+    if ctx.t_dir == FEAT_R2S:
+        logger.debug("Normalizing pre-composed symbols.")
+        ctx._src = precomp_normalize("NFD", ctx.src)
+
     for nk, nv in norm_rules.items():
         ctx._src = ctx.src.replace(nk, nv)