ソースを参照

Merge branch 'main' into test

scossu 2 ヶ月 前
コミット
1d20276196
53 ファイル変更871 行追加965 行削除
  1. 0 133
      scriptshifter/tables/data/_cyrillic_base.yml
  2. 22 17
      scriptshifter/tables/data/_ignore_base.yml
  3. 1 1
      scriptshifter/tables/data/abkhaz_cyrillic.yml
  4. 1 1
      scriptshifter/tables/data/altai_cyrillic.yml
  5. 1 1
      scriptshifter/tables/data/azerbaijani_cyrillic.yml
  6. 1 1
      scriptshifter/tables/data/bashkir_cyrillic.yml
  7. 1 1
      scriptshifter/tables/data/belarusian.yml
  8. 18 38
      scriptshifter/tables/data/bulgarian.yml
  9. 1 2
      scriptshifter/tables/data/buriat_cyrillic.yml
  10. 1 1
      scriptshifter/tables/data/chukchi_cyrillic.yml
  11. 1 1
      scriptshifter/tables/data/church_slavonic.yml
  12. 1 1
      scriptshifter/tables/data/chuvash_cyrillic.yml
  13. 705 700
      scriptshifter/tables/data/cyrillic_generic.yml
  14. 1 1
      scriptshifter/tables/data/dungan_cyrillic.yml
  15. 1 1
      scriptshifter/tables/data/even-evenki_cyrillic.yml
  16. 1 1
      scriptshifter/tables/data/gagauz_cyrillic.yml
  17. 2 0
      scriptshifter/tables/data/greek_modern.yml
  18. 1 1
      scriptshifter/tables/data/kalmyk_cyrillic.yml
  19. 1 1
      scriptshifter/tables/data/kara-kalpak_cyrillic.yml
  20. 1 1
      scriptshifter/tables/data/karachay-balkar_cyrillic.yml
  21. 1 1
      scriptshifter/tables/data/karelian_cyrillic.yml
  22. 1 1
      scriptshifter/tables/data/kazakh_cyrillic.yml
  23. 1 1
      scriptshifter/tables/data/khakass_cyrillic.yml
  24. 1 1
      scriptshifter/tables/data/khanty_cyrillic.yml
  25. 1 1
      scriptshifter/tables/data/komi_cyrillic.yml
  26. 1 1
      scriptshifter/tables/data/koryak_cyrillic.yml
  27. 1 1
      scriptshifter/tables/data/kyrgyz_cyrillic.yml
  28. 1 1
      scriptshifter/tables/data/lithuanian_cyrillic.yml
  29. 62 16
      scriptshifter/tables/data/macedonian.yml
  30. 1 1
      scriptshifter/tables/data/mansi_cyrillic.yml
  31. 1 1
      scriptshifter/tables/data/mari_cyrillic.yml
  32. 1 1
      scriptshifter/tables/data/moldovan_cyrillic.yml
  33. 1 1
      scriptshifter/tables/data/mongolian_cyrillic.yml
  34. 1 1
      scriptshifter/tables/data/mordvin_cyrillic.yml
  35. 1 1
      scriptshifter/tables/data/nenets_cyrillic.yml
  36. 1 1
      scriptshifter/tables/data/ossetic_cyrillic.yml
  37. 1 1
      scriptshifter/tables/data/romani_cyrillic.yml
  38. 1 1
      scriptshifter/tables/data/russian.yml
  39. 1 1
      scriptshifter/tables/data/serbian.yml
  40. 1 1
      scriptshifter/tables/data/shor_cyrillic.yml
  41. 1 1
      scriptshifter/tables/data/syriac_cyrillic.yml
  42. 1 1
      scriptshifter/tables/data/tajik_cyrillic.yml
  43. 1 1
      scriptshifter/tables/data/tatar-kryashen_cyrillic.yml
  44. 1 1
      scriptshifter/tables/data/tatar_cyrillic.yml
  45. 1 1
      scriptshifter/tables/data/turkmen_cyrillic.yml
  46. 1 1
      scriptshifter/tables/data/tuvinian_cyrillic.yml
  47. 1 1
      scriptshifter/tables/data/udmurt_cyrillic.yml
  48. 1 1
      scriptshifter/tables/data/uighur_cyrillic.yml
  49. 1 1
      scriptshifter/tables/data/ukrainian.yml
  50. 1 1
      scriptshifter/tables/data/uzbek_cyrillic.yml
  51. 1 1
      scriptshifter/tables/data/yakut_cyrillic.yml
  52. 1 1
      scriptshifter/tables/data/yuit_cyrillic.yml
  53. 16 14
      scriptshifter/tables/index.yml

+ 0 - 133
scriptshifter/tables/data/_cyrillic_base.yml

@@ -1,133 +0,0 @@
-general:
-  name: Cyrillic base
-  parents:
-    - _ignore_base
-  notes: >
-    copied from Russian .cfg file and stripped
-    off language-specific tokens. Russian ignore list
-    has been left here on purpose, assuming it's valid
-    for all child languages.
-
-roman_to_script:
-  map:
-    "A": "\u0410"
-    "a": "\u0430"
-    "B": "\u0411"
-    "b": "\u0431"
-    "V": "\u0412"
-    "v": "\u0432"
-    "D": "\u0414"
-    "d": "\u0434"
-    "E": "\u0415"
-    "e": "\u0435"
-    # this conversion shouldn't be needed, but does no harm
-    "Z": "\u0417"
-    "z": "\u0437"
-    "I\u0306": "\u0419"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20U\uFE21": "\u042E"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20u\uFE21": "\u042E"
-    "I\uFE20A\uFE21": "\u042F"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20a\uFE21": "\u042F"
-    "i\u0306": "\u0439"
-    "i\uFE20u\uFE21": "\u044E"
-    "i\uFE20a\uFE21": "\u044F"
-    # this conversion shouldn't be needed, but does no harm
-    "KH": "\u0425"
-    "Kh": "\u0425"
-    "K": "\u041A"
-    "kh": "\u0445"
-    "k": "\u043A"
-    "L": "\u041B"
-    "l": "\u043B"
-    "M": "\u041C"
-    "m": "\u043C"
-    "N": "\u041D"
-    "n": "\u043D"
-    "O": "\u041E"
-    "o": "\u043E"
-    "P": "\u041F"
-    "p": "\u043F"
-    "R": "\u0420"
-    "r": "\u0440"
-    # this conversion shouldn't be needed, but does no harm
-    # this conversion shouldn't be needed, but does no harm
-    "SH": "\u0428"
-    "Sh": "\u0428"
-    "S": "\u0421"
-    "sh": "\u0448"
-    "s": "\u0441"
-    # this conversion shouldn't be needed, but does no harm
-    "T": "\u0422"
-    "t": "\u0442"
-    "U": "\u0423"
-    "u": "\u0443"
-    "F": "\u0424"
-    "f": "\u0444"
-    # this conversion shouldn't be needed, but does no harm
-    "CH": "\u0427"
-    "Ch": "\u0427"
-    "ch": "\u0447"
-    # this conversion shouldn't be needed, but does no harm
-    "\uFE20": ""
-    # this conversion shouldn't be needed, but does no harm
-    "\uFE21": ""
-    # this conversion is ambiguous - \u042C is also theoretically possible
-    "\u02B9": "\u044C"
-
-script_to_roman:
-  map:
-    "\u0404": "I\uFE20E\uFE21"
-    "\u0407": "I\u0308"
-    "\u0410": "A"
-    "\u0411": "B"
-    "\u0412": "V"
-    "\u0414": "D"
-    "\u0415": "E"
-    "\u0417": "Z"
-    "\u0419": "I\u0306"
-    "\u041A": "K"
-    "\u041B": "L"
-    "\u041C": "M"
-    "\u041D": "N"
-    "\u041E": "O"
-    "\u041F": "P"
-    "\u0420": "R"
-    "\u0421": "S"
-    "\u0422": "T"
-    "\u0423": "U"
-    "\u0424": "F"
-    "\u0425": "Kh"
-    "\u0427": "Ch"
-    "\u0428": "Sh"
-    "\u0429": "Shch"
-    "\u042C": "\u02B9"
-    "\u042E": "I\uFE20U\uFE21"
-    "\u042F": "I\uFE20A\uFE21"
-    "\u0430": "a"
-    "\u0431": "b"
-    "\u0432": "v"
-    "\u0434": "d"
-    "\u0435": "e"
-    "\u0437": "z"
-    "\u0439": "i\u0306"
-    "\u043A": "k"
-    "\u043B": "l"
-    "\u043C": "m"
-    "\u043D": "n"
-    "\u043E": "o"
-    "\u043F": "p"
-    "\u0440": "r"
-    "\u0441": "s"
-    "\u0442": "t"
-    "\u0443": "u"
-    "\u0444": "f"
-    "\u0445": "kh"
-    "\u0447": "ch"
-    "\u0448": "sh"
-    "\u0449": "shch"
-    "\u044C": "\u02B9"
-    "\u044E": "i\uFE20u\uFE21"
-    "\u044F": "i\uFE20a\uFE21"

+ 22 - 17
scriptshifter/tables/data/_ignore_base.yml

@@ -9,6 +9,12 @@ roman_to_script:
     - "date of publication not identified"
     - "place of publication not identified"
     - "publisher not identified"
+    - "and one other"
+    - "et al."
+  ignore_ptn:
+    - "and ([a-z0-9]+ )?others"
+
+    # Incorrectly entered (but frequently found) Roman numerals.
     # NOTE There is ambiguity about ignoring these
     # words. Note that the single-character Roman
     # numerals are not included on purpose.
@@ -16,25 +22,24 @@ roman_to_script:
     # dedicated U+2160÷U+216F (uppercase Roman
     # numerals) and/or U+2170÷U+217F (lower case Roman
     # numerals) ranges to avoid this ambiguity.
-    - "and one other"
-    - "et al."
-  ignore_ptn:
-    - "and ([a-z0-9]+ )?others"
-    - "I{2,3}"
-    - "I(V|X)"
-    - "LI{,3}"
-    - "LI?(V|X)"
-    - "L(V|X{1,3})I{,3}"
-    - "LX{1,3}I?V"
-    - "LX{1,3}VI{,3}"
-    - "(V|X{1,3})I{,3}"
-    - "X{1,3}I{,3}"
-    - "X{1,3}I(V|X)"
-    - "X{1,3}VI{,3}"
-    - "[\u2021$][0-9a-z] *"
+    - "\\bI{2,3}\\b"
+    - "\\bI(V|X)\\b"
+    - "\\bLI{,3}\\b"
+    - "\\bLI?(V|X)\\b"
+    - "\\bL(V|X{1,3})I{,3}\\b"
+    - "\\bLX{1,3}I?V\\b"
+    - "\\bLX{1,3}VI{,3}\\b"
+    - "\\b(V|X{1,3})I{,3}\\b"
+    - "\\bX{1,3}I{,3}\\b"
+    - "\\bX{1,3}I(V|X)\\b"
+    - "\\bX{1,3}VI{,3}\\b"
+
+    # MARC sub-field markers (no leading \b: the marker is a non-word char).
+    - "[\u2021$][0-9a-z]\\b"
 
 script_to_roman:
   ignore:
     - " "
   ignore_ptn:
-    - "[\u2021$][0-9a-z] *"
+    # MARC sub-field markers (no leading \b: the marker is a non-word char).
+    - "[\u2021$][0-9a-z]\\b"

+ 1 - 1
scriptshifter/tables/data/abkhaz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Abkhaz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/altai_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Altai (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/azerbaijani_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Azerbaijani (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/bashkir_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Bashkir (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/belarusian.yml

@@ -1,7 +1,7 @@
 general:
   name: Belarusian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 18 - 38
scriptshifter/tables/data/bulgarian.yml

@@ -1,58 +1,38 @@
 general:
   name: Bulgarian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:
-    "G": "\u0413"
-    "g": "\u0433"
-    # this conversion shouldn't be needed, but does no harm
-    "ZH": "\u0416"
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "I\uFE20E\uFE21": "\u0462"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20e\uFE21": "\u0462"
-    # this conversion shouldn't be needed, but does no harm
-    # this conversion shouldn't be needed, but does no harm
-    "I": "\u0418"
-    "i\uFE20e\uFE21": "\u0463"
-    "i": "\u0438"
-    # this conversion shouldn't be needed, but does no harm
     "SHT": "\u0429"
     "Sht": "\u0429"
     "sht": "\u0449"
-    "T\uFE20S\uFE21": "\u0426"
-    # this conversion shouldn't be needed, but does no harm
-    "T\uFE20s\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "U\u0310": "\u046A"
+    "U\u0306": "\u042A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u016C": "\u042A"
     "u\u0306": "\u044A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u016D": "\u044A"
+    "U\u0310": "\u046A"
     "u\u0310": "\u046B"
     # this conversion is ambiguous - \u042A is also theoretically possible
     "\u02BA": "\u044A"
+    # upper case hard sign is unlikely to occur
+    "\u02BA\u0332": "\u042A"
 
 script_to_roman:
   map:
-    "\u044C": ""
-    "\u042C": ""
-    "\u044A": ""
-    "\u042A%": ""  # Final
-    "\u042A": "u\u0306"
-    "\u0413": "G"
-    "\u0433": "g"
-    "\u0416": "Zh"
-    "\u0436": "zh"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0418": "I"
-    "\u0463": "i\uFE20e\uFE21"
-    "\u0438": "i"
     "\u0429": "Sht"
+    "\u042A": "U\u0306"
+    # Capital letter hard sign at the end of a word (rare)
+    "\u042A%": "\u02BA\u0332"
+    "\u042C": "\u02B9\u0332"
     "\u0449": "sht"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0446": "t\uFE20s\uFE21"
+    "\u044A": "u\u0306"
+    # Small letter hard sign at the end of a word (rare)
+    "\u044A%": "\u02BA"
+    "\u044C": "\u02B9"
     "\u046A": "U\u0310"
     "\u046B": "u\u0310"
-    "\u042A": "u\u016C"
-    "\u044A": "u\u016D"
+    

+ 1 - 2
scriptshifter/tables/data/buriat_cyrillic.yml

@@ -1,8 +1,7 @@
 general:
   name: Buriat (Cyrillic)
   parents:
-    - _cyrillic_base
-    - _ignore_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/chukchi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Chukchi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/church_slavonic.yml

@@ -1,7 +1,7 @@
 general:
   name: Church Slavonic
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/chuvash_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Chuvash (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 705 - 700
scriptshifter/tables/data/asian_cyrillic.yml → scriptshifter/tables/data/cyrillic_generic.yml

@@ -1,700 +1,705 @@
-general:
-  name: Asian (Cyrillic)
-  parents:
-    - _cyrillic_base
-
-# COMMON COMBINING CHARACTERS (always follow a base letter):
-# combining grave \u0300
-# combining acute \u0301
-# combining circumflex \u0302
-# combining tilde \u0303
-# combining macron \u0304
-# combining breve \u0306
-# combining dot above \u0307
-# combining diaeresis \u0308
-# combining ring above \u030A
-# combining double acute \u030B
-# combining caron (hachek) \u030C
-# combining candrabindu \u0310
-# combining dot below \u0323
-# combining dieresis below \u0324
-# combining comma below \u0326 (Romanian, Latvian, Livonian)
-# combining cedilla \u0327 (French, Turkish, Azeri)
-# combining ogonek (hook) \u0328 (Polish, Lithuanian)
-# combining low line \u0332
-# combining double low line \u0333
-# combining left ligature \uFE20 (Cyrillic transliteration)
-# combining right ligature \uFE21 (Cyrillic transliteration)
-# soft sign/prime (spacing) \u02B9(Cyrillic transliteration)
-# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
-# ayn(spacing) \u02BB (Semitic and Caucasian languages)
-# alif (spacing) \u02BC (Semitic languages)
-# middle dot (space) \u00B7) (Catalan)
-
-roman_to_script:
-  map:
-    "A\uFE20E\uFE21": "\u04D4"
-    "A\uFE20e\uFE21": "\u04D4"
-    "a\uFE20e\uFE21": "\u04D5"
-    "A\u0306\u0323": "\u04D0"
-    "a\u0306\u0323": "\u04D1"
-    "\u00C6": "\u04D4"
-    "\u00E6": "\u04D5"
-    "A\u0306": "\u04D8"
-    "a\u0306": "\u04D9"
-    "A\u030B": "\u04DA"
-    "a\u030B": "\u04DB"
-    "A\u0308": "\u04D2"
-    "a\u0308": "\u04D3"
-    "A\u0310": "\u0518"
-    "a\u0310": "\u0519"
-    
-    "B": "\u0411"
-    "b": "\u0431"
-    
-    "C\u0301h\u0301": "\u04BE"
-    "c\u0301h\u0301": "\u04BF"
-    "C\u0301h": "\u04BC"
-    "c\u0301h": "\u04BD"
-    "C\u0301": "\u040B"
-    "c\u0301": "\u045B"
-    "C\u0308h": "\u04F4"
-    "c\u0308h": "\u04F5"
-    "C\u0323h": "\u04CB"
-    "c\u0323h": "\u04CC"
-    
-    "D\u0301": "\u0502"
-    "d\u0301": "\u0503"
-    "D\u0307": "\u0500"
-    "d\u0307": "\u0501"
-    "D\uFE20c\uFE21h": "\u052C"
-    "d\uFE20c\uFE21h": "\u052D"
-    "D\uFE20z\uFE21h": "\u052A"
-    "d\uFE20z\uFE21h": "\u052B"
-    "D\uFE20Z\uFE21": "\u04E0"
-    "d\uFE20z\uFE21": "\u04E1"
-    "Dz\u030C": "\u040F"
-    "dz\u030C": "\u045F"
-    "D": "\u0414"
-    "d": "\u0434"
-    
-    "E\u0300": "\u0400"
-    "e\u0300": "\u0450"
-    "E\u0304": "\u0404"
-    "e\u0304": "\u0454"
-    "E\u0306": "\u04D6"
-    "e\u0306": "\u04D7"
-    "E\u0306\u0323": "\u048C"
-    "e\u0306\u0323": "\u048D"
-    "E\u0307": "\u042D"
-    "e\u0307": "\u044D"
-    "E\u0308\u0323": "\u04EC"
-    "e\u0308\u0323": "\u04ED"
-    "E\u0308": "\u0401"
-    "e\u0308": "\u0451"
-    "E\u0328": "\u0466"
-    "e\u0328": "\u0467"
-    
-    "F\u0307": "\u0472"
-    "f\u0307": "\u0473"
-    "F": "\u0424"
-    "f": "\u0444"
-    
-    "Gh\u0327": "\u04FA"
-    "gh\u0327": "\u04FB"
-    "Gh": "\u0492"
-    "gh": "\u0493"
-    "G\u0301": "\u0403"
-    "g\u0301": "\u0453"
-    "G\u0306": "\u0490"
-    "g\u0306": "\u0491"
-    "G\u0307": "\u049C"
-    "g\u0307": "\u049D"
-    "G\u0323": "\u04F6"
-    "g\u0323": "\u04F7"
-    "G\u0327": "\u0494"
-    "g\u0327": "\u0495"
-    
-    "H\u0304": "\u04FE"
-    "h\u0304": "\u04FF"
-    "H\u0327": "\u04FC"
-    "h\u0327": "\u04FD"
-    "H\u0307": "\u04BA"
-    "h\u0307": "\u04BB"
-    "H\u0308": "\u04C0"
-    "h\u0308": "\u04CF"
-    
-    "I\u0300": "\u040D"
-    "i\u0300": "\u045D"
-    "I\u0304\u0323": "\u04E2"
-    "i\u0304\u0323": "\u04E3"
-    "I\u0304": "\u0406"
-    "i\u0304": "\u0456"
-    "I\u0306\u0323": "\u048A"
-    "i\u0306\u0323": "\u048B"
-    "I\u0306": "\u0419"
-    "i\u0306": "\u0439"
-    "I\u0308\u0323": "\u04E4"
-    "i\u0308\u0323": "\u04E5"
-    "I\u0308": "\u0407"
-    "i\u0308": "\u0457"
-    "I\u0310": "\u0408"
-    "i\u0310": "\u0458"
-    
-    "I\uFE20A\uFE21": "\u042F"
-    "i\uFE20a\uFE21": "\u044F"
-    "A": "\u0410"
-    "a": "\u0430"
-    
-    "I\uFE20E\uFE21\u0304": "\u0464"
-    "i\uFE20e\uFE21\u0304": "\u0465"
-    "I\uFE20E\uFE21\u0328": "\u0468"
-    "i\uFE20e\uFE21\u0328": "\u0469"
-    "I\uFE20E\uFE21": "\u0462"
-    "i\uFE20e\uFE21": "\u0463"
-    "E": "\u0415"
-    "e": "\u0435"
-    
-    "I\uFE20O\uFE21\u0328": "\u046C"
-    "i\uFE20o\uFE21\u0328": "\u046D"
-    "I\uFE20U\uFE21": "\u042E"
-    "i\uFE20u\uFE21": "\u044E"
-    "I": "\u0418"
-    "i": "\u0438"
-    
-    "J\u0304": "\u04B8"
-    "j\u0304": "\u04B9"
-    "J\u0306": "\u04C1"
-    "j\u0306": "\u04C2"
-    "J\u0302": "\u04B6"
-    "j\u0302": "\u04B7"
-    "J\u0308": "\u04DC"
-    "j\u0308": "\u04DD"
-    
-    "K\u0300": "\u051E"
-    "k\u0300": "\u051F"
-    "K\u0301": "\u040C"
-    "k\u0301": "\u045C"
-    "K\uFE20H\uFE21": "\u04B2"
-    "k\uFE20h\uFE21": "\u04B3"
-    "Kh": "\u0425"
-    "kh": "\u0445"
-    "K\uFE20S\uFE21": "\u046E"
-    "k\uFE20s\uFE21": "\u046F"
-    "K": "\u041A"
-    "k": "\u043A"
-    
-    "Lj": "\u0409"
-    "lj": "\u0459"
-    "Lkh\u0307": "\u0514"
-    "lkh\u0307": "\u0515"
-    "L\u0301": "\u0508"
-    "l\u0301": "\u0509"
-    "L\u0321": "\u04C5"
-    "l\u0326": "\u04C6"
-    "L\u0323": "\u052E"
-    "l\u0323": "\u052F"
-    "L\u0327": "\u0512"
-    "l\u0327": "\u0513"
-    "L\u0324": "\u0520"
-    "l\u0324": "\u0521"
-    "L": "\u041B"
-    "l": "\u043B"
-    
-    "M\u0323": "\u04CD"
-    "m\u0323": "\u04CE"
-    "M": "\u041C"
-    "m": "\u043C"
-    
-    "Nj": "\u040A"
-    "nj": "\u045A"
-    "N\u0301G\u0300": "\u04A4"
-    "n\u0301g\u0300": "\u04A5"
-    "N\u0301": "\u050A"
-    "n\u0301": "\u050B"
-    "N\u0326": "\u0528"
-    "n\u0326": "\u0529"
-    "N\u0327": "\u0522"
-    "n\u0327": "\u0523"
-    "N\uFE20\u0323G\uFE21": "\u04C9"
-    "n\uFE20\u0323g\uFE21": "\u04CA"
-    "N\uFE20\u0327G\uFE21": "\u04C7"
-    "n\uFE20\u0327g\uFE21": "\u04C8"
-    "N\uFE20G\uFE21": "\u04A2"
-    "n\uFE20g\uFE21": "\u04A3"
-    "No\u0332": "\u2116"
-    "N": "\u041D"
-    "n": "\u043D"
-    
-    "G": "\u0413"
-    "g": "\u0433"
-    
-    "J": "\u0496"
-    "j": "\u0497"
-    
-    "O\u0303": "\u047C"
-    "o\u0303": "\u047D"
-    "O\u0304\u0323": "\u047A"
-    "o\u0304\u0323": "\u047B"
-    "O\u0304\uFE20T\uFE21": "\u047E"
-    "o\u0304\uFE20t\uFE21": "\u047F"
-    "O\u0304\u0324": "\u0460"
-    "o\u0304\u0324": "\u0461"
-    "O\u0304": "\u04EA"
-    "o\u0304": "\u04EB"
-    "O\u0307": "\u04E8"
-    "o\u0307": "\u04E9"
-    "O\u0308": "\u04E6"
-    "o\u0308": "\u04E7"
-    "O\u0328": "\u046A"
-    "o\u0328": "\u046B"
-    "O\uFE20u\uFE21": "\u0478"
-    "o\uFE20u\uFE21": "\u0479"
-    "O": "\u041E"
-    "o": "\u043E"
-    
-    "Ph": "\u04A6"
-    "ph": "\u04A7"
-    "P\u0323": "\u0524"
-    "p\u0323": "\u0525"
-    "P\uFE20S\uFE21": "\u0470"
-    "p\uFE20s\uFE21": "\u0471"
-    "P": "\u041F"
-    "p": "\u043F"
-    
-    "Q\u0300": "\u04A0"
-    "q\u0300": "\u04A1"
-    "Q\u0302": "\u0480"
-    "q\u0302": "\u0481"
-    "Q\u0304": "\u049E"
-    "q\u0304": "\u049F"
-    "Q\u0307": "\u04C3"
-    "q\u0307": "\u04C4"
-    "Q\u0308": "\u051A"
-    "q\u0308": "\u051B"
-    "Q": "\u049A"
-    "q": "\u049B"
-    
-    "Rkh\u0307": "\u0516"
-    "rkh\u0307": "\u0517"
-    "R\u0306": "\u048E"
-    "r\u0306": "\u048F"
-    "R": "\u0420"
-    "r": "\u0440"
-    
-    "Shch": "\u0429"
-    "shch": "\u0449"
-    "Sh\u0323": "\u0526"
-    "sh\u0323": "\u0527"
-    "Sh": "\u0428"
-    "sh": "\u0448"
-    "S\u0301": "\u050C"
-    "s\u0301": "\u050D"
-    "S\u0307": "\u0405"
-    "s\u0307": "\u0455"
-    
-    "Ch": "\u0427"
-    "ch": "\u0447"
-    "C": "\u0426"
-    "c": "\u0446"
-    
-    "Th": "\u04AA"
-    "th": "\u04AB"
-    "T\u0301": "\u050E"
-    "t\u0301": "\u050F"
-    "T\u0327": "\u04AC"
-    "t\u0327": "\u04AD"
-    "T\uFE20H\uFE21": "\u0498"
-    "t\uFE20h\uFE21": "\u0499"
-    "T\uFE20S\uFE21": "\u0426"
-    "t\uFE20s\uFE21": "\u0446"
-    "T\uFE20S\uFE21\u0307": "\u04B4"
-    "t\uFE20s\uFE21\u0307": "\u04B5"
-    
-    "S": "\u0421"
-    "s": "\u0441"
-    
-    "T": "\u0422"
-    "t": "\u0442"
-    
-    "U\u0302": "\u04B0"
-    "u\u0302": "\u04B1"
-    "U\u0304": "\u04EE"
-    "u\u0304": "\u04EF"
-    "U\u0306": "\u040E"
-    "u\u0306": "\u045E"
-    "U\u0307": "\u04AE"
-    "u\u0307": "\u04AF"
-    "U\u0308": "\u04F0"
-    "u\u0308": "\u04F1"
-    "U\u030B": "\u04F2"
-    "u\u030B": "\u04F3"
-    "U": "\u0423"
-    "u": "\u0443"
-    
-    "V\u0307": "\u0474"
-    "v\u0307": "\u0475"
-    "V\u0308": "\u0476"
-    "v\u0308": "\u0477"
-    "V": "\u0412"
-    "v": "\u0432"
-    
-    "W\u0308": "\u051C"
-    "w\u0308": "\u051D"
-    "W": "\u04A8"
-    "w": "\u04A9"
-    
-    "X": "\u0058"
-    "x": "\u0078"
-    
-    "Y\u0307": "\u0474"
-    "y\u0307": "\u0475"
-    "Y\u0308": "\u04F8"
-    "y\u0308": "\u04F9"
-    "Y": "\u042B"
-    "y": "\u044B"
-    
-    "Zh": "\u0416"
-    "zh": "\u0436"
-    "Z\u0301": "\u0504"
-    "z\u0301": "\u0505"
-    "Z\u0307": "\u0510"
-    "z\u0307": "\u0511"
-    "Z\u0308": "\u04DE"
-    "z\u0308": "\u04DF"
-    "Z\u0327": "\u0506"
-    "z\u0327": "\u0507"
-    "Z": "\u0417"
-    "z": "\u0437"
-    
-    "H": "\u0413"
-    "h": "\u0433"
-    
-    "\u0110": "\u0402"
-    "\u0111": "\u0452"
-    "\u02B9\u0333": "\u042C"
-    "\u02B9": "\u044C"
-    "\u02BA\u0333": "\u042A"
-    "\u02BA": "\u044A"
-    "\u0303": "\u0487"
-    "\u0311": "\u0484"
-    "\u0313": "\u0486"
-    "\u0314": "\u0485"
-    "\u007E": "\u0483"
-    "(|)": "\u0482"
-    "(^)": "\u0488"
-    "(')": "\u0489"
-    
-    "\u003C\u003C": "\u00AB"
-    "\u003E\u003E": "\u00BB"
-
-script_to_roman:
-  map:
-    
-    "\u00AB": "\""
-    "\u00BB": "\""
-    "\u2116": "No\u0332"
-    "\u0400": "E\u0300"
-    "\u0401": "E\u0308"
-    "\u0402": "\u0110"
-    "\u0403": "G\u0301"
-    "\u0404": "E\u0304"
-    "\u0405": "S\u0307"
-    "\u0406": "I\u0304"
-    "\u0407": "I\u0308"
-    "\u0408": "I\u0310"
-    "\u0409": "Lj"
-    "\u040A": "Nj"
-    "\u040B": "C\u0301"
-    "\u040C": "K\u0301"
-    "\u040D": "I\u0300"
-    "\u040E": "U\u0306"
-    "\u040F": "Dz\u030C"
-    "\u0410": "A"
-    "\u0411": "B"
-    "\u0412": "V"
-    "\u0413": "G"
-    "\u0414": "D"
-    "\u0415": "E"
-    "\u0416": "Zh"
-    "\u0417": "Z"
-    "\u0418": "I"
-    "\u0419": "I\u0306"
-    "\u041A": "K"
-    "\u041B": "L"
-    "\u041C": "M"
-    "\u041D": "N"
-    "\u041E": "O"
-    "\u041F": "P"
-    "\u0420": "R"
-    "\u0421": "S"
-    "\u0422": "T"
-    "\u0423": "U"
-    "\u0424": "F"
-    "\u0425": "Kh"
-    "\u0426": "T\uFE20S\uFE21"
-    "\u0427": "Ch"
-    "\u0428": "Sh"
-    "\u0429": "Shch"
-    "\u042A": "\u02BA\u0333"
-    "\u042B": "Y"
-    "\u042C": "\u02B9\u0333"
-    "\u042D": "E\u0307"
-    "\u042E": "I\uFE20U\uFE21"
-    "\u042F": "I\uFE20A\uFE21"
-    "\u0430": "a"
-    "\u0431": "b"
-    "\u0432": "v"
-    "\u0433": "g"
-    "\u0434": "d"
-    "\u0435": "e"
-    "\u0436": "zh"
-    "\u0437": "z"
-    "\u0438": "i"
-    "\u0439": "i\u0306"
-    "\u043A": "k"
-    "\u043B": "l"
-    "\u043C": "m"
-    "\u043D": "n"
-    "\u043E": "o"
-    "\u043F": "p"
-    "\u0440": "r"
-    "\u0441": "s"
-    "\u0442": "t"
-    "\u0443": "u"
-    "\u0444": "f"
-    "\u0445": "kh"
-    "\u0446": "t\uFE20s\uFE21"
-    "\u0447": "ch"
-    "\u0448": "sh"
-    "\u0449": "shch"
-    "\u044A": "\u02BA"
-    "\u044B": "y"
-    "\u044C": "\u02B9"
-    "\u044D": "e\u0307"
-    "\u044E": "i\uFE20u\uFE21"
-    "\u044F": "i\uFE20a\uFE21"
-    "\u0450": "e\u0300"
-    "\u0451": "e\u0308"
-    "\u0452": "\u0111"
-    "\u0453": "g\u0301"
-    "\u0454": "e\u0304"
-    "\u0455": "s\u0307"
-    "\u0456": "i\u0304"
-    "\u0457": "i\u0308"
-    "\u0458": "i\u0310"
-    "\u0459": "lj"
-    "\u045A": "nj"
-    "\u045B": "c\u0301"
-    "\u045C": "k\u0301"
-    "\u045D": "i\u0300"
-    "\u045E": "u\u0306"
-    "\u045F": "dz\u030C"
-    "\u0460": "O\u0304\u0324"
-    "\u0461": "o\u0304\u0324"
-    "\u0462": "I\uFE20E\uFE21"
-    "\u0463": "i\uFE20e\uFE21"
-    "\u0464": "I\uFE20E\uFE21\u0304"
-    "\u0465": "i\uFE20e\uFE21\u0304"
-    "\u0466": "E\u0328"
-    "\u0467": "e\u0328"
-    "\u0468": "I\uFE20E\uFE21\u0328"
-    "\u0469": "i\uFE20e\uFE21\u0328"
-    "\u046A": "O\u0328"
-    "\u046B": "o\u0328"
-    "\u046C": "I\uFE20O\uFE21\u0328"
-    "\u046D": "i\uFE20o\uFE21\u0328"
-    "\u046E": "K\uFE20S\uFE21"
-    "\u046F": "k\uFE20s\uFE21"
-    "\u0470": "P\uFE20S\uFE21"
-    "\u0471": "p\uFE20s\uFE21"
-    "\u0472": "F\u0307"
-    "\u0473": "f\u0307"
-    "\u0474": "V\u0307"
-    "\u0475": "v\u0307"
-    "\u0476": "V\u0308"
-    "\u0477": "v\u0308"
-    "\u0478": "O\uFE20u\uFE21"
-    "\u0479": "o\uFE20u\uFE21"
-    "\u047A": "O\u0304\u0323"
-    "\u047B": "o\u0304\u0323"
-    "\u047C": "O\u0303"
-    "\u047D": "o\u0303"
-    "\u047E": "O\u0304\uFE20T\uFE21"
-    "\u047F": "o\u0304\uFE20t\uFE21"
-    "\u0480": "Q\u0302"
-    "\u0481": "q\u0302"
-    "\u0482": "(|)"
-    "\u0483": "\u007E"
-    "\u0484": "\u0311"
-    "\u0485": "\u0314"
-    "\u0486": "\u0313"
-    "\u0487": "\u0303"
-    "\u0488": "(^)"
-    "\u0489": "(')"
-    "\u048A": "I\u0306\u0323"
-    "\u048B": "i\u0306\u0323"
-    "\u048C": "E\u0306\u0323"
-    "\u048D": "e\u0306\u0323"
-    "\u048E": "R\u0306"
-    "\u048F": "r\u0306"
-    "\u0490": "G\u0306"
-    "\u0491": "g\u0306"
-    "\u0492": "Gh"
-    "\u0493": "gh"
-    "\u0494": "G\u0327"
-    "\u0495": "g\u0327"
-    "\u0496": "J"
-    "\u0497": "j"
-    "\u0498": "T\uFE20H\uFE21"
-    "\u0499": "t\uFE20h\uFE21"
-    "\u049A": "Q"
-    "\u049B": "q"
-    "\u049C": "G\u0307"
-    "\u049D": "g\u0307"
-    "\u049E": "Q\u0304"
-    "\u049F": "q\u0304"
-    "\u04A0": "Q\u0300"
-    "\u04A1": "q\u0300"
-    "\u04A2": "N\uFE20G\uFE21"
-    "\u04A3": "n\uFE20g\uFE21"
-    "\u04A4": "N\u0301G\u0300"
-    "\u04A5": "n\u0301g\u0300"
-    "\u04A6": "Ph"
-    "\u04A7": "ph"
-    "\u04A8": "W"
-    "\u04A9": "w"
-    "\u04AA": "Th"
-    "\u04AB": "th"
-    "\u04AC": "T\u0327"
-    "\u04AD": "t\u0327"
-    "\u04AE": "U\u0307"
-    "\u04AF": "u\u0307"
-    "\u04B0": "U\u0302"
-    "\u04B1": "u\u0302"
-    "\u04B2": "K\uFE20H\uFE21"
-    "\u04B3": "k\uFE20h\uFE21"
-    "\u04B4": "T\uFE20S\uFE21\u0307"
-    "\u04B5": "t\uFE20s\uFE21\u0307"
-    "\u04B6": "J\u0302"
-    "\u04B7": "j\u0302"
-    "\u04B8": "J\u0304"
-    "\u04B9": "j\u0304"
-    "\u04BA": "H\u0307"
-    "\u04BB": "h\u0307"
-    "\u04BC": "C\u0301h"
-    "\u04BD": "c\u0301h"
-    "\u04BE": "C\u0301h\u0301"
-    "\u04BF": "c\u0301h\u0301"
-    "\u04C0": "H\u0308"
-    "\u04C1": "J\u0306"
-    "\u04C2": "j\u0306"
-    "\u04C3": "Q\u0307"
-    "\u04C4": "q\u0307"
-    "\u04C5": "L\u0326"
-    "\u04C6": "l\u0326"
-    "\u04C7": "N\uFE20\u0327G\uFE21"
-    "\u04C8": "n\uFE20\u0327g\uFE21"
-    "\u04C9": "N\uFE20\u0323G\uFE21"
-    "\u04CA": "n\uFE20\u0323g\uFE21"
-    "\u04CB": "C\u0323h"
-    "\u04CC": "c\u0323h"
-    "\u04CD": "M\u0323"
-    "\u04CE": "m\u0323"
-    "\u04CF": "h\u0308"
-    "\u04D0": "A\u0306\u0323"
-    "\u04D1": "a\u0306\u0323"
-    "\u04D2": "A\u0308"
-    "\u04D3": "a\u0308"
-    "\u04D4": "\u00C6"
-    "\u04D5": "\u00E6"
-    "\u04D6": "E\u0306"
-    "\u04D7": "e\u0306"
-    "\u04D8": "A\u0306"
-    "\u04D9": "a\u0306"
-    "\u04DA": "A\u030B"
-    "\u04DB": "a\u030B"
-    "\u04DC": "J\u0308"
-    "\u04DD": "j\u0308"
-    "\u04DE": "Z\u0308"
-    "\u04DF": "z\u0308"
-    "\u04E0": "D\uFE20Z\uFE21"
-    "\u04E1": "d\uFE20z\uFE21"
-    "\u04E2": "I\u0304\u0323"
-    "\u04E3": "i\u0304\u0323"
-    "\u04E4": "I\u0308\u0323"
-    "\u04E5": "i\u0308\u0323"
-    "\u04E6": "O\u0308"
-    "\u04E7": "o\u0308"
-    "\u04E8": "O\u0307"
-    "\u04E9": "o\u0307"
-    "\u04EA": "O\u0304"
-    "\u04EB": "o\u0304"
-    "\u04EC": "E\u0308\u0323"
-    "\u04ED": "e\u0308\u0323"
-    "\u04EE": "U\u0304"
-    "\u04EF": "u\u0304"
-    "\u04F0": "U\u0308"
-    "\u04F1": "u\u0308"
-    "\u04F2": "U\u030B"
-    "\u04F3": "u\u030B"
-    "\u04F4": "C\u0308h"
-    "\u04F5": "c\u0308h"
-    "\u04F6": "G\u0323"
-    "\u04F7": "g\u0323"
-    "\u04F8": "Y\u0308"
-    "\u04F9": "y\u0308"
-    "\u04FA": "Gh\u0327"
-    "\u04FB": "gh\u0327"
-    "\u04FC": "H\u0327"
-    "\u04FD": "h\u0327"
-    "\u04FE": "H\u0304"
-    "\u04FF": "h\u0304"
-    "\u0500": "D\u0307"
-    "\u0501": "d\u0307"
-    "\u0502": "D\u0301"
-    "\u0503": "d\u0301"
-    "\u0504": "Z\u0301"
-    "\u0505": "z\u0301"
-    "\u0506": "Z\u0327"
-    "\u0507": "z\u0327"
-    "\u0508": "L\u0301"
-    "\u0509": "l\u0301"
-    "\u050A": "N\u0301"
-    "\u050B": "n\u0301"
-    "\u050C": "S\u0301"
-    "\u050D": "s\u0301"
-    "\u050E": "T\u0301"
-    "\u050F": "t\u0301"
-    "\u0510": "Z\u0307"
-    "\u0511": "z\u0307"
-    "\u0512": "L\u0327"
-    "\u0513": "l\u0327"
-    "\u0514": "Lkh\u0307"
-    "\u0515": "lkh\u0307"
-    "\u0516": "Rkh\u0307"
-    "\u0517": "rkh\u0307"
-    "\u0518": "A\u0310"
-    "\u0519": "a\u0310"
-    "\u051A": "Q\u0308"
-    "\u051B": "q\u0308"
-    "\u051C": "W\u0308"
-    "\u051D": "w\u0308"
-    "\u051E": "K\u0300"
-    "\u051F": "k\u0300"
-    "\u0520": "L\u0324"
-    "\u0521": "l\u0324"
-    "\u0522": "N\u0327"
-    "\u0523": "n\u0327"
-    "\u0524": "P\u0323"
-    "\u0525": "p\u0323"
-    "\u0526": "Sh\u0323"
-    "\u0527": "sh\u0323"
-    "\u0528": "N\u0326"
-    "\u0529": "n\u0326"
-    "\u052A": "D\uFE20z\uFE21h"
-    "\u052B": "d\uFE20z\uFE21h"
-    "\u052C": "D\uFE20c\uFE21h"
-    "\u052D": "d\uFE20c\uFE21h"
-    "\u052E": "L\u0323"
-    "\u052F": "l\u0323"
+---
+general:
+  name: Cyrillic (Generic)
+  parents:
+    - _ignore_base
+
+# COMMON COMBINING CHARACTERS (always follow a base letter):
+# combining grave \u0300
+# combining acute \u0301
+# combining circumflex \u0302
+# combining tilde \u0303
+# combining macron \u0304
+# combining breve \u0306
+# combining dot above \u0307
+# combining diaeresis \u0308
+# combining ring above \u030A
+# combining double acute \u030B
+# combining caron (hachek) \u030C
+# combining candrabindu \u0310
+# combining dot below \u0323
+# combining diaeresis below \u0324
+# combining comma below \u0326 (Romanian, Latvian, Livonian)
+# combining cedilla \u0327 (French, Turkish, Azeri)
+# combining ogonek (hook) \u0328 (Polish, Lithuanian)
+# combining low line \u0332
+# combining double low line \u0333
+# combining left ligature \uFE20 (Cyrillic transliteration)
+# combining right ligature \uFE21 (Cyrillic transliteration)
+# soft sign/prime (spacing) \u02B9 (Cyrillic transliteration)
+# hard sign/double prime (spacing) \u02BA (Cyrillic transliteration)
+# ayn (spacing) \u02BB (Semitic and Caucasian languages)
+# alif (spacing) \u02BC (Semitic languages)
+# middle dot (spacing) \u00B7 (Catalan)
+
+roman_to_script:
+  map:
+    "A\uFE20E\uFE21": "\u04D4"
+    "A\uFE20e\uFE21": "\u04D4"
+    "a\uFE20e\uFE21": "\u04D5"
+    "A\u0306\u0323": "\u04D0"
+    "a\u0306\u0323": "\u04D1"
+    "\u00C6": "\u04D4"
+    "\u00E6": "\u04D5"
+    "A\u0306": "\u04D8"
+    "a\u0306": "\u04D9"
+    "A\u030B": "\u04DA"
+    "a\u030B": "\u04DB"
+    "A\u0308": "\u04D2"
+    "a\u0308": "\u04D3"
+    "A\u0310": "\u0518"
+    "a\u0310": "\u0519"
+
+    "B": "\u0411"
+    "b": "\u0431"
+
+    "C\u0301h\u0301": "\u04BE"
+    "c\u0301h\u0301": "\u04BF"
+    "C\u0301h": "\u04BC"
+    "c\u0301h": "\u04BD"
+    "C\u0301": "\u040B"
+    "c\u0301": "\u045B"
+    "C\u0308h": "\u04F4"
+    "c\u0308h": "\u04F5"
+    "C\u0323h": "\u04CB"
+    "c\u0323h": "\u04CC"
+
+    "D\u0301": "\u0502"
+    "d\u0301": "\u0503"
+    "D\u0307": "\u0500"
+    "d\u0307": "\u0501"
+    "D\uFE20c\uFE21h": "\u052C"
+    "d\uFE20c\uFE21h": "\u052D"
+    "D\uFE20z\uFE21h": "\u052A"
+    "d\uFE20z\uFE21h": "\u052B"
+    "D\uFE20Z\uFE21": "\u04E0"
+    "d\uFE20z\uFE21": "\u04E1"
+    "Dz\u030C": "\u040F"
+    "dz\u030C": "\u045F"
+    "D": "\u0414"
+    "d": "\u0434"
+
+    "E\u0300": "\u0400"
+    "e\u0300": "\u0450"
+    "E\u0304": "\u0404"
+    "e\u0304": "\u0454"
+    "E\u0306": "\u04D6"
+    "e\u0306": "\u04D7"
+    "E\u0306\u0323": "\u048C"
+    "e\u0306\u0323": "\u048D"
+    "E\u0307": "\u042D"
+    "e\u0307": "\u044D"
+    "E\u0308\u0323": "\u04EC"
+    "e\u0308\u0323": "\u04ED"
+    "E\u0308": "\u0401"
+    "e\u0308": "\u0451"
+    "E\u0328": "\u0466"
+    "e\u0328": "\u0467"
+
+    "F\u0307": "\u0472"
+    "f\u0307": "\u0473"
+    "F": "\u0424"
+    "f": "\u0444"
+
+    "Gh\u0327": "\u04FA"
+    "gh\u0327": "\u04FB"
+    "Gh": "\u0492"
+    "gh": "\u0493"
+    "G\u0301": "\u0403"
+    "g\u0301": "\u0453"
+    "G\u0306": "\u0490"
+    "g\u0306": "\u0491"
+    "G\u0307": "\u049C"
+    "g\u0307": "\u049D"
+    "G\u0323": "\u04F6"
+    "g\u0323": "\u04F7"
+    "G\u0327": "\u0494"
+    "g\u0327": "\u0495"
+
+    "H\u0304": "\u04FE"
+    "h\u0304": "\u04FF"
+    "H\u0327": "\u04FC"
+    "h\u0327": "\u04FD"
+    "H\u0307": "\u04BA"
+    "h\u0307": "\u04BB"
+    "H\u0308": "\u04C0"
+    "h\u0308": "\u04CF"
+
+    "I\u0300": "\u040D"
+    "i\u0300": "\u045D"
+    "I\u0304\u0323": "\u04E2"
+    "i\u0304\u0323": "\u04E3"
+    "I\u0304": "\u0406"
+    "i\u0304": "\u0456"
+    "I\u0306\u0323": "\u048A"
+    "i\u0306\u0323": "\u048B"
+    "I\u0306": "\u0419"
+    "i\u0306": "\u0439"
+    "I\u0308\u0323": "\u04E4"
+    "i\u0308\u0323": "\u04E5"
+    "I\u0308": "\u0407"
+    "i\u0308": "\u0457"
+    "I\u0310": "\u0408"
+    "i\u0310": "\u0458"
+
+    "I\uFE20A\uFE21": "\u042F"
+    "i\uFE20a\uFE21": "\u044F"
+    "A": "\u0410"
+    "a": "\u0430"
+
+    "I\uFE20E\uFE21\u0304": "\u0464"
+    "i\uFE20e\uFE21\u0304": "\u0465"
+    "I\uFE20E\uFE21\u0328": "\u0468"
+    "i\uFE20e\uFE21\u0328": "\u0469"
+    "I\uFE20E\uFE21": "\u0462"
+    "i\uFE20e\uFE21": "\u0463"
+    "E": "\u0415"
+    "e": "\u0435"
+
+    "I\uFE20O\uFE21\u0328": "\u046C"
+    "i\uFE20o\uFE21\u0328": "\u046D"
+    "I\uFE20U\uFE21": "\u042E"
+    "i\uFE20u\uFE21": "\u044E"
+    "I": "\u0418"
+    "i": "\u0438"
+
+    "J\u0304": "\u04B8"
+    "j\u0304": "\u04B9"
+    "J\u0306": "\u04C1"
+    "j\u0306": "\u04C2"
+    "J\u0302": "\u04B6"
+    "j\u0302": "\u04B7"
+    "J\u0308": "\u04DC"
+    "j\u0308": "\u04DD"
+
+    "K\u0300": "\u051E"
+    "k\u0300": "\u051F"
+    "K\u0301": "\u040C"
+    "k\u0301": "\u045C"
+    "K\uFE20H\uFE21": "\u04B2"
+    "k\uFE20h\uFE21": "\u04B3"
+    "Kh": "\u0425"
+    "kh": "\u0445"
+    "K\uFE20S\uFE21": "\u046E"
+    "k\uFE20s\uFE21": "\u046F"
+    "K": "\u041A"
+    "k": "\u043A"
+
+    "Lj": "\u0409"
+    "lj": "\u0459"
+    "Lkh\u0307": "\u0514"
+    "lkh\u0307": "\u0515"
+    "L\u0301": "\u0508"
+    "l\u0301": "\u0509"
+    "L\u0326": "\u04C5"  # combining comma below, same mark as the lowercase key
+    "l\u0326": "\u04C6"
+    "L\u0323": "\u052E"
+    "l\u0323": "\u052F"
+    "L\u0327": "\u0512"
+    "l\u0327": "\u0513"
+    "L\u0324": "\u0520"
+    "l\u0324": "\u0521"
+    "L": "\u041B"
+    "l": "\u043B"
+
+    "M\u0323": "\u04CD"
+    "m\u0323": "\u04CE"
+    "M": "\u041C"
+    "m": "\u043C"
+
+    "Nj": "\u040A"
+    "nj": "\u045A"
+    "N\u0301G\u0300": "\u04A4"
+    "n\u0301g\u0300": "\u04A5"
+    "N\u0301": "\u050A"
+    "n\u0301": "\u050B"
+    "N\u0326": "\u0528"
+    "n\u0326": "\u0529"
+    "N\u0327": "\u0522"
+    "n\u0327": "\u0523"
+    "N\uFE20\u0323G\uFE21": "\u04C9"
+    "n\uFE20\u0323g\uFE21": "\u04CA"
+    "N\uFE20\u0327G\uFE21": "\u04C7"
+    "n\uFE20\u0327g\uFE21": "\u04C8"
+    "N\uFE20G\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"
+    "No\u0332": "\u2116"
+    "N": "\u041D"
+    "n": "\u043D"
+
+    "G": "\u0413"
+    "g": "\u0433"
+
+    "J": "\u0496"
+    "j": "\u0497"
+
+    "O\u0303": "\u047C"
+    "o\u0303": "\u047D"
+    "O\u0304\u0323": "\u047A"
+    "o\u0304\u0323": "\u047B"
+    "O\u0304\uFE20T\uFE21": "\u047E"
+    "o\u0304\uFE20t\uFE21": "\u047F"
+    "O\u0304\u0324": "\u0460"
+    "o\u0304\u0324": "\u0461"
+    "O\u0304": "\u04EA"
+    "o\u0304": "\u04EB"
+    "O\u0307": "\u04E8"
+    "o\u0307": "\u04E9"
+    "O\u0308": "\u04E6"
+    "o\u0308": "\u04E7"
+    "O\u0328": "\u046A"
+    "o\u0328": "\u046B"
+    "O\uFE20u\uFE21": "\u0478"
+    "o\uFE20u\uFE21": "\u0479"
+    "O": "\u041E"
+    "o": "\u043E"
+
+    "Ph": "\u04A6"
+    "ph": "\u04A7"
+    "P\u0323": "\u0524"
+    "p\u0323": "\u0525"
+    "P\uFE20S\uFE21": "\u0470"
+    "p\uFE20s\uFE21": "\u0471"
+    "P": "\u041F"
+    "p": "\u043F"
+
+    "Q\u0300": "\u04A0"
+    "q\u0300": "\u04A1"
+    "Q\u0302": "\u0480"
+    "q\u0302": "\u0481"
+    "Q\u0304": "\u049E"
+    "q\u0304": "\u049F"
+    "Q\u0307": "\u04C3"
+    "q\u0307": "\u04C4"
+    "Q\u0308": "\u051A"
+    "q\u0308": "\u051B"
+    "Q": "\u049A"
+    "q": "\u049B"
+
+    "Rkh\u0307": "\u0516"
+    "rkh\u0307": "\u0517"
+    "R\u0306": "\u048E"
+    "r\u0306": "\u048F"
+    "R": "\u0420"
+    "r": "\u0440"
+
+    "Shch": "\u0429"
+    "shch": "\u0449"
+    "Sh\u0323": "\u0526"
+    "sh\u0323": "\u0527"
+    "Sh": "\u0428"
+    "sh": "\u0448"
+    "S\u0301": "\u050C"
+    "s\u0301": "\u050D"
+    "S\u0307": "\u0405"
+    "s\u0307": "\u0455"
+
+    "Ch": "\u0427"
+    "ch": "\u0447"
+    "C": "\u0426"
+    "c": "\u0446"
+
+    "Th": "\u04AA"
+    "th": "\u04AB"
+    "T\u0301": "\u050E"
+    "t\u0301": "\u050F"
+    "T\u0327": "\u04AC"
+    "t\u0327": "\u04AD"
+    "T\uFE20H\uFE21": "\u0498"
+    "t\uFE20h\uFE21": "\u0499"
+    "T\uFE20S\uFE21": "\u0426"
+    "t\uFE20s\uFE21": "\u0446"
+    "T\uFE20S\uFE21\u0307": "\u04B4"
+    "t\uFE20s\uFE21\u0307": "\u04B5"
+
+    "S": "\u0421"
+    "s": "\u0441"
+
+    "T": "\u0422"
+    "t": "\u0442"
+
+    "U\u0302": "\u04B0"
+    "u\u0302": "\u04B1"
+    "U\u0304": "\u04EE"
+    "u\u0304": "\u04EF"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
+    "U\u0307": "\u04AE"
+    "u\u0307": "\u04AF"
+    "U\u0308": "\u04F0"
+    "u\u0308": "\u04F1"
+    "U\u030B": "\u04F2"
+    "u\u030B": "\u04F3"
+    "U": "\u0423"
+    "u": "\u0443"
+
+    "V\u0307": "\u0474"
+    "v\u0307": "\u0475"
+    "V\u0308": "\u0476"
+    "v\u0308": "\u0477"
+    "V": "\u0412"
+    "v": "\u0432"
+
+    "W\u0308": "\u051C"
+    "w\u0308": "\u051D"
+    "W": "\u04A8"
+    "w": "\u04A9"
+
+    "X": "\u0058"
+    "x": "\u0078"
+
+    "Y\u0307": "\u0474"
+    "y\u0307": "\u0475"
+    "Y\u0308": "\u04F8"
+    "y\u0308": "\u04F9"
+    "Y": "\u042B"
+    "y": "\u044B"
+
+    "Zh": "\u0416"
+    "zh": "\u0436"
+    "Z\u0301": "\u0504"
+    "z\u0301": "\u0505"
+    "Z\u0307": "\u0510"
+    "z\u0307": "\u0511"
+    "Z\u0308": "\u04DE"
+    "z\u0308": "\u04DF"
+    "Z\u0327": "\u0506"
+    "z\u0327": "\u0507"
+    "Z": "\u0417"
+    "z": "\u0437"
+
+    "H": "\u0413"
+    "h": "\u0433"
+
+    "\u0110": "\u0402"
+    "\u0111": "\u0452"
+    "\u02B9\u0333": "\u042C"
+    "\u02B9": "\u044C"
+    "\u02BA\u0333": "\u042A"
+    "\u02BA": "\u044A"
+    "\u0303": "\u0487"
+    "\u0311": "\u0484"
+    "\u0313": "\u0486"
+    "\u0314": "\u0485"
+    "\u007E": "\u0483"
+    "(|)": "\u0482"
+    "(^)": "\u0488"
+    "(')": "\u0489"
+
+    # Two Less-than signs mapped to Left-pointing double angle quotation mark
+    "\u003C\u003C": "\u00AB"
+    # Two Greater-than signs mapped to Right-pointing double angle quotation mark
+    "\u003E\u003E": "\u00BB"
+
+script_to_roman:
+  map:
+
+    # Left-pointing double angle quotation mark mapped to Two Less-than signs
+    "\u00AB": "\u003C\u003C"
+    # Right-pointing double angle quotation mark mapped to Two Greater-than signs
+    "\u00BB": "\u003E\u003E"
+    "\u2116": "No\u0332"
+    "\u0400": "E\u0300"
+    "\u0401": "E\u0308"
+    "\u0402": "\u0110"
+    "\u0403": "G\u0301"
+    "\u0404": "E\u0304"
+    "\u0405": "S\u0307"
+    "\u0406": "I\u0304"
+    "\u0407": "I\u0308"
+    "\u0408": "I\u0310"
+    "\u0409": "Lj"
+    "\u040A": "Nj"
+    "\u040B": "C\u0301"
+    "\u040C": "K\u0301"
+    "\u040D": "I\u0300"
+    "\u040E": "U\u0306"
+    "\u040F": "Dz\u030C"
+    "\u0410": "A"
+    "\u0411": "B"
+    "\u0412": "V"
+    "\u0413": "G"
+    "\u0414": "D"
+    "\u0415": "E"
+    "\u0416": "Zh"
+    "\u0417": "Z"
+    "\u0418": "I"
+    "\u0419": "I\u0306"
+    "\u041A": "K"
+    "\u041B": "L"
+    "\u041C": "M"
+    "\u041D": "N"
+    "\u041E": "O"
+    "\u041F": "P"
+    "\u0420": "R"
+    "\u0421": "S"
+    "\u0422": "T"
+    "\u0423": "U"
+    "\u0424": "F"
+    "\u0425": "Kh"
+    "\u0426": "T\uFE20S\uFE21"
+    "\u0427": "Ch"
+    "\u0428": "Sh"
+    "\u0429": "Shch"
+    "\u042A": "\u02BA\u0333"
+    "\u042B": "Y"
+    "\u042C": "\u02B9\u0333"
+    "\u042D": "E\u0307"
+    "\u042E": "I\uFE20U\uFE21"
+    "\u042F": "I\uFE20A\uFE21"
+    "\u0430": "a"
+    "\u0431": "b"
+    "\u0432": "v"
+    "\u0433": "g"
+    "\u0434": "d"
+    "\u0435": "e"
+    "\u0436": "zh"
+    "\u0437": "z"
+    "\u0438": "i"
+    "\u0439": "i\u0306"
+    "\u043A": "k"
+    "\u043B": "l"
+    "\u043C": "m"
+    "\u043D": "n"
+    "\u043E": "o"
+    "\u043F": "p"
+    "\u0440": "r"
+    "\u0441": "s"
+    "\u0442": "t"
+    "\u0443": "u"
+    "\u0444": "f"
+    "\u0445": "kh"
+    "\u0446": "t\uFE20s\uFE21"
+    "\u0447": "ch"
+    "\u0448": "sh"
+    "\u0449": "shch"
+    "\u044A": "\u02BA"
+    "\u044B": "y"
+    "\u044C": "\u02B9"
+    "\u044D": "e\u0307"
+    "\u044E": "i\uFE20u\uFE21"
+    "\u044F": "i\uFE20a\uFE21"
+    "\u0450": "e\u0300"
+    "\u0451": "e\u0308"
+    "\u0452": "\u0111"
+    "\u0453": "g\u0301"
+    "\u0454": "e\u0304"
+    "\u0455": "s\u0307"
+    "\u0456": "i\u0304"
+    "\u0457": "i\u0308"
+    "\u0458": "i\u0310"
+    "\u0459": "lj"
+    "\u045A": "nj"
+    "\u045B": "c\u0301"
+    "\u045C": "k\u0301"
+    "\u045D": "i\u0300"
+    "\u045E": "u\u0306"
+    "\u045F": "dz\u030C"
+    "\u0460": "O\u0304\u0324"
+    "\u0461": "o\u0304\u0324"
+    "\u0462": "I\uFE20E\uFE21"
+    "\u0463": "i\uFE20e\uFE21"
+    "\u0464": "I\uFE20E\uFE21\u0304"
+    "\u0465": "i\uFE20e\uFE21\u0304"
+    "\u0466": "E\u0328"
+    "\u0467": "e\u0328"
+    "\u0468": "I\uFE20E\uFE21\u0328"
+    "\u0469": "i\uFE20e\uFE21\u0328"
+    "\u046A": "O\u0328"
+    "\u046B": "o\u0328"
+    "\u046C": "I\uFE20O\uFE21\u0328"
+    "\u046D": "i\uFE20o\uFE21\u0328"
+    "\u046E": "K\uFE20S\uFE21"
+    "\u046F": "k\uFE20s\uFE21"
+    "\u0470": "P\uFE20S\uFE21"
+    "\u0471": "p\uFE20s\uFE21"
+    "\u0472": "F\u0307"
+    "\u0473": "f\u0307"
+    "\u0474": "V\u0307"
+    "\u0475": "v\u0307"
+    "\u0476": "V\u0308"
+    "\u0477": "v\u0308"
+    "\u0478": "O\uFE20u\uFE21"
+    "\u0479": "o\uFE20u\uFE21"
+    "\u047A": "O\u0304\u0323"
+    "\u047B": "o\u0304\u0323"
+    "\u047C": "O\u0303"
+    "\u047D": "o\u0303"
+    "\u047E": "O\u0304\uFE20T\uFE21"
+    "\u047F": "o\u0304\uFE20t\uFE21"
+    "\u0480": "Q\u0302"
+    "\u0481": "q\u0302"
+    "\u0482": "(|)"
+    "\u0483": "\u007E"
+    "\u0484": "\u0311"
+    "\u0485": "\u0314"
+    "\u0486": "\u0313"
+    "\u0487": "\u0303"
+    "\u0488": "(^)"
+    "\u0489": "(')"
+    "\u048A": "I\u0306\u0323"
+    "\u048B": "i\u0306\u0323"
+    "\u048C": "E\u0306\u0323"
+    "\u048D": "e\u0306\u0323"
+    "\u048E": "R\u0306"
+    "\u048F": "r\u0306"
+    "\u0490": "G\u0306"
+    "\u0491": "g\u0306"
+    "\u0492": "Gh"
+    "\u0493": "gh"
+    "\u0494": "G\u0327"
+    "\u0495": "g\u0327"
+    "\u0496": "J"
+    "\u0497": "j"
+    "\u0498": "T\uFE20H\uFE21"
+    "\u0499": "t\uFE20h\uFE21"
+    "\u049A": "Q"
+    "\u049B": "q"
+    "\u049C": "G\u0307"
+    "\u049D": "g\u0307"
+    "\u049E": "Q\u0304"
+    "\u049F": "q\u0304"
+    "\u04A0": "Q\u0300"
+    "\u04A1": "q\u0300"
+    "\u04A2": "N\uFE20G\uFE21"
+    "\u04A3": "n\uFE20g\uFE21"
+    "\u04A4": "N\u0301G\u0300"
+    "\u04A5": "n\u0301g\u0300"
+    "\u04A6": "Ph"
+    "\u04A7": "ph"
+    "\u04A8": "W"
+    "\u04A9": "w"
+    "\u04AA": "Th"
+    "\u04AB": "th"
+    "\u04AC": "T\u0327"
+    "\u04AD": "t\u0327"
+    "\u04AE": "U\u0307"
+    "\u04AF": "u\u0307"
+    "\u04B0": "U\u0302"
+    "\u04B1": "u\u0302"
+    "\u04B2": "K\uFE20H\uFE21"
+    "\u04B3": "k\uFE20h\uFE21"
+    "\u04B4": "T\uFE20S\uFE21\u0307"
+    "\u04B5": "t\uFE20s\uFE21\u0307"
+    "\u04B6": "J\u0302"
+    "\u04B7": "j\u0302"
+    "\u04B8": "J\u0304"
+    "\u04B9": "j\u0304"
+    "\u04BA": "H\u0307"
+    "\u04BB": "h\u0307"
+    "\u04BC": "C\u0301h"
+    "\u04BD": "c\u0301h"
+    "\u04BE": "C\u0301h\u0301"
+    "\u04BF": "c\u0301h\u0301"
+    "\u04C0": "H\u0308"
+    "\u04C1": "J\u0306"
+    "\u04C2": "j\u0306"
+    "\u04C3": "Q\u0307"
+    "\u04C4": "q\u0307"
+    "\u04C5": "L\u0326"
+    "\u04C6": "l\u0326"
+    "\u04C7": "N\uFE20\u0327G\uFE21"
+    "\u04C8": "n\uFE20\u0327g\uFE21"
+    "\u04C9": "N\uFE20\u0323G\uFE21"
+    "\u04CA": "n\uFE20\u0323g\uFE21"
+    "\u04CB": "C\u0323h"
+    "\u04CC": "c\u0323h"
+    "\u04CD": "M\u0323"
+    "\u04CE": "m\u0323"
+    "\u04CF": "h\u0308"
+    "\u04D0": "A\u0306\u0323"
+    "\u04D1": "a\u0306\u0323"
+    "\u04D2": "A\u0308"
+    "\u04D3": "a\u0308"
+    "\u04D4": "\u00C6"
+    "\u04D5": "\u00E6"
+    "\u04D6": "E\u0306"
+    "\u04D7": "e\u0306"
+    "\u04D8": "A\u0306"
+    "\u04D9": "a\u0306"
+    "\u04DA": "A\u030B"
+    "\u04DB": "a\u030B"
+    "\u04DC": "J\u0308"
+    "\u04DD": "j\u0308"
+    "\u04DE": "Z\u0308"
+    "\u04DF": "z\u0308"
+    "\u04E0": "D\uFE20Z\uFE21"
+    "\u04E1": "d\uFE20z\uFE21"
+    "\u04E2": "I\u0304\u0323"
+    "\u04E3": "i\u0304\u0323"
+    "\u04E4": "I\u0308\u0323"
+    "\u04E5": "i\u0308\u0323"
+    "\u04E6": "O\u0308"
+    "\u04E7": "o\u0308"
+    "\u04E8": "O\u0307"
+    "\u04E9": "o\u0307"
+    "\u04EA": "O\u0304"
+    "\u04EB": "o\u0304"
+    "\u04EC": "E\u0308\u0323"
+    "\u04ED": "e\u0308\u0323"
+    "\u04EE": "U\u0304"
+    "\u04EF": "u\u0304"
+    "\u04F0": "U\u0308"
+    "\u04F1": "u\u0308"
+    "\u04F2": "U\u030B"
+    "\u04F3": "u\u030B"
+    "\u04F4": "C\u0308h"
+    "\u04F5": "c\u0308h"
+    "\u04F6": "G\u0323"
+    "\u04F7": "g\u0323"
+    "\u04F8": "Y\u0308"
+    "\u04F9": "y\u0308"
+    "\u04FA": "Gh\u0327"
+    "\u04FB": "gh\u0327"
+    "\u04FC": "H\u0327"
+    "\u04FD": "h\u0327"
+    "\u04FE": "H\u0304"
+    "\u04FF": "h\u0304"
+    "\u0500": "D\u0307"
+    "\u0501": "d\u0307"
+    "\u0502": "D\u0301"
+    "\u0503": "d\u0301"
+    "\u0504": "Z\u0301"
+    "\u0505": "z\u0301"
+    "\u0506": "Z\u0327"
+    "\u0507": "z\u0327"
+    "\u0508": "L\u0301"
+    "\u0509": "l\u0301"
+    "\u050A": "N\u0301"
+    "\u050B": "n\u0301"
+    "\u050C": "S\u0301"
+    "\u050D": "s\u0301"
+    "\u050E": "T\u0301"
+    "\u050F": "t\u0301"
+    "\u0510": "Z\u0307"
+    "\u0511": "z\u0307"
+    "\u0512": "L\u0327"
+    "\u0513": "l\u0327"
+    "\u0514": "Lkh\u0307"
+    "\u0515": "lkh\u0307"
+    "\u0516": "Rkh\u0307"
+    "\u0517": "rkh\u0307"
+    "\u0518": "A\u0310"
+    "\u0519": "a\u0310"
+    "\u051A": "Q\u0308"
+    "\u051B": "q\u0308"
+    "\u051C": "W\u0308"
+    "\u051D": "w\u0308"
+    "\u051E": "K\u0300"
+    "\u051F": "k\u0300"
+    "\u0520": "L\u0324"
+    "\u0521": "l\u0324"
+    "\u0522": "N\u0327"
+    "\u0523": "n\u0327"
+    "\u0524": "P\u0323"
+    "\u0525": "p\u0323"
+    "\u0526": "Sh\u0323"
+    "\u0527": "sh\u0323"
+    "\u0528": "N\u0326"
+    "\u0529": "n\u0326"
+    "\u052A": "D\uFE20z\uFE21h"
+    "\u052B": "d\uFE20z\uFE21h"
+    "\u052C": "D\uFE20c\uFE21h"
+    "\u052D": "d\uFE20c\uFE21h"
+    "\u052E": "L\u0323"
+    "\u052F": "l\u0323"

+ 1 - 1
scriptshifter/tables/data/dungan_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Dungan (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/even-evenki_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Even/Evenki (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/gagauz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Gagauz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 2 - 0
scriptshifter/tables/data/greek_modern.yml

@@ -16,6 +16,8 @@ roman_to_script:
     "ha": "\u03B1"
     "He": "\u0395"
     "he": "\u03B5"
+    "H\u0113": "\u0397"
+    "h\u0113": "\u03B7"
     "Hi": "\u0399"
     "hi": "\u03B9"
     "Ho": "\u039F"

+ 1 - 1
scriptshifter/tables/data/kalmyk_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kalmyk (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kara-kalpak_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kara-Kalpak (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/karachay-balkar_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Karachay-Balkar (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/karelian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Karelian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kazakh_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kazakh (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/khakass_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Khakass (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/khanty_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Khanty (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/komi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Komi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/koryak_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Koryak (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/kyrgyz_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Kyrgyz (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/lithuanian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Lithuanian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 62 - 16
scriptshifter/tables/data/macedonian.yml

@@ -1,50 +1,99 @@
 general:
   name: Macedonian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:
     "G\u0301": "\u0403"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F4": "\u0403"
     "G": "\u0413"
     "g\u0301": "\u0453"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F5": "\u0453"
     "g": "\u0433"
     "\u0110": "\u0402"
-    # this conversion shouldn't be needed, but does no harm
-    "DZ\u030C": "\u040F"
-    # this conversion shouldn't be needed, but does no harm
-    "DZ": "\u0405"
-    "Dz\u030C": "\u040F"
-    "Dz": "\u0405"
+    "D\uFE20Z\u030C\uFE21": "\u040F"  # ligated DZ + caron, any casing of a capitalized pair
+    "D\uFE20z\u030C\uFE21": "\u040F"
+    "d\uFE20Z\u030C\uFE21": "\u040F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent (U+01C4)
+    "\u01C4": "\u040F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent (U+01C5)
+    "\u01C5": "\u040F"
+    "d\uFE20z\u030C\uFE21": "\u045F"
+    # Mapping from precomposed non-MARC-8 Latin equivalent (U+01C6)
+    "\u01C6": "\u045F"
+    "D\uFE20Z\uFE21": "\u0405"
+    "D\uFE20z\uFE21": "\u0405"
+    "d\uFE20Z\uFE21": "\u0405"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F1": "\u0405"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F2": "\u0405"
+    "d\uFE20z\uFE21": "\u0455"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01F3": "\u0455"
     "\u0111": "\u0452"
     "dz\u030C": "\u045F"
     "dz": "\u0455"
     "Z\u030C": "\u0416"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u017D": "\u0416"
     "z\u030C": "\u0436"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u017E": "\u0436"
     "z": "\u0437"
     "I": "\u0418"
     "i": "\u0438"
     "J": "\u0408"
     "j": "\u0458"
     "K\u0301": "\u040C"
-    "H": "\u0425"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u1E30": "\u040C"
     "k\u0301": "\u045C"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u1E31": "\u045C"
+    "H": "\u0425"
     "h": "\u0445"
-    # this conversion shouldn't be needed, but does no harm
     "LJ": "\u0409"
     "Lj": "\u0409"
+    "lJ": "\u0409"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C7": "\u0409"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C8": "\u0409"
     "lj": "\u0459"
-    # this conversion shouldn't be needed, but does no harm
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01C9": "\u0459"
     "NJ": "\u040A"
     "Nj": "\u040A"
+    "nJ": "\u040A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CA": "\u040A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CB": "\u040A"
     "nj": "\u045A"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u01CC": "\u045A"
     "S\u030C": "\u0428"
+    "\u0160": "\u0428"
     "s\u030C": "\u0448"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0161": "\u0448"
     "C\u0301": "\u040B"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0106": "\u040B"
     "C\u030C": "\u0427"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u010C": "\u0427"
     "C": "\u0426"
     "c\u0301": "\u045B"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u0107": "\u045B"
     "c\u030C": "\u0447"
+    # Mapping from precomposed non-MARC-8 Latin equivalent
+    "\u010D": "\u0447"
     "c": "\u0446"
 
 script_to_roman:
@@ -57,8 +106,8 @@ script_to_roman:
     "\u0452": "\u0111"
     "\u0416": "Z\u030C"
     "\u0436": "z\u030C"
-    "\u0405": "Dz"
-    "\u0455": "dz"
+    "\u0405": "D\uFE20Z\uFE21"
+    "\u0455": "d\uFE20z\uFE21"
     "\u0418": "I"
     "\u0438": "i"
     "\u0408": "J"
@@ -79,9 +128,6 @@ script_to_roman:
     "\u0446": "c"
     "\u0427": "C\u030C"
     "\u0447": "c\u030C"
-    "\u040F": "Dz\u030C"
-    "\u045F": "dz\u030C"
-    "\u1029": "D\uFE20Z\uFE21"
-    "\u0455": "d\uFE20z\uFE21"
     "\u040F": "D\uFE20Z\u030C\uFE21"
     "\u045F": "d\uFE20z\u030C\uFE21"
+

+ 1 - 1
scriptshifter/tables/data/mansi_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mansi (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mari_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mari (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/moldovan_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Moldovan (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mongolian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mongolian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/mordvin_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Mordvin (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/nenets_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Nenets (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/ossetic_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Ossetic (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/romani_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Romani (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/russian.yml

@@ -1,7 +1,7 @@
 general:
   name: Russian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/serbian.yml

@@ -1,7 +1,7 @@
 general:
   name: Serbian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/shor_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Shor (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/syriac_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Syriac (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tajik_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tajik (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tatar-kryashen_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tatar-Kryashen (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tatar_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tatar (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/turkmen_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Turkmen (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/tuvinian_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Tuvinian (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/udmurt_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Udmurt (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/uighur_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Uighur (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/ukrainian.yml

@@ -1,7 +1,7 @@
 general:
   name: Ukrainian
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/uzbek_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Uzbek (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/yakut_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Yakut (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 1 - 1
scriptshifter/tables/data/yuit_cyrillic.yml

@@ -1,7 +1,7 @@
 general:
   name: Yuit (Cyrillic)
   parents:
-    - _cyrillic_base
+    - cyrillic_generic
 
 roman_to_script:
   map:

+ 16 - 14
scriptshifter/tables/index.yml

@@ -23,20 +23,20 @@ arabic:
 armenian:
   marc_code: arm
   name: Armenian
-asian_cyrillic:
-  description: >
-    Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza,
-    Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat,
-    Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz,
-    Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak,
-    Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp,
-    Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai,
-    Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor,
-    Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
-  marc_code: >
-    abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa,
-    krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah
-  name: Asian Cyrillic
+# asian_cyrillic:
+#   description: >
+#     Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza,
+#     Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat,
+#     Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz,
+#     Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak,
+#     Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp,
+#     Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai,
+#     Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor,
+#     Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
+#   marc_code: >
+#     abk, ady, alt, ava, bak, che, chv, dar, ale, esk, kbd, xal, krc, kaa,
+#     krl, kom, kum, lez, lit, chm, nog, oss, rum, rom, sel, udm, sah
+#   name: Asian Cyrillic
 assamese:
   name: assamese
 azerbaijani_cyrillic:
@@ -71,6 +71,8 @@ church_slavonic:
 chuvash_cyrillic:
   marc_code: chv
   name: Chuvash (Cyrillic)
+cyrillic_generic:
+  name: Cyrillic (generic)
 devanagari:
   marc_code: hin, san
   name: Devanagari