|
@@ -0,0 +1,452 @@
|
|
|
|
+general:
|
|
|
|
+ name: Asian Cyrillic
|
|
|
|
+ inherits: _cyrillic_base
|
|
|
|
+
|
|
|
|
+roman_to_script:
|
|
|
|
+ map:
|
|
|
|
+ # COMMON COMBINING CHARACTERS (always follow a base letter):
|
|
|
|
+ # combining grave U+0300
|
|
|
|
+ # combining acute U+0301
|
|
|
|
+ # combining circumflex U+0302
|
|
|
|
+ # combining macron U+0304
|
|
|
|
+ # combining breve U+0306
|
|
|
|
+ # combining dot above U+0307
|
|
|
|
+ # combining diaeresis U+0308
|
|
|
|
+ # combining ring above U+030A
|
|
|
|
+ # combining double acute U+030B
|
|
|
|
+ # combining caron (hachek) U+030C
|
|
|
|
+ # combining candrabindu U+0310
|
|
|
|
+ # combining dot below U+0323
|
|
|
|
+ # combining comma below U+0326 (Romanian, Latvian, Livonian)
|
|
|
|
+ # combining cedilla U+0327 (French, Turkish, Azeri)
|
|
|
|
+ # combining ogonek (hook) U+0328 (Polish, Lithuanian)
|
|
|
|
+ # combining left ligature U+FE20 (Cyrillic transliteration)
|
|
|
|
+ # combining right ligature U+FE21 (Cyrillic transliteration)
|
|
|
|
+ # soft sign/prime (spacing) U+02B9(Cyrillic transliteration)
|
|
|
|
+ # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
|
|
|
|
+ # ayn(spacing) U+02BB (Semitic and Caucasian languages)
|
|
|
|
+ # alif (spacing) U+02BC (Semitic languages)
|
|
|
|
+ # middle dot (space) U+00B7) (Catalan)
|
|
|
|
+
|
|
|
|
+ # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
|
|
|
|
+ "I\uFE20A\uFE21": "\u042F"
|
|
|
|
+ "I\uFE20a\uFE21": "\u042F"
|
|
|
|
+ "i\uFE20a\uFE21": "\u044F"
|
|
|
|
+ "i\uFE20A\uFE21": "\u044F"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "A/a" WITH BREVE (0306)
|
|
|
|
+ "A\u0306": "\u04D8"
|
|
|
|
+ "a\u0306": "\u04D9"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"A\u0306": "\u04D2"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARC LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"a\u0306": "\u04D3"
|
|
|
|
+
|
|
|
|
+ # REMAINING LONE "A/a"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ "V\u0307": "\u0474"
|
|
|
|
+ "v\u0307": "\u0475"
|
|
|
|
+
|
|
|
|
+ "Gh": "\u0492"
|
|
|
|
+ "GH": "\u0492"
|
|
|
|
+ "gH": "U=0493"
|
|
|
|
+ "gh": "U=0493"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "A" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"Gh": "\u0494"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "A" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"GH": "\u0494"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "a" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"gH": "\u0495"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "a" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"gh": "\u0495"
|
|
|
|
+
|
|
|
|
+ "G\u0301": "\u0494"
|
|
|
|
+ "g\u0301": "\u0495"
|
|
|
|
+ "G\u0307": "\u049C"
|
|
|
|
+ "g\u0307": "\u049D"
|
|
|
|
+ "G": "\u0413"
|
|
|
|
+ "g": "\u0433"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # CONVERION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
|
|
|
|
+ "I\uFE20E\uFE21\u0304": "\u0464"
|
|
|
|
+ "I\uFE20E\u0304\uFE21": "\u0464"
|
|
|
|
+ "I\uFE20e\uFE21\u0304": "\u0464"
|
|
|
|
+ "I\uFE20e\u0304\uFE21": "\u0464"
|
|
|
|
+ "I\uFE20E\uFE21\u0328": "\u0468"
|
|
|
|
+ "I\uFE20E\u0328\uFE21": "\u0468"
|
|
|
|
+ "I\uFE20e\uFE21\u0328": "\u0468"
|
|
|
|
+ "I\uFE20e\u0328\uFE21": "\u0468"
|
|
|
|
+ "i\uFE20e\uFE21\u0304": "\u0465"
|
|
|
|
+ "i\uFE20e\u0304\uFE21": "\u0465"
|
|
|
|
+ "i\uFE20E\uFE21\u0304": "\u0465"
|
|
|
|
+ "i\uFE20E\u0304\uFE21": "\u0465"
|
|
|
|
+ "i\uFE20e\uFE21\u0328": "\u0469"
|
|
|
|
+ "i\uFE20e\u0328\uFE21": "\u0469"
|
|
|
|
+ "i\uFE20E\uFE21\u0328": "\u0469"
|
|
|
|
+ "i\uFE20E\u0328\uFE21": "\u0469"
|
|
|
|
+ "I\uFE20E\uFE21": "\u0462"
|
|
|
|
+ "I\uFE20e\uFE21": "\u0462"
|
|
|
|
+ "i\uFE20e\uFE21": "\u0463"
|
|
|
|
+ "i\uFE20E\uFE21": "\u0463"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
|
|
|
|
+ "E\u030C": "\u0462"
|
|
|
|
+ "E\u0304": "\u0404"
|
|
|
|
+ "E\u0307": "\u042D"
|
|
|
|
+ "E\u0308": "\u0401"
|
|
|
|
+ "E\u0328": "\u0466"
|
|
|
|
+ "e\u030C": "\u0463"
|
|
|
|
+ "e\u0304": "\u0454"
|
|
|
|
+ "e\u0307": "\u044D"
|
|
|
|
+ "e\u0308": "\u0451"
|
|
|
|
+ "e\u0328": "\u0467"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF REMAINING LONE "E/e"
|
|
|
|
+
|
|
|
|
+ "ZH": "\u0416"
|
|
|
|
+ "Zh": "\u0416"
|
|
|
|
+ "zH": "\u0436"
|
|
|
|
+ "zh": "\u0436"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
|
|
|
|
+ "T\uFE20H\uFE21": "\u0498"
|
|
|
|
+ "T\uFE20h\uFE21": "\u0498"
|
|
|
|
+ "t\uFE20H\uFE21": "\u0499"
|
|
|
|
+ "t\uFE20h\uFE21": "\u0499"
|
|
|
|
+ "Th": "\u04AA"
|
|
|
|
+ "TH": "\u04AA"
|
|
|
|
+ "tH": "\u04AB"
|
|
|
|
+ "th": "\u04AB"
|
|
|
|
+
|
|
|
|
+ # CONVERION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
|
|
|
|
+ "I\uFE20O\uFE21\u0328": "\u046C"
|
|
|
|
+ "I\uFE20O\u0328\uFE21": "\u046C"
|
|
|
|
+ "I\uFE20o\uFE21\u0328": "\u046C"
|
|
|
|
+ "I\uFE20o\u0328\uFE21": "\u046C"
|
|
|
|
+ "i\uFE20o\uFE21\u0328": "\u046D"
|
|
|
|
+ "i\uFE20o\u0328\uFE21": "\u046D"
|
|
|
|
+ "i\uFE20O\uFE21\u0328": "\u046D"
|
|
|
|
+ "i\uFE20O\u0328\uFE21": "\u046D"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # CONVERION OF "I/i" LIGATED TO "U/u"
|
|
|
|
+ "I\uFE20U\uFE21": "\u042E"
|
|
|
|
+ "I\uFE20u\uFE21": "\u042E"
|
|
|
|
+ "i\uFE20u\uFE21": "\u044E"
|
|
|
|
+ "i\uFE20U\uFE21": "\u044E"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
|
|
|
|
+ "I\u0304": "\u0406"
|
|
|
|
+ "I\u0306": "\u0419"
|
|
|
|
+ "I\u0310": "\u0408"
|
|
|
|
+ "i\u0304": "\u0456"
|
|
|
|
+ "i\u0306": "\u0439"
|
|
|
|
+ "i\u0310": "\u0458"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF REMAINING LONE "I/i"
|
|
|
|
+ "I": "\u0418"
|
|
|
|
+ "i": "\u0438"
|
|
|
|
+
|
|
|
|
+ "J": "\u0496"
|
|
|
|
+ "j": "\u0497"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"J": "\u04B8"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"J": "\u04B9"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"J": "\u04B6"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"J": "\u04B7"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ "K\uFE20S\uFE21": "\u046E"
|
|
|
|
+ "K\uFE20s\uFE21": "\u046E"
|
|
|
|
+ "k\uFE20s\uFE21": "\u046F"
|
|
|
|
+ "k\uFE20S\uFE21": "\u046F"
|
|
|
|
+ "Q": "\u04A0"
|
|
|
|
+ "q": "\u04A1"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"Q": "\u04C3"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"q": "\u04C4"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ "N\uFE20G\uFE21": "\u04A2"
|
|
|
|
+ "N\uFE20g\uFE21": "\u04A2"
|
|
|
|
+ "n\uFE20G\uFE21": "\u04A3"
|
|
|
|
+ "n\uFE20g\uFE21": "\u04A3"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"N\uFE20G\uFE21": "\u04A4"
|
|
|
|
+ #"N\uFE20g\uFE21": "\u04A4"
|
|
|
|
+ #"n\uFE20G\uFE21": "\u04A5"
|
|
|
|
+ #"n\uFE20g\uFE21": "\u04A5"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"N\uFE20G\uFE21": "\u04C7"
|
|
|
|
+ #"N\uFE20g\uFE21": "\u04C7"
|
|
|
|
+ #"n\uFE20G\uFE21": "\u04C8"
|
|
|
|
+ #"n\uFE20g\uFE21": "\u04C8"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # CONVERION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
|
|
|
|
+ "O\u0304\uFE20T\uFE21": "\u047E"
|
|
|
|
+ "O\u0304\uFE20t\uFE21": "\u047E"
|
|
|
|
+ "O\uFE20\u0304T\uFE21": "\u047E"
|
|
|
|
+ "O\uFE20\u0304t\uFE21": "\u047E"
|
|
|
|
+ "O\uFE20T\uFE21": "\u047E"
|
|
|
|
+ "O\uFE20t\uFE21": "\u047E"
|
|
|
|
+ "o\u0304\uFE20t\uFE21": "\u047F"
|
|
|
|
+ "o\u0304\uFE20T\uFE21": "\u047F"
|
|
|
|
+ "o\uFE20\u0304t\uFE21": "\u047F"
|
|
|
|
+ "o\uFE20\u0304T\uFE21": "\u047F"
|
|
|
|
+ "o\uFE20t\uFE21": "\u047F"
|
|
|
|
+ "o\uFE20T\uFE21": "\u047F"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "O/o" WITH MACRON(0304)
|
|
|
|
+ "O\u0304": "\u04EA"
|
|
|
|
+ "o\u0304": "\u04EB"
|
|
|
|
+ # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
|
|
|
|
+ "O\u0307": "\u04E8"
|
|
|
|
+ "o\u0307": "\u04E9"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"O\u0307": "\u04E6"
|
|
|
|
+ #"o\u0307": "\u04E7"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF REMAINING LONE "O/o"
|
|
|
|
+
|
|
|
|
+ "P\uFE20S\uFE21": "\u0470"
|
|
|
|
+ "P\uFE20s\uFE21": "\u0470"
|
|
|
|
+ "p\uFE20s\uFE21": "\u0471"
|
|
|
|
+ "p\uFE20S\uFE21": "\u0471"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ "SHCH": "\u0429"
|
|
|
|
+ "SHCh": "\u0429"
|
|
|
|
+ "SHch": "\u0429"
|
|
|
|
+ "Shch": "\u0429"
|
|
|
|
+ "sHCH": "\u0449"
|
|
|
|
+ "shCH": "\u0449"
|
|
|
|
+ "shcH": "\u0449"
|
|
|
|
+ "shch": "\u0449"
|
|
|
|
+
|
|
|
|
+ "sH": "\u0448"
|
|
|
|
+
|
|
|
|
+ "T\uFE20S\uFE21\u0307": "\u04B4"
|
|
|
|
+ "T\uFE20S\u0307\uFE21": "\u04B4"
|
|
|
|
+ "T\uFE20s\uFE21\u0307": "\u04B4"
|
|
|
|
+ "T\uFE20s\u0307\uFE21": "\u04B4"
|
|
|
|
+ "t\uFE20S\uFE21\u0307": "\u04B5"
|
|
|
|
+ "t\uFE20S\u0307\uFE21": "\u04B5"
|
|
|
|
+ "t\uFE20s\uFE21\u0307": "\u04B5"
|
|
|
|
+ "t\uFE20s\u0307\uFE21": "\u04B5"
|
|
|
|
+
|
|
|
|
+ "T\uFE20S\uFE21": "\u0426"
|
|
|
|
+ "T\uFE20s\uFE21": "\u0426"
|
|
|
|
+ "t\uFE20s\uFE21": "\u0446"
|
|
|
|
+ "t\uFE20S\uFE21": "\u0446"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF "U/u" WITH MACRON(0304), BREVE (0306), AND DOT ABOVE (0307)
|
|
|
|
+ "U\u0304": "\u04B0"
|
|
|
|
+ "u\u0304": "\u04B1"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"U\u0304": "\u04EE"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"U\u0304": "\u04EF"
|
|
|
|
+
|
|
|
|
+ "U\u0306": "\u040E"
|
|
|
|
+ "u\u0306": "\u0454"
|
|
|
|
+ "U\u0307": "\u04AE"
|
|
|
|
+ "u\u0307": "\u04AF"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"U\u0307": "\u04E6"
|
|
|
|
+ #"u\u0307": "\u04E7"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
|
|
|
|
+ "W": "\u040E"
|
|
|
|
+ "w": "\u0454"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ "F\u0307": "\u0472"
|
|
|
|
+ "f\u0307": "\u0473"
|
|
|
|
+
|
|
|
|
+ "cH": "\u0447"
|
|
|
|
+
|
|
|
|
+ # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
|
|
|
|
+ "H\u0307": "\u04BA"
|
|
|
|
+ "h\u0307": "\u04BB"
|
|
|
|
+
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"H\u0307": "\u04B2"
|
|
|
|
+ #"h\u0307": "\u04B3"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ #"H\u0307": "\u04FC"
|
|
|
|
+ #"h\u0307": "\u04FD"
|
|
|
|
+
|
|
|
|
+ "Y\u0307": "\u04F8"
|
|
|
|
+ "y\u0307": "\u04F9"
|
|
|
|
+
|
|
|
|
+ "Y": "\u042B"
|
|
|
|
+ "y": "\u044B"
|
|
|
|
+
|
|
|
|
+ "\u0027": "\u044C"
|
|
|
|
+ # this conversion is ambiguous - \u044C is also theoretically possible
|
|
|
|
+ "\u02BA": "\u044A"
|
|
|
|
+
|
|
|
|
+script_to_roman:
|
|
|
|
+ map:
|
|
|
|
+ "\u044F": "i\uFE20a\uFE21"
|
|
|
|
+ "\u04D8": "A\u0306"
|
|
|
|
+ "\u04D9": "a\u0306"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04D2": "A\u0306"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARC LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04D3": "a\u0306"
|
|
|
|
+ "\u0474": "V\u0307"
|
|
|
|
+ "\u0475": "v\u0307"
|
|
|
|
+ "\u0492": "Gh"
|
|
|
|
+ "U": "0493=gh"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "A" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u0494": "Gh"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "a" WITH DIAERSIS DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u0495": "gh"
|
|
|
|
+ "\u0494": "G\u0301"
|
|
|
|
+ "\u0495": "g\u0301"
|
|
|
|
+ "\u049C": "G\u0307"
|
|
|
|
+ "\u049D": "g\u0307"
|
|
|
|
+ "\u0413": "G"
|
|
|
|
+ "\u0433": "g"
|
|
|
|
+ # CONVERION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
|
|
|
|
+ "\u0464": "I\uFE20E\uFE21\u0304"
|
|
|
|
+ "\u0468": "I\uFE20E\uFE21\u0328"
|
|
|
|
+ "\u0465": "i\uFE20e\uFE21\u0304"
|
|
|
|
+ "\u0469": "i\uFE20e\uFE21\u0328"
|
|
|
|
+ "\u0462": "I\uFE20E\uFE21"
|
|
|
|
+ "\u0463": "i\uFE20e\uFE21"
|
|
|
|
+ # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
|
|
|
|
+ "\u0404": "E\u0304"
|
|
|
|
+ "\u042D": "E\u0307"
|
|
|
|
+ "\u0401": "E\u0308"
|
|
|
|
+ "\u0466": "E\u0328"
|
|
|
|
+ "\u0454": "e\u0304"
|
|
|
|
+ "\u044D": "e\u0307"
|
|
|
|
+ "\u0451": "e\u0308"
|
|
|
|
+ "\u0467": "e\u0328"
|
|
|
|
+ "\u0416": "Zh"
|
|
|
|
+ "\u0436": "zh"
|
|
|
|
+ # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
|
|
|
|
+ "\u0498": "T\uFE20H\uFE21"
|
|
|
|
+ "\u0499": "t\uFE20h\uFE21"
|
|
|
|
+ "\u04AA": "Th"
|
|
|
|
+ "\u04AB": "th"
|
|
|
|
+ # CONVERION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
|
|
|
|
+ "\u046C": "I\uFE20O\uFE21\u0328"
|
|
|
|
+ "\u046D": "i\uFE20o\uFE21\u0328"
|
|
|
|
+ # CONVERION OF "I/i" LIGATED TO "U/u"
|
|
|
|
+ "\u044E": "i\uFE20u\uFE21"
|
|
|
|
+ # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
|
|
|
|
+ "\u0406": "I\u0304"
|
|
|
|
+ "\u0408": "I\u0310"
|
|
|
|
+ "\u0456": "i\u0304"
|
|
|
|
+ "\u0458": "i\u0310"
|
|
|
|
+ # CONVERSION OF REMAINING LONE "I/i"
|
|
|
|
+ "\u0418": "I"
|
|
|
|
+ "\u0438": "i"
|
|
|
|
+ "\u0496": "J"
|
|
|
|
+ "\u0497": "j"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04B8": #"J"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04B9": #"J"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04B6": #"J"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04B7": #"J"
|
|
|
|
+ "\u0445": "kh"
|
|
|
|
+ "\u046E": "K\uFE20S\uFE21"
|
|
|
|
+ "\u046F": "k\uFE20s\uFE21"
|
|
|
|
+ "\u04A0": "Q"
|
|
|
|
+ "\u04A1": "q"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04C3": "Q"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04C4": "q"
|
|
|
|
+ "\u04A2": "N\uFE20G\uFE21"
|
|
|
|
+ "\u04A3": "n\uFE20g\uFE21"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04A4": #"N\uFE20G\uFE21"
|
|
|
|
+ "\u04A5": #"n\uFE20g\uFE21"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04C7": #"N\uFE20G\uFE21"
|
|
|
|
+ "\u04C8": #"n\uFE20g\uFE21"
|
|
|
|
+ # CONVERION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
|
|
|
|
+ "\u047E": "O\u0304\uFE20T\uFE21"
|
|
|
|
+ "\u047F": "o\u0304\uFE20t\uFE21"
|
|
|
|
+ # CONVERSION OF "O/o" WITH MACRON(0304)
|
|
|
|
+ "\u04EA": "O\u0304"
|
|
|
|
+ "\u04EB": "o\u0304"
|
|
|
|
+ # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
|
|
|
|
+ "\u04E8": "O\u0307"
|
|
|
|
+ "\u04E9": "o\u0307"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04E6": #"O\u0307"
|
|
|
|
+ "\u04E7": #"o\u0307"
|
|
|
|
+ # CONVERSION OF REMAINING LONE "O/o"
|
|
|
|
+ "\u0470": "P\uFE20S\uFE21"
|
|
|
|
+ "\u0471": "p\uFE20s\uFE21"
|
|
|
|
+ "\u04B4": "T\uFE20S\uFE21\u0307"
|
|
|
|
+ "\u04B5": "t\uFE20s\uFE21\u0307"
|
|
|
|
+ "\u0426": "T\uFE20S\uFE21"
|
|
|
|
+ "\u0446": "t\uFE20s\uFE21"
|
|
|
|
+ # CONVERSION OF "U/u" WITH MACRON(0304), BREVE (0306), AND DOT ABOVE (0307)
|
|
|
|
+ "\u04B0": "U\u0304"
|
|
|
|
+ "\u04B1": "u\u0304"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04EE": #"U\u0304"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04EF": #"U\u0304"
|
|
|
|
+ "\u040E": "U\u0306"
|
|
|
|
+ "\u0454": "u\u0306"
|
|
|
|
+ "\u04AE": "U\u0307"
|
|
|
|
+ "\u04AF": "u\u0307"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04E6": #"U\u0307"
|
|
|
|
+ "\u04E7": #"u\u0307"
|
|
|
|
+ # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
|
|
|
|
+ "\u040E": "W"
|
|
|
|
+ "\u0454": "w"
|
|
|
|
+ "\u0472": "F\u0307"
|
|
|
|
+ "\u0473": "f\u0307"
|
|
|
|
+ "\u0444": "f"
|
|
|
|
+ "\u0427": "Ch"
|
|
|
|
+ # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
|
|
|
|
+ "\u04BA": "H\u0307"
|
|
|
|
+ "\u04BB": "h\u0307"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04B2": "H\u0307"
|
|
|
|
+ "\u04B3": "h\u0307"
|
|
|
|
+ # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
|
|
|
|
+ "\u04FC": "H\u0307"
|
|
|
|
+ "\u04FD": "h\u0307"
|
|
|
|
+ "\u04F8": "Y\u0307"
|
|
|
|
+ "\u04F9": "y\u0307"
|
|
|
|
+ "\u042B": "Y"
|
|
|
|
+ "\u044B": "y"
|
|
|
|
+ # this conversion is ambiguous - \u044C is also theoretically possible
|
|
|
|
+ "\u044A": "\u02BA"
|
|
|
|
+
|