# Transliteration table "Asian Cyrillic".
# `roman_to_script.map` maps romanized (Latin) tokens to Cyrillic code points;
# `script_to_roman.map` maps Cyrillic code points back to romanized tokens.
# Entries listed here are in addition to whatever `_cyrillic_base` provides
# (presumably merged by the consumer via `inherits` - confirm against loader).
#
# Lines of the form `# "key": "value"` are mappings that were deliberately
# de-activated because another active mapping uses the same key.
general:
  name: Asian Cyrillic
  inherits: _cyrillic_base

roman_to_script:
  map:
    # COMMON COMBINING CHARACTERS (always follow a base letter):
    # combining grave U+0300
    # combining acute U+0301
    # combining circumflex U+0302
    # combining macron U+0304
    # combining breve U+0306
    # combining dot above U+0307
    # combining diaeresis U+0308
    # combining ring above U+030A
    # combining double acute U+030B
    # combining caron (hachek) U+030C
    # combining candrabindu U+0310
    # combining dot below U+0323
    # combining comma below U+0326 (Romanian, Latvian, Livonian)
    # combining cedilla U+0327 (French, Turkish, Azeri)
    # combining ogonek (hook) U+0328 (Polish, Lithuanian)
    # combining left ligature U+FE20 (Cyrillic transliteration)
    # combining right ligature U+FE21 (Cyrillic transliteration)
    # soft sign/prime (spacing) U+02B9 (Cyrillic transliteration)
    # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
    # ayn (spacing) U+02BB (Semitic and Caucasian languages)
    # alif (spacing) U+02BC (Semitic languages)
    # middle dot (spacing) U+00B7 (Catalan)

    # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED

    # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
    "I\uFE20A\uFE21": "\u042F"
    "I\uFE20a\uFE21": "\u042F"
    "i\uFE20a\uFE21": "\u044F"
    "i\uFE20A\uFE21": "\u044F"

    # CONVERSION OF "A/a" WITH BREVE (0306)
    "A\u0306": "\u04D8"
    "a\u0306": "\u04D9"
    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE
    # DUE TO CONFLICTING ROMANIZATION
    # "A\u0306": "\u04D2"
    # "a\u0306": "\u04D3"

    # REMAINING LONE "A/a"

    "V\u0307": "\u0474"
    "v\u0307": "\u0475"

    "Gh": "\u0492"
    "GH": "\u0492"
    "gH": "\u0493"
    "gh": "\u0493"
    # DE-ACTIVATED CONVERSION OF YAKUT "GH/gh" TO U+0494/U+0495
    # DUE TO CONFLICTING ROMANIZATION (the keys are already used above)
    # "Gh": "\u0494"
    # "GH": "\u0494"
    # "gH": "\u0495"
    # "gh": "\u0495"
    "G\u0301": "\u0494"
    "g\u0301": "\u0495"
    "G\u0307": "\u049C"
    "g\u0307": "\u049D"
    "G": "\u0413"
    "g": "\u0433"

    # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
    # (both orderings of the trailing diacritic and the right ligature half are accepted)
    "I\uFE20E\uFE21\u0304": "\u0464"
    "I\uFE20E\u0304\uFE21": "\u0464"
    "I\uFE20e\uFE21\u0304": "\u0464"
    "I\uFE20e\u0304\uFE21": "\u0464"
    "I\uFE20E\uFE21\u0328": "\u0468"
    "I\uFE20E\u0328\uFE21": "\u0468"
    "I\uFE20e\uFE21\u0328": "\u0468"
    "I\uFE20e\u0328\uFE21": "\u0468"
    "i\uFE20e\uFE21\u0304": "\u0465"
    "i\uFE20e\u0304\uFE21": "\u0465"
    "i\uFE20E\uFE21\u0304": "\u0465"
    "i\uFE20E\u0304\uFE21": "\u0465"
    "i\uFE20e\uFE21\u0328": "\u0469"
    "i\uFE20e\u0328\uFE21": "\u0469"
    "i\uFE20E\uFE21\u0328": "\u0469"
    "i\uFE20E\u0328\uFE21": "\u0469"
    "I\uFE20E\uFE21": "\u0462"
    "I\uFE20e\uFE21": "\u0462"
    "i\uFE20e\uFE21": "\u0463"
    "i\uFE20E\uFE21": "\u0463"

    # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308),
    # OGONEK (0328), & CARON (030C)
    "E\u030C": "\u0462"
    "E\u0304": "\u0404"
    "E\u0307": "\u042D"
    "E\u0308": "\u0401"
    "E\u0328": "\u0466"
    "e\u030C": "\u0463"
    "e\u0304": "\u0454"
    "e\u0307": "\u044D"
    "e\u0308": "\u0451"
    "e\u0328": "\u0467"

    # CONVERSION OF REMAINING LONE "E/e"

    "ZH": "\u0416"
    "Zh": "\u0416"
    "zH": "\u0436"
    "zh": "\u0436"

    # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
    "T\uFE20H\uFE21": "\u0498"
    "T\uFE20h\uFE21": "\u0498"
    "t\uFE20H\uFE21": "\u0499"
    "t\uFE20h\uFE21": "\u0499"
    "Th": "\u04AA"
    "TH": "\u04AA"
    "tH": "\u04AB"
    "th": "\u04AB"

    # CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
    "I\uFE20O\uFE21\u0328": "\u046C"
    "I\uFE20O\u0328\uFE21": "\u046C"
    "I\uFE20o\uFE21\u0328": "\u046C"
    "I\uFE20o\u0328\uFE21": "\u046C"
    "i\uFE20o\uFE21\u0328": "\u046D"
    "i\uFE20o\u0328\uFE21": "\u046D"
    "i\uFE20O\uFE21\u0328": "\u046D"
    "i\uFE20O\u0328\uFE21": "\u046D"

    # CONVERSION OF "I/i" LIGATED TO "U/u"
    "I\uFE20U\uFE21": "\u042E"
    "I\uFE20u\uFE21": "\u042E"
    "i\uFE20u\uFE21": "\u044E"
    "i\uFE20U\uFE21": "\u044E"

    # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
    "I\u0304": "\u0406"
    "I\u0306": "\u0419"
    "I\u0310": "\u0408"
    "i\u0304": "\u0456"
    "i\u0306": "\u0439"
    "i\u0310": "\u0458"

    # CONVERSION OF REMAINING LONE "I/i"
    "I": "\u0418"
    "i": "\u0438"

    "J": "\u0496"
    "j": "\u0497"
    # DE-ACTIVATED CONVERSION OF AZERI "J/j" DUE TO CONFLICTING ROMANIZATION
    # "J": "\u04B8"
    # "j": "\u04B9"
    # DE-ACTIVATED CONVERSION OF TAJIK "J/j" DUE TO CONFLICTING ROMANIZATION
    # "J": "\u04B6"
    # "j": "\u04B7"

    "K\uFE20S\uFE21": "\u046E"
    "K\uFE20s\uFE21": "\u046E"
    "k\uFE20s\uFE21": "\u046F"
    "k\uFE20S\uFE21": "\u046F"

    "Q": "\u04A0"
    "q": "\u04A1"
    # DE-ACTIVATED CONVERSION OF KHANTY "Q/q" DUE TO CONFLICTING ROMANIZATION
    # "Q": "\u04C3"
    # "q": "\u04C4"

    "N\uFE20G\uFE21": "\u04A2"
    "N\uFE20g\uFE21": "\u04A2"
    "n\uFE20G\uFE21": "\u04A3"
    "n\uFE20g\uFE21": "\u04A3"
    # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
    # "N\uFE20G\uFE21": "\u04A4"
    # "N\uFE20g\uFE21": "\u04A4"
    # "n\uFE20G\uFE21": "\u04A5"
    # "n\uFE20g\uFE21": "\u04A5"
    # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
    # "N\uFE20G\uFE21": "\u04C7"
    # "N\uFE20g\uFE21": "\u04C7"
    # "n\uFE20G\uFE21": "\u04C8"
    # "n\uFE20g\uFE21": "\u04C8"

    # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
    "O\u0304\uFE20T\uFE21": "\u047E"
    "O\u0304\uFE20t\uFE21": "\u047E"
    "O\uFE20\u0304T\uFE21": "\u047E"
    "O\uFE20\u0304t\uFE21": "\u047E"
    "O\uFE20T\uFE21": "\u047E"
    "O\uFE20t\uFE21": "\u047E"
    "o\u0304\uFE20t\uFE21": "\u047F"
    "o\u0304\uFE20T\uFE21": "\u047F"
    "o\uFE20\u0304t\uFE21": "\u047F"
    "o\uFE20\u0304T\uFE21": "\u047F"
    "o\uFE20t\uFE21": "\u047F"
    "o\uFE20T\uFE21": "\u047F"

    # CONVERSION OF "O/o" WITH MACRON (0304)
    "O\u0304": "\u04EA"
    "o\u0304": "\u04EB"

    # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
    "O\u0307": "\u04E8"
    "o\u0307": "\u04E9"
    # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)
    # DUE TO CONFLICTING ROMANIZATION
    # "O\u0307": "\u04E6"
    # "o\u0307": "\u04E7"

    # CONVERSION OF REMAINING LONE "O/o"

    "P\uFE20S\uFE21": "\u0470"
    "P\uFE20s\uFE21": "\u0470"
    "p\uFE20s\uFE21": "\u0471"
    "p\uFE20S\uFE21": "\u0471"

    "SHCH": "\u0429"
    "SHCh": "\u0429"
    "SHch": "\u0429"
    "Shch": "\u0429"
    "sHCH": "\u0449"
    "shCH": "\u0449"
    "shcH": "\u0449"
    "shch": "\u0449"
    "sH": "\u0448"

    "T\uFE20S\uFE21\u0307": "\u04B4"
    "T\uFE20S\u0307\uFE21": "\u04B4"
    "T\uFE20s\uFE21\u0307": "\u04B4"
    "T\uFE20s\u0307\uFE21": "\u04B4"
    "t\uFE20S\uFE21\u0307": "\u04B5"
    "t\uFE20S\u0307\uFE21": "\u04B5"
    "t\uFE20s\uFE21\u0307": "\u04B5"
    "t\uFE20s\u0307\uFE21": "\u04B5"
    "T\uFE20S\uFE21": "\u0426"
    "T\uFE20s\uFE21": "\u0426"
    "t\uFE20s\uFE21": "\u0446"
    "t\uFE20S\uFE21": "\u0446"

    # CONVERSION OF "U/u" WITH MACRON (0304), BREVE (0306), AND DOT ABOVE (0307)
    "U\u0304": "\u04B0"
    "u\u0304": "\u04B1"
    # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
    # "U\u0304": "\u04EE"
    # "u\u0304": "\u04EF"
    "U\u0306": "\u040E"
    # Lower-case partner of U+040E is U+045E. This entry previously pointed at
    # U+0454, which is the target of "e" with macron above.
    "u\u0306": "\u045E"
    "U\u0307": "\u04AE"
    "u\u0307": "\u04AF"
    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE
    # DUE TO CONFLICTING ROMANIZATION
    # "U\u0307": "\u04E6"
    # "u\u0307": "\u04E7"

    # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS
    # AS "U/u" WITH BREVE
    "W": "\u040E"
    "w": "\u045E"

    "F\u0307": "\u0472"
    "f\u0307": "\u0473"

    "cH": "\u0447"

    # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN
    # LANGUAGES (NOT NORMALLY INITIALLY)
    "H\u0307": "\u04BA"
    "h\u0307": "\u04BB"
    # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307)
    # DUE TO CONFLICTING ROMANIZATION
    # "H\u0307": "\u04B2"
    # "h\u0307": "\u04B3"
    # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307)
    # DUE TO CONFLICTING ROMANIZATION
    # "H\u0307": "\u04FC"
    # "h\u0307": "\u04FD"

    "Y\u0307": "\u04F8"
    "y\u0307": "\u04F9"
    "Y": "\u042B"
    "y": "\u044B"

    "\u0027": "\u044C"
    # this conversion is ambiguous - \u044C is also theoretically possible
    "\u02BA": "\u044A"

script_to_roman:
  map:
    # NOTE(review): entries written as `"key":  # "value"` keep the key with a
    # null value, exactly as before. Whether the consumer treats null as
    # "no mapping" is not visible from this file - confirm against the loader.
    #
    # Each key appears once in this map. Where the file previously listed a key
    # twice, the entry that a last-wins YAML loader already discarded is
    # commented out, so resolved values are unchanged (except the U+045E fix).

    "\u044F": "i\uFE20a\uFE21"

    "\u04D8": "A\u0306"
    "\u04D9": "a\u0306"
    # GAGAUZ AND MARI LETTER "A/a" WITH BREVE (de-activated in roman_to_script
    # due to conflicting romanization; still romanized in this direction)
    "\u04D2": "A\u0306"
    "\u04D3": "a\u0306"

    "\u0474": "V\u0307"
    "\u0475": "v\u0307"

    "\u0492": "Gh"
    "\u0493": "gh"
    # DE-ACTIVATED CONVERSION OF YAKUT U+0494/U+0495 TO "Gh/gh": these keys are
    # defined again immediately below, so these entries were already overridden.
    # "\u0494": "Gh"
    # "\u0495": "gh"
    "\u0494": "G\u0301"
    "\u0495": "g\u0301"
    "\u049C": "G\u0307"
    "\u049D": "g\u0307"
    "\u0413": "G"
    "\u0433": "g"

    # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
    "\u0464": "I\uFE20E\uFE21\u0304"
    "\u0468": "I\uFE20E\uFE21\u0328"
    "\u0465": "i\uFE20e\uFE21\u0304"
    "\u0469": "i\uFE20e\uFE21\u0328"
    "\u0462": "I\uFE20E\uFE21"
    "\u0463": "i\uFE20e\uFE21"

    # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308),
    # OGONEK (0328), & CARON (030C)
    "\u0404": "E\u0304"
    "\u042D": "E\u0307"
    "\u0401": "E\u0308"
    "\u0466": "E\u0328"
    "\u0454": "e\u0304"
    "\u044D": "e\u0307"
    "\u0451": "e\u0308"
    "\u0467": "e\u0328"

    "\u0416": "Zh"
    "\u0436": "zh"

    # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h"
    "\u0498": "T\uFE20H\uFE21"
    "\u0499": "t\uFE20h\uFE21"
    "\u04AA": "Th"
    "\u04AB": "th"

    # CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
    "\u046C": "I\uFE20O\uFE21\u0328"
    "\u046D": "i\uFE20o\uFE21\u0328"

    # CONVERSION OF "I/i" LIGATED TO "U/u"
    "\u044E": "i\uFE20u\uFE21"

    # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
    "\u0406": "I\u0304"
    "\u0408": "I\u0310"
    "\u0456": "i\u0304"
    "\u0458": "i\u0310"

    # CONVERSION OF REMAINING LONE "I/i"
    "\u0418": "I"
    "\u0438": "i"

    "\u0496": "J"
    "\u0497": "j"
    # DE-ACTIVATED CONVERSION OF AZERI "J/j" DUE TO CONFLICTING ROMANIZATION
    "\u04B8":  # "J"
    "\u04B9":  # "j"
    # DE-ACTIVATED CONVERSION OF TAJIK "J/j" DUE TO CONFLICTING ROMANIZATION
    "\u04B6":  # "J"
    "\u04B7":  # "j"

    "\u0445": "kh"

    "\u046E": "K\uFE20S\uFE21"
    "\u046F": "k\uFE20s\uFE21"

    "\u04A0": "Q"
    "\u04A1": "q"
    # KHANTY "Q/q" (de-activated in roman_to_script due to conflicting
    # romanization; still romanized in this direction)
    "\u04C3": "Q"
    "\u04C4": "q"

    "\u04A2": "N\uFE20G\uFE21"
    "\u04A3": "n\uFE20g\uFE21"
    # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
    "\u04A4":  # "N\uFE20G\uFE21"
    "\u04A5":  # "n\uFE20g\uFE21"
    # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
    "\u04C7":  # "N\uFE20G\uFE21"
    "\u04C8":  # "n\uFE20g\uFE21"

    # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
    "\u047E": "O\u0304\uFE20T\uFE21"
    "\u047F": "o\u0304\uFE20t\uFE21"

    # CONVERSION OF "O/o" WITH MACRON (0304)
    "\u04EA": "O\u0304"
    "\u04EB": "o\u0304"

    # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
    "\u04E8": "O\u0307"
    "\u04E9": "o\u0307"
    # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)
    # DUE TO CONFLICTING ROMANIZATION
    "\u04E6":  # "O\u0307"
    "\u04E7":  # "o\u0307"

    # CONVERSION OF REMAINING LONE "O/o"

    "\u0470": "P\uFE20S\uFE21"
    "\u0471": "p\uFE20s\uFE21"

    "\u04B4": "T\uFE20S\uFE21\u0307"
    "\u04B5": "t\uFE20s\uFE21\u0307"
    "\u0426": "T\uFE20S\uFE21"
    "\u0446": "t\uFE20s\uFE21"

    # CONVERSION OF "U/u" WITH MACRON (0304), BREVE (0306), AND DOT ABOVE (0307)
    "\u04B0": "U\u0304"
    "\u04B1": "u\u0304"
    # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
    "\u04EE":  # "U\u0304"
    "\u04EF":  # "u\u0304"
    # DE-ACTIVATED: U+040E/U+045E are defined again in the "W/w" group below,
    # which is the value a last-wins loader already resolved for U+040E.
    # NOTE(review): decide whether "U/u" with breve or "W/w" should be the
    # romanization emitted by this table; only one can be active.
    # "\u040E": "U\u0306"
    # "\u045E": "u\u0306"
    "\u04AE": "U\u0307"
    "\u04AF": "u\u0307"
    # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE
    # DUE TO CONFLICTING ROMANIZATION. U+04E6/U+04E7 are already listed (with no
    # value) in the "O/o" WITH DOT ABOVE group, so they are not repeated as keys.
    # "\u04E6":  # "U\u0307"
    # "\u04E7":  # "u\u0307"

    # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS
    # AS "U/u" WITH BREVE
    "\u040E": "W"
    # Lower-case partner of U+040E is U+045E. This key was previously U+0454,
    # which duplicated (and overrode) the "e" with macron entry above.
    "\u045E": "w"

    "\u0472": "F\u0307"
    "\u0473": "f\u0307"
    "\u0444": "f"

    "\u0427": "Ch"

    # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN
    # LANGUAGES (NOT NORMALLY INITIALLY)
    "\u04BA": "H\u0307"
    "\u04BB": "h\u0307"
    # TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) (de-activated in
    # roman_to_script due to conflicting romanization; still romanized here)
    "\u04B2": "H\u0307"
    "\u04B3": "h\u0307"
    # ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) (de-activated in
    # roman_to_script due to conflicting romanization; still romanized here)
    "\u04FC": "H\u0307"
    "\u04FD": "h\u0307"

    "\u04F8": "Y\u0307"
    "\u04F9": "y\u0307"
    "\u042B": "Y"
    "\u044B": "y"

    # this conversion is ambiguous - \u044C is also theoretically possible
    "\u044A": "\u02BA"