asian_cyrillic.yml 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. general:  # table-level metadata
  2. name: Asian Cyrillic  # display name of this table
  3. inherits: _cyrillic_base  # presumably the parent table whose mappings this one extends - confirm against the loader
  4. roman_to_script:
  5. map:
  6. # COMMON COMBINING CHARACTERS (always follow a base letter):
  7. # combining grave U+0300
  8. # combining acute U+0301
  9. # combining circumflex U+0302
  10. # combining macron U+0304
  11. # combining breve U+0306
  12. # combining dot above U+0307
  13. # combining diaeresis U+0308
  14. # combining ring above U+030A
  15. # combining double acute U+030B
  16. # combining caron (hachek) U+030C
  17. # combining candrabindu U+0310
  18. # combining dot below U+0323
  19. # combining comma below U+0326 (Romanian, Latvian, Livonian)
  20. # combining cedilla U+0327 (French, Turkish, Azeri)
  21. # combining ogonek (hook) U+0328 (Polish, Lithuanian)
  22. # combining left ligature U+FE20 (Cyrillic transliteration)
  23. # combining right ligature U+FE21 (Cyrillic transliteration)
  24. # soft sign/prime (spacing) U+02B9 (Cyrillic transliteration)
  25. # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
  26. # ayn (spacing) U+02BB (Semitic and Caucasian languages)
  27. # alif (spacing) U+02BC (Semitic languages)
  28. # middle dot (spacing) U+00B7 (Catalan)
  29. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  30. # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
  31. "I\uFE20A\uFE21": "\u042F"
  32. "I\uFE20a\uFE21": "\u042F"
  33. "i\uFE20a\uFE21": "\u044F"
  34. "i\uFE20A\uFE21": "\u044F"
  35. # CONVERSION OF "A/a" WITH BREVE (0306)
  36. "A\u0306": "\u04D8"
  37. "a\u0306": "\u04D9"
  38. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
  39. #"A\u0306": "\u04D2"
  40. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
  41. #"a\u0306": "\u04D3"
  42. # REMAINING LONE "A/a"
  43. "V\u0307": "\u0474"
  44. "v\u0307": "\u0475"
  45. "Gh": "\u0492"
  46. "GH": "\u0492"
  47. "gH": "\u0493"
  48. "gh": "\u0493"
  49. # DE-ACTIVATED CONVERSION OF YAKUT "GH" (GHE WITH MIDDLE HOOK) DUE TO CONFLICTING ROMANIZATION
  50. #"Gh": "\u0494"
  51. # DE-ACTIVATED CONVERSION OF YAKUT "GH" (GHE WITH MIDDLE HOOK) DUE TO CONFLICTING ROMANIZATION
  52. #"GH": "\u0494"
  53. # DE-ACTIVATED CONVERSION OF YAKUT "gh" (GHE WITH MIDDLE HOOK) DUE TO CONFLICTING ROMANIZATION
  54. #"gH": "\u0495"
  55. # DE-ACTIVATED CONVERSION OF YAKUT "gh" (GHE WITH MIDDLE HOOK) DUE TO CONFLICTING ROMANIZATION
  56. #"gh": "\u0495"
  57. "G\u0301": "\u0494"
  58. "g\u0301": "\u0495"
  59. "G\u0307": "\u049C"
  60. "g\u0307": "\u049D"
  61. "G": "\u0413"
  62. "g": "\u0433"
  63. # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
  64. "I\uFE20E\uFE21\u0304": "\u0464"
  65. "I\uFE20E\u0304\uFE21": "\u0464"
  66. "I\uFE20e\uFE21\u0304": "\u0464"
  67. "I\uFE20e\u0304\uFE21": "\u0464"
  68. "I\uFE20E\uFE21\u0328": "\u0468"
  69. "I\uFE20E\u0328\uFE21": "\u0468"
  70. "I\uFE20e\uFE21\u0328": "\u0468"
  71. "I\uFE20e\u0328\uFE21": "\u0468"
  72. "i\uFE20e\uFE21\u0304": "\u0465"
  73. "i\uFE20e\u0304\uFE21": "\u0465"
  74. "i\uFE20E\uFE21\u0304": "\u0465"
  75. "i\uFE20E\u0304\uFE21": "\u0465"
  76. "i\uFE20e\uFE21\u0328": "\u0469"
  77. "i\uFE20e\u0328\uFE21": "\u0469"
  78. "i\uFE20E\uFE21\u0328": "\u0469"
  79. "i\uFE20E\u0328\uFE21": "\u0469"
  80. "I\uFE20E\uFE21": "\u0462"
  81. "I\uFE20e\uFE21": "\u0462"
  82. "i\uFE20e\uFE21": "\u0463"
  83. "i\uFE20E\uFE21": "\u0463"
  84. # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
  85. "E\u030C": "\u0462"
  86. "E\u0304": "\u0404"
  87. "E\u0307": "\u042D"
  88. "E\u0308": "\u0401"
  89. "E\u0328": "\u0466"
  90. "e\u030C": "\u0463"
  91. "e\u0304": "\u0454"
  92. "e\u0307": "\u044D"
  93. "e\u0308": "\u0451"
  94. "e\u0328": "\u0467"
  95. # CONVERSION OF REMAINING LONE "E/e"
  96. "ZH": "\u0416"
  97. "Zh": "\u0416"
  98. "zH": "\u0436"
  99. "zh": "\u0436"
  100. # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
  101. "T\uFE20H\uFE21": "\u0498"
  102. "T\uFE20h\uFE21": "\u0498"
  103. "t\uFE20H\uFE21": "\u0499"
  104. "t\uFE20h\uFE21": "\u0499"
  105. "Th": "\u04AA"
  106. "TH": "\u04AA"
  107. "tH": "\u04AB"
  108. "th": "\u04AB"
  109. # CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
  110. "I\uFE20O\uFE21\u0328": "\u046C"
  111. "I\uFE20O\u0328\uFE21": "\u046C"
  112. "I\uFE20o\uFE21\u0328": "\u046C"
  113. "I\uFE20o\u0328\uFE21": "\u046C"
  114. "i\uFE20o\uFE21\u0328": "\u046D"
  115. "i\uFE20o\u0328\uFE21": "\u046D"
  116. "i\uFE20O\uFE21\u0328": "\u046D"
  117. "i\uFE20O\u0328\uFE21": "\u046D"
  118. # CONVERSION OF "I/i" LIGATED TO "U/u"
  119. "I\uFE20U\uFE21": "\u042E"
  120. "I\uFE20u\uFE21": "\u042E"
  121. "i\uFE20u\uFE21": "\u044E"
  122. "i\uFE20U\uFE21": "\u044E"
  123. # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
  124. "I\u0304": "\u0406"
  125. "I\u0306": "\u0419"
  126. "I\u0310": "\u0408"
  127. "i\u0304": "\u0456"
  128. "i\u0306": "\u0439"
  129. "i\u0310": "\u0458"
  130. # CONVERSION OF REMAINING LONE "I/i"
  131. "I": "\u0418"
  132. "i": "\u0438"
  133. "J": "\u0496"
  134. "j": "\u0497"
  135. # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
  136. #"J": "\u04B8"
  137. # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
  138. #"j": "\u04B9"
  139. # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
  140. #"J": "\u04B6"
  141. # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
  142. #"j": "\u04B7"
  143. "K\uFE20S\uFE21": "\u046E"
  144. "K\uFE20s\uFE21": "\u046E"
  145. "k\uFE20s\uFE21": "\u046F"
  146. "k\uFE20S\uFE21": "\u046F"
  147. "Q": "\u04A0"
  148. "q": "\u04A1"
  149. # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
  150. #"Q": "\u04C3"
  151. # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
  152. #"q": "\u04C4"
  153. "N\uFE20G\uFE21": "\u04A2"
  154. "N\uFE20g\uFE21": "\u04A2"
  155. "n\uFE20G\uFE21": "\u04A3"
  156. "n\uFE20g\uFE21": "\u04A3"
  157. # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
  158. #"N\uFE20G\uFE21": "\u04A4"
  159. #"N\uFE20g\uFE21": "\u04A4"
  160. #"n\uFE20G\uFE21": "\u04A5"
  161. #"n\uFE20g\uFE21": "\u04A5"
  162. # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
  163. #"N\uFE20G\uFE21": "\u04C7"
  164. #"N\uFE20g\uFE21": "\u04C7"
  165. #"n\uFE20G\uFE21": "\u04C8"
  166. #"n\uFE20g\uFE21": "\u04C8"
  167. # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
  168. "O\u0304\uFE20T\uFE21": "\u047E"
  169. "O\u0304\uFE20t\uFE21": "\u047E"
  170. "O\uFE20\u0304T\uFE21": "\u047E"
  171. "O\uFE20\u0304t\uFE21": "\u047E"
  172. "O\uFE20T\uFE21": "\u047E"
  173. "O\uFE20t\uFE21": "\u047E"
  174. "o\u0304\uFE20t\uFE21": "\u047F"
  175. "o\u0304\uFE20T\uFE21": "\u047F"
  176. "o\uFE20\u0304t\uFE21": "\u047F"
  177. "o\uFE20\u0304T\uFE21": "\u047F"
  178. "o\uFE20t\uFE21": "\u047F"
  179. "o\uFE20T\uFE21": "\u047F"
  180. # CONVERSION OF "O/o" WITH MACRON (0304)
  181. "O\u0304": "\u04EA"
  182. "o\u0304": "\u04EB"
  183. # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
  184. "O\u0307": "\u04E8"
  185. "o\u0307": "\u04E9"
  186. # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  187. #"O\u0307": "\u04E6"
  188. #"o\u0307": "\u04E7"
  189. # CONVERSION OF REMAINING LONE "O/o"
  190. "P\uFE20S\uFE21": "\u0470"
  191. "P\uFE20s\uFE21": "\u0470"
  192. "p\uFE20s\uFE21": "\u0471"
  193. "p\uFE20S\uFE21": "\u0471"
  194. "SHCH": "\u0429"
  195. "SHCh": "\u0429"
  196. "SHch": "\u0429"
  197. "Shch": "\u0429"
  198. "sHCH": "\u0449"
  199. "shCH": "\u0449"
  200. "shcH": "\u0449"
  201. "shch": "\u0449"
  202. "sH": "\u0448"
  203. "T\uFE20S\uFE21\u0307": "\u04B4"
  204. "T\uFE20S\u0307\uFE21": "\u04B4"
  205. "T\uFE20s\uFE21\u0307": "\u04B4"
  206. "T\uFE20s\u0307\uFE21": "\u04B4"
  207. "t\uFE20S\uFE21\u0307": "\u04B5"
  208. "t\uFE20S\u0307\uFE21": "\u04B5"
  209. "t\uFE20s\uFE21\u0307": "\u04B5"
  210. "t\uFE20s\u0307\uFE21": "\u04B5"
  211. "T\uFE20S\uFE21": "\u0426"
  212. "T\uFE20s\uFE21": "\u0426"
  213. "t\uFE20s\uFE21": "\u0446"
  214. "t\uFE20S\uFE21": "\u0446"
  215. # CONVERSION OF "U/u" WITH MACRON (0304), BREVE (0306), AND DOT ABOVE (0307)
  216. "U\u0304": "\u04B0"
  217. "u\u0304": "\u04B1"
  218. # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
  219. #"U\u0304": "\u04EE"
  220. # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
  221. #"u\u0304": "\u04EF"
  222. "U\u0306": "\u040E"
  223. "u\u0306": "\u045E"
  224. "U\u0307": "\u04AE"
  225. "u\u0307": "\u04AF"
  226. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "U/u" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
  227. #"U\u0307": "\u04F0"
  228. #"u\u0307": "\u04F1"
  229. # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
  230. "W": "\u040E"
  231. "w": "\u045E"
  232. "F\u0307": "\u0472"
  233. "f\u0307": "\u0473"
  234. "cH": "\u0447"
  235. # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
  236. "H\u0307": "\u04BA"
  237. "h\u0307": "\u04BB"
  238. # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  239. #"H\u0307": "\u04B2"
  240. #"h\u0307": "\u04B3"
  241. # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  242. #"H\u0307": "\u04FC"
  243. #"h\u0307": "\u04FD"
  244. "Y\u0307": "\u04F8"
  245. "y\u0307": "\u04F9"
  246. "Y": "\u042B"
  247. "y": "\u044B"
  248. "\u0027": "\u044C"
  249. # this conversion is ambiguous - \u044C is also theoretically possible
  250. "\u02BA": "\u044A"
  251. script_to_roman:
  252. map:
  253. "\u044F": "i\uFE20a\uFE21"
  254. "\u04D8": "A\u0306"
  255. "\u04D9": "a\u0306"
  256. # CONVERSION OF GAGAUZ AND MARI LETTER "A" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  257. "\u04D2": "A\u0306"
  258. # CONVERSION OF GAGAUZ AND MARI LETTER "a" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  259. "\u04D3": "a\u0306"
  260. "\u0474": "V\u0307"
  261. "\u0475": "v\u0307"
  262. "\u0492": "Gh"
  263. "\u0493": "gh"
  264. # DE-ACTIVATED CONVERSION OF YAKUT "GH" (GHE WITH MIDDLE HOOK): DUPLICATE KEY, THE "G" WITH ACUTE (0301) ENTRY BELOW IS THE ONE KEPT
  265. #"\u0494": "Gh"
  266. # DE-ACTIVATED CONVERSION OF YAKUT "gh" (GHE WITH MIDDLE HOOK): DUPLICATE KEY, THE "g" WITH ACUTE (0301) ENTRY BELOW IS THE ONE KEPT
  267. #"\u0495": "gh"
  268. "\u0494": "G\u0301"
  269. "\u0495": "g\u0301"
  270. "\u049C": "G\u0307"
  271. "\u049D": "g\u0307"
  272. "\u0413": "G"
  273. "\u0433": "g"
  274. # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
  275. "\u0464": "I\uFE20E\uFE21\u0304"
  276. "\u0468": "I\uFE20E\uFE21\u0328"
  277. "\u0465": "i\uFE20e\uFE21\u0304"
  278. "\u0469": "i\uFE20e\uFE21\u0328"
  279. "\u0462": "I\uFE20E\uFE21"
  280. "\u0463": "i\uFE20e\uFE21"
  281. # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
  282. "\u0404": "E\u0304"
  283. "\u042D": "E\u0307"
  284. "\u0401": "E\u0308"
  285. "\u0466": "E\u0328"
  286. "\u0454": "e\u0304"
  287. "\u044D": "e\u0307"
  288. "\u0451": "e\u0308"
  289. "\u0467": "e\u0328"
  290. "\u0416": "Zh"
  291. "\u0436": "zh"
  292. # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
  293. "\u0498": "T\uFE20H\uFE21"
  294. "\u0499": "t\uFE20h\uFE21"
  295. "\u04AA": "Th"
  296. "\u04AB": "th"
  297. # CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
  298. "\u046C": "I\uFE20O\uFE21\u0328"
  299. "\u046D": "i\uFE20o\uFE21\u0328"
  300. # CONVERSION OF "I/i" LIGATED TO "U/u"
  301. "\u044E": "i\uFE20u\uFE21"
  302. # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
  303. "\u0406": "I\u0304"
  304. "\u0408": "I\u0310"
  305. "\u0456": "i\u0304"
  306. "\u0458": "i\u0310"
  307. # CONVERSION OF REMAINING LONE "I/i"
  308. "\u0418": "I"
  309. "\u0438": "i"
  310. "\u0496": "J"
  311. "\u0497": "j"
  312. # CONVERSION OF AZERI "J" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  313. "\u04B8": "J"
  314. # CONVERSION OF AZERI "j" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  315. "\u04B9": "j"
  316. # CONVERSION OF TAJIK "J" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  317. "\u04B6": "J"
  318. # CONVERSION OF TAJIK "j" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  319. "\u04B7": "j"
  320. "\u0445": "kh"
  321. "\u046E": "K\uFE20S\uFE21"
  322. "\u046F": "k\uFE20s\uFE21"
  323. "\u04A0": "Q"
  324. "\u04A1": "q"
  325. # CONVERSION OF KHANTY "Q" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  326. "\u04C3": "Q"
  327. # CONVERSION OF KHANTY "q" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  328. "\u04C4": "q"
  329. "\u04A2": "N\uFE20G\uFE21"
  330. "\u04A3": "n\uFE20g\uFE21"
  331. # CONVERSION OF YAKUT "NG/ng" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  332. "\u04A4": "N\uFE20G\uFE21"
  333. "\u04A5": "n\uFE20g\uFE21"
  334. # CONVERSION OF CHUKCHI AND EVENKI "NG/ng" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  335. "\u04C7": "N\uFE20G\uFE21"
  336. "\u04C8": "n\uFE20g\uFE21"
  337. # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
  338. "\u047E": "O\u0304\uFE20T\uFE21"
  339. "\u047F": "o\u0304\uFE20t\uFE21"
  340. # CONVERSION OF "O/o" WITH MACRON (0304)
  341. "\u04EA": "O\u0304"
  342. "\u04EB": "o\u0304"
  343. # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
  344. "\u04E8": "O\u0307"
  345. "\u04E9": "o\u0307"
  346. # CONVERSION OF GAGAUZ, KOMI, AND MARI "O/o" WITH DOT ABOVE (0307) (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  347. "\u04E6": "O\u0307"
  348. "\u04E7": "o\u0307"
  349. # CONVERSION OF REMAINING LONE "O/o"
  350. "\u0470": "P\uFE20S\uFE21"
  351. "\u0471": "p\uFE20s\uFE21"
  352. "\u04B4": "T\uFE20S\uFE21\u0307"
  353. "\u04B5": "t\uFE20s\uFE21\u0307"
  354. "\u0426": "T\uFE20S\uFE21"
  355. "\u0446": "t\uFE20s\uFE21"
  356. # CONVERSION OF "U/u" WITH MACRON (0304), BREVE (0306), AND DOT ABOVE (0307)
  357. "\u04B0": "U\u0304"
  358. "\u04B1": "u\u0304"
  359. # CONVERSION OF TAJIK LETTER "U" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  360. "\u04EE": "U\u0304"
  361. # CONVERSION OF TAJIK LETTER "u" (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  362. "\u04EF": "u\u0304"
  363. "\u040E": "U\u0306"
  364. "\u045E": "u\u0306"
  365. "\u04AE": "U\u0307"
  366. "\u04AF": "u\u0307"
  367. # CONVERSION OF GAGAUZ AND MARI LETTER "U/u" WITH DOT ABOVE (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  368. "\u04F0": "U\u0307"
  369. "\u04F1": "u\u0307"
  370. # DE-ACTIVATED CONVERSION TO ESKIMO AND KARAKALPAK "W/w": DUPLICATE KEYS, THESE ARE THE SAME CHARACTERS AS "U/u" WITH BREVE, WHICH IS THE ROMANIZATION KEPT ABOVE
  371. #"\u040E": "W"
  372. #"\u045E": "w"
  373. "\u0472": "F\u0307"
  374. "\u0473": "f\u0307"
  375. "\u0444": "f"
  376. "\u0427": "Ch"
  377. # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
  378. "\u04BA": "H\u0307"
  379. "\u04BB": "h\u0307"
  380. # CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  381. "\u04B2": "H\u0307"
  382. "\u04B3": "h\u0307"
  383. # CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) (ONE-WAY: ITS ROMANIZATION CONFLICTS WITH ANOTHER LETTER, SO roman_to_script DOES NOT RESTORE IT)
  384. "\u04FC": "H\u0307"
  385. "\u04FD": "h\u0307"
  386. "\u04F8": "Y\u0307"
  387. "\u04F9": "y\u0307"
  388. "\u042B": "Y"
  389. "\u044B": "y"
  390. # this conversion is ambiguous - \u044C is also theoretically possible
  391. "\u044A": "\u02BA"