AsianCyrillicRomanization.cfg 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. # Original table by Randall K. Barry, 23 February 2021
  2. [General]
  3. Name=AsianCyrillic
  4. # UNICODE CYRILLIC CHARACTER RANGE: U+0400-U+04FF
  5. # Due to the large number of Cyrillic characters used by the languages of Asia and the former Soviet Union,
  6. # as well as variations in the romanization of certain characters depending upon the languages,
  7. # the transliterations produced by this conversion configuration file do not always match flawlessly
  8. # the transliterations specified by the "ALA-LC Romanization Tables" for each language.
  9. # Mappings of each character have been made to the correspondence that is correct
  10. # for the largest number of languages. The Roman-To-Script conversion is not identical to the
  11. # Script-To-Roman conversion. Improvements and standardization of the transliteration of specific
  12. # Cyrillic characters are needed. NOTE: Roundtrip conversion for many languages is lossless. :)
  13. [RomanToScript]
  14. FieldsIncluded=100 110 111 130 240 241 243 245 246 247 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  15. SubfieldsAlwaysExcluded=uvxy0123456789
  16. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  17. # RDA boilerplate phrases not transliterated:
  18. Place of publication not identified=Place of publication not identified
  19. publisher not identified=publisher not identified
  20. date of publication not identified=date of publication not identified
  21. At head of title=At head of title
  22. Colophon=Colophon
  23. # COMMON COMBINING CHARACTERS (always follow a base letter):
  24. # combining grave U+0300
  25. # combining acute U+0301
  26. # combining circumflex U+0302
  27. # combining macron U+0304
  28. # combining breve U+0306
  29. # combining dot above U+0307
  30. # combining diaeresis U+0308
  31. # combining ring above U+030A
  32. # combining double acute U+030B
  33. # combining caron (hachek) U+030C
  34. # combining candrabindu U+0310
  35. # combining dot below U+0323
  36. # combining comma below U+0326 (Romanian, Latvian, Livonian)
  37. # combining cedilla U+0327 (French, Turkish, Azeri)
  38. # combining ogonek (hook) U+0328 (Polish, Lithuanian)
  39. # combining left ligature U+FE20 (Cyrillic transliteration)
  40. # combining right ligature U+FE21 (Cyrillic transliteration)
  41. # soft sign/prime (spacing) U+02B9 (Cyrillic transliteration)
  42. # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
  43. # ayn (spacing) U+02BB (Semitic and Caucasian languages)
  44. # alif (spacing) U+02BC (Semitic languages)
  45. # middle dot (spacing) U+00B7 (Catalan)
  46. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  47. # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
  48. IU+FE20AU+FE21=U+042F
  49. IU+FE20aU+FE21=U+042F
  50. iU+FE20aU+FE21=U+044F
  51. iU+FE20AU+FE21=U+044F
  52. # CONVERSION OF "A/a" WITH BREVE (0306)
  53. AU+0306=U+04D8
  54. aU+0306=U+04D9
  55. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
  56. # AU+0306=U+04D2
  57. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
  58. # aU+0306=U+04D3
  59. # REMAINING LONE "A/a"
  60. A=U+0410
  61. a=U+0430
  62. B=U+0411
  63. b=U+0431
  64. VU+0307=U+0474
  65. vU+0307=U+0475
  66. V=U+0412
  67. v=U+0432
  68. Gh=U+0492
  69. GH=U+0492
  70. gH=U+0493
  71. gh=U+0493
  72. # DE-ACTIVATED CONVERSION OF YAKUT "GH" DUE TO CONFLICTING ROMANIZATION
  73. # Gh=U+0494
  74. # DE-ACTIVATED CONVERSION OF YAKUT "GH" DUE TO CONFLICTING ROMANIZATION
  75. # GH=U+0494
  76. # DE-ACTIVATED CONVERSION OF YAKUT "gh" DUE TO CONFLICTING ROMANIZATION
  77. # gH=U+0495
  78. # DE-ACTIVATED CONVERSION OF YAKUT "gh" DUE TO CONFLICTING ROMANIZATION
  79. # gh=U+0495
  80. GU+0301=U+0494
  81. gU+0301=U+0495
  82. GU+0307=U+049C
  83. gU+0307=U+049D
  84. G=U+0413
  85. g=U+0433
  86. D=U+0414
  87. d=U+0434
  88. # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
  89. IU+FE20EU+FE21U+0304=U+0464
  90. IU+FE20EU+0304U+FE21=U+0464
  91. IU+FE20eU+FE21U+0304=U+0464
  92. IU+FE20eU+0304U+FE21=U+0464
  93. IU+FE20EU+FE21U+0328=U+0468
  94. IU+FE20EU+0328U+FE21=U+0468
  95. IU+FE20eU+FE21U+0328=U+0468
  96. IU+FE20eU+0328U+FE21=U+0468
  97. iU+FE20eU+FE21U+0304=U+0465
  98. iU+FE20eU+0304U+FE21=U+0465
  99. iU+FE20EU+FE21U+0304=U+0465
  100. iU+FE20EU+0304U+FE21=U+0465
  101. iU+FE20eU+FE21U+0328=U+0469
  102. iU+FE20eU+0328U+FE21=U+0469
  103. iU+FE20EU+FE21U+0328=U+0469
  104. iU+FE20EU+0328U+FE21=U+0469
  105. IU+FE20EU+FE21=U+0462
  106. IU+FE20eU+FE21=U+0462
  107. iU+FE20eU+FE21=U+0463
  108. iU+FE20EU+FE21=U+0463
  109. # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
  110. EU+030C=U+0462
  111. EU+0304=U+0404
  112. EU+0307=U+042D
  113. EU+0308=U+0401
  114. EU+0328=U+0466
  115. eU+030C=U+0463
  116. eU+0304=U+0454
  117. eU+0307=U+044D
  118. eU+0308=U+0451
  119. eU+0328=U+0467
  120. # CONVERSION OF REMAINING LONE "E/e"
  121. E=U+0415
  122. e=U+0435
  123. ZH=U+0416
  124. Zh=U+0416
  125. zH=U+0436
  126. zh=U+0436
  127. Z=U+0417
  128. z=U+0437
  129. # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
  130. TU+FE20HU+FE21=U+0498
  131. TU+FE20hU+FE21=U+0498
  132. tU+FE20HU+FE21=U+0499
  133. tU+FE20hU+FE21=U+0499
  134. Th=U+04AA
  135. TH=U+04AA
  136. tH=U+04AB
  137. th=U+04AB
  138. # CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
  139. IU+FE20OU+FE21U+0328=U+046C
  140. IU+FE20OU+0328U+FE21=U+046C
  141. IU+FE20oU+FE21U+0328=U+046C
  142. IU+FE20oU+0328U+FE21=U+046C
  143. iU+FE20oU+FE21U+0328=U+046D
  144. iU+FE20oU+0328U+FE21=U+046D
  145. iU+FE20OU+FE21U+0328=U+046D
  146. iU+FE20OU+0328U+FE21=U+046D
  147. # CONVERSION OF "I/i" LIGATED TO "U/u"
  148. IU+FE20UU+FE21=U+042E
  149. IU+FE20uU+FE21=U+042E
  150. iU+FE20uU+FE21=U+044E
  151. iU+FE20UU+FE21=U+044E
  152. # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
  153. IU+0304=U+0406
  154. IU+0306=U+0419
  155. IU+0310=U+0408
  156. iU+0304=U+0456
  157. iU+0306=U+0439
  158. iU+0310=U+0458
  159. # CONVERSION OF REMAINING LONE "I/i"
  160. I=U+0418
  161. i=U+0438
  162. J=U+0496
  163. j=U+0497
  164. # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
  165. # J=U+04B8
  166. # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
  167. # j=U+04B9
  168. # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
  169. # J=U+04B6
  170. # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
  171. # j=U+04B7
  172. KH=U+0425
  173. Kh=U+0425
  174. kh=U+0445
  175. kH=U+0445
  176. KU+FE20SU+FE21=U+046E
  177. KU+FE20sU+FE21=U+046E
  178. kU+FE20sU+FE21=U+046F
  179. kU+FE20SU+FE21=U+046F
  180. Q=U+04A0
  181. q=U+04A1
  182. # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
  183. # Q=U+04C3
  184. # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
  185. # q=U+04C4
  186. K=U+041A
  187. k=U+043A
  188. L=U+041B
  189. l=U+043B
  190. M=U+041C
  191. m=U+043C
  192. NU+FE20GU+FE21=U+04A2
  193. NU+FE20gU+FE21=U+04A2
  194. nU+FE20GU+FE21=U+04A3
  195. nU+FE20gU+FE21=U+04A3
  196. # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
  197. # NU+FE20GU+FE21=U+04A4
  198. # NU+FE20gU+FE21=U+04A4
  199. # nU+FE20GU+FE21=U+04A5
  200. # nU+FE20gU+FE21=U+04A5
  201. # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
  202. # NU+FE20GU+FE21=U+04C7
  203. # NU+FE20gU+FE21=U+04C7
  204. # nU+FE20GU+FE21=U+04C8
  205. # nU+FE20gU+FE21=U+04C8
  206. N=U+041D
  207. n=U+043D
  208. # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
  209. OU+0304U+FE20TU+FE21=U+047E
  210. OU+0304U+FE20tU+FE21=U+047E
  211. OU+FE20U+0304TU+FE21=U+047E
  212. OU+FE20U+0304tU+FE21=U+047E
  213. OU+FE20TU+FE21=U+047E
  214. OU+FE20tU+FE21=U+047E
  215. oU+0304U+FE20tU+FE21=U+047F
  216. oU+0304U+FE20TU+FE21=U+047F
  217. oU+FE20U+0304tU+FE21=U+047F
  218. oU+FE20U+0304TU+FE21=U+047F
  219. oU+FE20tU+FE21=U+047F
  220. oU+FE20TU+FE21=U+047F
  221. # CONVERSION OF "O/o" WITH MACRON(0304)
  222. OU+0304=U+04EA
  223. oU+0304=U+04EB
  224. # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
  225. OU+0307=U+04E8
  226. oU+0307=U+04E9
  227. # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)DUE TO CONFLICTING ROMANIZATION
  228. # OU+0307=U+04E6
  229. # oU+0307=U+04E7
  230. # CONVERSION OF REMAINING LONE "O/o"
  231. O=U+041E
  232. o=U+043E
  233. PU+FE20SU+FE21=U+0470
  234. PU+FE20sU+FE21=U+0470
  235. pU+FE20sU+FE21=U+0471
  236. pU+FE20SU+FE21=U+0471
  237. P=U+041F
  238. p=U+043F
  239. R=U+0420
  240. r=U+0440
  241. SHCH=U+0429
  242. SHCh=U+0429
  243. SHch=U+0429
  244. Shch=U+0429
  245. sHCH=U+0449
  246. shCH=U+0449
  247. shcH=U+0449
  248. shch=U+0449
  249. SH=U+0428
  250. Sh=U+0428
  251. sh=U+0448
  252. sH=U+0448
  253. TU+FE20SU+FE21U+0307=U+04B4
  254. TU+FE20SU+0307U+FE21=U+04B4
  255. TU+FE20sU+FE21U+0307=U+04B4
  256. TU+FE20sU+0307U+FE21=U+04B4
  257. tU+FE20SU+FE21U+0307=U+04B5
  258. tU+FE20SU+0307U+FE21=U+04B5
  259. tU+FE20sU+FE21U+0307=U+04B5
  260. tU+FE20sU+0307U+FE21=U+04B5
  261. TU+FE20SU+FE21=U+0426
  262. TU+FE20sU+FE21=U+0426
  263. tU+FE20sU+FE21=U+0446
  264. tU+FE20SU+FE21=U+0446
  265. S=U+0421
  266. s=U+0441
  267. T=U+0422
  268. t=U+0442
  269. # CONVERSION OF "U/u" WITH MACRON(0304), BREVE (0306), AND DOT ABOVE (0307)
  270. UU+0304=U+04B0
  271. uU+0304=U+04B1
  272. # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
  273. # UU+0304=U+04EE
  274. # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
  275. # UU+0304=U+04EF
  276. UU+0306=U+040E
  277. uU+0306=U+045E
  278. UU+0307=U+04AE
  279. uU+0307=U+04AF
  280. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
  281. # UU+0307=U+04E6
  282. # uU+0307=U+04E7
  283. # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
  284. W=U+040E
  285. w=U+045E
  286. U=U+0423
  287. u=U+0443
  288. FU+0307=U+0472
  289. fU+0307=U+0473
  290. F=U+0424
  291. f=U+0444
  292. CH=U+0427
  293. Ch=U+0427
  294. ch=U+0447
  295. cH=U+0447
  296. # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
  297. HU+0307=U+04BA
  298. hU+0307=U+04BB
  299. # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  300. # HU+0307=U+04B2
  301. # hU+0307=U+04B3
  302. # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  303. # HU+0307=U+04FC
  304. # hU+0307=U+04FD
  305. YU+0307=U+04F8
  306. yU+0307=U+04F9
  307. Y=U+042B
  308. y=U+044B
  309. # this conversion shouldn't be needed, but does no harm
  310. U+FE20=
  311. # this conversion shouldn't be needed, but does no harm
  312. U+FE21=
  313. # this conversion is ambiguous - U+042C is also theoretically possible
  314. U+0027=U+044C
  315. # this conversion is ambiguous - U+042C is also theoretically possible
  316. U+02B9=U+044C
  317. # this conversion is ambiguous - U+042A is also theoretically possible
  318. U+02BA=U+044A
  319. [ScriptToRoman]
  320. FieldsIncluded=100 110 111 130 240 241 242 243 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  321. SubfieldsAlwaysExcluded=uvxy0123456789
  322. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  323. # RDA boilerplate phrases not transliterated:
  324. Place of publication not identified=Place of publication not identified
  325. publisher not identified=publisher not identified
  326. # CYRILLIC SCRIPT ALPHABETIC CHARACTERS TO BE CONVERTED
  327. U+042F=IU+FE20AU+FE21
  328. U+044F=iU+FE20aU+FE21
  329. U+04D8=AU+0306
  330. U+04D9=aU+0306
  331. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
  332. U+04D2=AU+0306
  333. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
  334. U+04D3=aU+0306
  335. U+0410=A
  336. U+0430=a
  337. U+0411=B
  338. U+0431=b
  339. U+0474=VU+0307
  340. U+0475=vU+0307
  341. U+0412=V
  342. U+0432=v
  343. U+0492=Gh
  344. U+0493=gh
  345. # DE-ACTIVATED CONVERSION OF YAKUT "GH" DUE TO CONFLICTING ROMANIZATION
  346. U+0494=Gh
  347. # DE-ACTIVATED CONVERSION OF YAKUT "gh" DUE TO CONFLICTING ROMANIZATION
  348. U+0495=gh
  349. U+0494=GU+0301
  350. U+0495=gU+0301
  351. U+049C=GU+0307
  352. U+049D=gU+0307
  353. U+0413=G
  354. U+0433=g
  355. U+0414=D
  356. U+0434=d
  357. # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
  358. U+0464=IU+FE20EU+FE21U+0304
  359. U+0468=IU+FE20EU+FE21U+0328
  360. U+0465=iU+FE20eU+FE21U+0304
  361. U+0469=iU+FE20eU+FE21U+0328
  362. U+0462=IU+FE20EU+FE21
  363. U+0463=iU+FE20eU+FE21
  364. # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
  365. U+0404=EU+0304
  366. U+042D=EU+0307
  367. U+0401=EU+0308
  368. U+0466=EU+0328
  369. U+0454=eU+0304
  370. U+044D=eU+0307
  371. U+0451=eU+0308
  372. U+0467=eU+0328
  373. # CONVERSION OF REMAINING LONE "E/e"
  374. U+0415=E
  375. U+0435=e
  376. U+0416=Zh
  377. U+0436=zh
  378. U+0417=Z
  379. U+0437=z
  380. # CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
  381. U+0498=TU+FE20HU+FE21
  382. U+0499=tU+FE20hU+FE21
  383. U+04AA=Th
  384. U+04AB=th
  385. # CONVERSION OF "I/i" LIGATED TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
  386. U+046C=IU+FE20OU+FE21U+0328
  387. U+046D=iU+FE20oU+FE21U+0328
  388. # CONVERSION OF "I/i" LIGATED TO "U/u"
  389. U+042E=IU+FE20UU+FE21
  390. U+044E=iU+FE20uU+FE21
  391. # CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
  392. U+0406=IU+0304
  393. U+0419=IU+0306
  394. U+0408=IU+0310
  395. U+0456=iU+0304
  396. U+0439=iU+0306
  397. U+0458=iU+0310
  398. # CONVERSION OF REMAINING LONE "I/i"
  399. U+0418=I
  400. U+0438=i
  401. U+0496=J
  402. U+0497=j
  403. # DE-ACTIVATED CONVERSION OF AZERI "J" DUE TO CONFLICTING ROMANIZATION
  404. U+04B8=J
  405. # DE-ACTIVATED CONVERSION OF AZERI "j" DUE TO CONFLICTING ROMANIZATION
  406. U+04B9=j
  407. # DE-ACTIVATED CONVERSION OF TAJIK "J" DUE TO CONFLICTING ROMANIZATION
  408. U+04B6=J
  409. # DE-ACTIVATED CONVERSION OF TAJIK "j" DUE TO CONFLICTING ROMANIZATION
  410. U+04B7=j
  411. U+0425=Kh
  412. U+0445=kh
  413. U+046E=KU+FE20SU+FE21
  414. U+046F=kU+FE20sU+FE21
  415. U+04A0=Q
  416. U+04A1=q
  417. # DE-ACTIVATED CONVERSION OF KHANTY "Q" DUE TO CONFLICTING ROMANIZATION
  418. U+04C3=Q
  419. # DE-ACTIVATED CONVERSION OF KHANTY "q" DUE TO CONFLICTING ROMANIZATION
  420. U+04C4=q
  421. U+041A=K
  422. U+043A=k
  423. U+041B=L
  424. U+043B=l
  425. U+041C=M
  426. U+043C=m
  427. U+04A2=NU+FE20GU+FE21
  428. U+04A3=nU+FE20gU+FE21
  429. # DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
  430. U+04A4=NU+FE20GU+FE21
  431. U+04A5=nU+FE20gU+FE21
  432. # DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
  433. U+04C7=NU+FE20GU+FE21
  434. U+04C8=nU+FE20gU+FE21
  435. U+041D=N
  436. U+043D=n
  437. # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
  438. U+047E=OU+0304U+FE20TU+FE21
  439. U+047F=oU+0304U+FE20tU+FE21
  440. # CONVERSION OF "O/o" WITH MACRON(0304)
  441. U+04EA=OU+0304
  442. U+04EB=oU+0304
  443. # CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
  444. U+04E8=OU+0307
  445. U+04E9=oU+0307
  446. # DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O" WITH DOT ABOVE (0307)DUE TO CONFLICTING ROMANIZATION
  447. U+04E6=OU+0307
  448. U+04E7=oU+0307
  449. # CONVERSION OF REMAINING LONE "O/o"
  450. U+041E=O
  451. U+043E=o
  452. U+0470=PU+FE20SU+FE21
  453. U+0471=pU+FE20sU+FE21
  454. U+041F=P
  455. U+043F=p
  456. U+0420=R
  457. U+0440=r
  458. U+0429=Shch
  459. U+0449=shch
  460. U+0428=Sh
  461. U+0448=sh
  462. U+04B4=TU+FE20SU+FE21U+0307
  463. U+04B5=tU+FE20sU+FE21U+0307
  464. U+0426=TU+FE20SU+FE21
  465. U+0446=tU+FE20sU+FE21
  466. U+0421=S
  467. U+0441=s
  468. U+0422=T
  469. U+0442=t
  470. # CONVERSION OF "U/u" WITH MACRON(0304), BREVE (0306), AND DOT ABOVE (0307)
  471. U+04B0=UU+0304
  472. U+04B1=uU+0304
  473. # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
  474. U+04EE=UU+0304
  475. # DE-ACTIVATED CONVERSION OF TAJIK LETTER DUE TO CONFLICTING ROMANIZATION
  476. U+04EF=uU+0304
  477. U+040E=UU+0306
  478. U+045E=uU+0306
  479. U+04AE=UU+0307
  480. U+04AF=uU+0307
  481. # DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
  482. U+04E6=# UU+0307
  483. U+04E7=# uU+0307
  484. # CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
  485. U+040E=W
  486. U+045E=w
  487. U+0423=U
  488. U+0443=u
  489. U+0472=FU+0307
  490. U+0473=fU+0307
  491. U+0424=F
  492. U+0444=f
  493. U+0427=Ch
  494. U+0447=ch
  495. # CONVERSION OF CYRILLIC PALOCHKA (ASPIRATION SIGN) USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
  496. U+04BA=HU+0307
  497. U+04BB=hU+0307
  498. # DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  499. U+04B2=HU+0307
  500. U+04B3=hU+0307
  501. # DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
  502. U+04FC=HU+0307
  503. U+04FD=hU+0307
  504. U+04F8=YU+0307
  505. U+04F9=yU+0307
  506. U+042B=Y
  507. U+044B=y
  508. # this conversion is ambiguous - U+042C is also theoretically possible
  509. U+044C=U+02B9
  510. # this conversion is ambiguous - U+042A is also theoretically possible
  511. U+044A=U+02BA