# Original table by Randall K. Barry, 23 February 2021
# Transliteration table between romanized (ALA-LC style) text and Cyrillic script.
# One entry per line, no inline comments. In [RomanToScript] entries read Roman=Script;
# in [ScriptToRoman] entries read Script=Roman. U+XXXX denotes a Unicode code point.
# Entry order is preserved from the original table (longer sequences precede the lone letters).

[General]
Name=AsianCyrillic
# UNICODE CYRILLIC CHARACTER RANGE: U+0400-U+04FF
# Due to the large number of Cyrillic characters used by the languages of Asia and the former Soviet Union,
# as well as variations in the romanization of certain characters depending upon the languages,
# the transliterations produced by this conversion configuration file do not always match flawlessly
# the transliterations specified by the "ALA-LC Romanization Tables" for each language.
# Mappings of each character have been made to the correspondence that is correct
# for the largest number of languages. The Roman-To-Script conversion is not identical to the
# Script-To-Roman conversion. Improvements and standardization of the transliteration of specific
# Cyrillic characters are needed. NOTE: Roundtrip conversion for many languages is lossless. :)

[RomanToScript]
FieldsIncluded=100 110 111 130 240 241 243 245 246 247 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
SubfieldsAlwaysExcluded=uvxy0123456789
OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
# RDA boilerplate phrases not transliterated:
Place of publication not identified=Place of publication not identified
publisher not identified=publisher not identified
date of publication not identified=date of publication not identified
At head of title=At head of title
Colophon=Colophon
# COMMON COMBINING CHARACTERS (always follow a base letter):
# combining grave U+0300
# combining acute U+0301
# combining circumflex U+0302
# combining macron U+0304
# combining breve U+0306
# combining dot above U+0307
# combining diaeresis U+0308
# combining ring above U+030A
# combining double acute U+030B
# combining caron (hachek) U+030C
# combining candrabindu U+0310
# combining dot below U+0323
# combining comma below U+0326 (Romanian, Latvian, Livonian)
# combining cedilla U+0327 (French, Turkish, Azeri)
# combining ogonek (hook) U+0328 (Polish, Lithuanian)
# combining left ligature U+FE20 (Cyrillic transliteration)
# combining right ligature U+FE21 (Cyrillic transliteration)
# soft sign/prime (spacing) U+02B9 (Cyrillic transliteration)
# hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
# ayn (spacing) U+02BB (Semitic and Caucasian languages)
# alif (spacing) U+02BC (Semitic languages)
# middle dot (spacing) U+00B7 (Catalan)
# REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
# CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
IU+FE20AU+FE21=U+042F
IU+FE20aU+FE21=U+042F
iU+FE20aU+FE21=U+044F
iU+FE20AU+FE21=U+044F
# CONVERSION OF "A/a" WITH BREVE (0306)
AU+0306=U+04D8
aU+0306=U+04D9
# DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "A/a" WITH BREVE DUE TO CONFLICTING ROMANIZATION
# AU+0306=U+04D2
# aU+0306=U+04D3
# REMAINING LONE "A/a"
A=U+0410
a=U+0430
B=U+0411
b=U+0431
VU+0307=U+0474
vU+0307=U+0475
V=U+0412
v=U+0432
Gh=U+0492
GH=U+0492
gH=U+0493
gh=U+0493
# DE-ACTIVATED CONVERSION OF YAKUT "GH/gh" (GHE WITH MIDDLE HOOK) DUE TO CONFLICTING ROMANIZATION
# Gh=U+0494
# GH=U+0494
# gH=U+0495
# gh=U+0495
GU+0301=U+0494
gU+0301=U+0495
GU+0307=U+049C
gU+0307=U+049D
G=U+0413
g=U+0433
D=U+0414
d=U+0434
# CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
IU+FE20EU+FE21U+0304=U+0464
IU+FE20EU+0304U+FE21=U+0464
IU+FE20eU+FE21U+0304=U+0464
IU+FE20eU+0304U+FE21=U+0464
IU+FE20EU+FE21U+0328=U+0468
IU+FE20EU+0328U+FE21=U+0468
IU+FE20eU+FE21U+0328=U+0468
IU+FE20eU+0328U+FE21=U+0468
iU+FE20eU+FE21U+0304=U+0465
iU+FE20eU+0304U+FE21=U+0465
iU+FE20EU+FE21U+0304=U+0465
iU+FE20EU+0304U+FE21=U+0465
iU+FE20eU+FE21U+0328=U+0469
iU+FE20eU+0328U+FE21=U+0469
iU+FE20EU+FE21U+0328=U+0469
iU+FE20EU+0328U+FE21=U+0469
IU+FE20EU+FE21=U+0462
IU+FE20eU+FE21=U+0462
iU+FE20eU+FE21=U+0463
iU+FE20EU+FE21=U+0463
# CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
EU+030C=U+0462
EU+0304=U+0404
EU+0307=U+042D
EU+0308=U+0401
EU+0328=U+0466
eU+030C=U+0463
eU+0304=U+0454
eU+0307=U+044D
eU+0308=U+0451
eU+0328=U+0467
# CONVERSION OF REMAINING LONE "E/e"
E=U+0415
e=U+0435
ZH=U+0416
Zh=U+0416
zH=U+0436
zh=U+0436
Z=U+0417
z=U+0437
# CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h" (all capitalization patterns)
TU+FE20HU+FE21=U+0498
TU+FE20hU+FE21=U+0498
tU+FE20HU+FE21=U+0499
tU+FE20hU+FE21=U+0499
Th=U+04AA
TH=U+04AA
tH=U+04AB
th=U+04AB
# CONVERSION OF "I/i" LIGATED TO "O/o" WITH OGONEK (0328)
IU+FE20OU+FE21U+0328=U+046C
IU+FE20OU+0328U+FE21=U+046C
IU+FE20oU+FE21U+0328=U+046C
IU+FE20oU+0328U+FE21=U+046C
iU+FE20oU+FE21U+0328=U+046D
iU+FE20oU+0328U+FE21=U+046D
iU+FE20OU+FE21U+0328=U+046D
iU+FE20OU+0328U+FE21=U+046D
# CONVERSION OF "I/i" LIGATED TO "U/u"
IU+FE20UU+FE21=U+042E
IU+FE20uU+FE21=U+042E
iU+FE20uU+FE21=U+044E
iU+FE20UU+FE21=U+044E
# CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
IU+0304=U+0406
IU+0306=U+0419
IU+0310=U+0408
iU+0304=U+0456
iU+0306=U+0439
iU+0310=U+0458
# CONVERSION OF REMAINING LONE "I/i"
I=U+0418
i=U+0438
J=U+0496
j=U+0497
# DE-ACTIVATED CONVERSION OF AZERI "J/j" DUE TO CONFLICTING ROMANIZATION
# J=U+04B8
# j=U+04B9
# DE-ACTIVATED CONVERSION OF TAJIK "J/j" DUE TO CONFLICTING ROMANIZATION
# J=U+04B6
# j=U+04B7
KH=U+0425
Kh=U+0425
kh=U+0445
kH=U+0445
KU+FE20SU+FE21=U+046E
KU+FE20sU+FE21=U+046E
kU+FE20sU+FE21=U+046F
kU+FE20SU+FE21=U+046F
Q=U+04A0
q=U+04A1
# DE-ACTIVATED CONVERSION OF KHANTY "Q/q" DUE TO CONFLICTING ROMANIZATION
# Q=U+04C3
# q=U+04C4
K=U+041A
k=U+043A
L=U+041B
l=U+043B
M=U+041C
m=U+043C
NU+FE20GU+FE21=U+04A2
NU+FE20gU+FE21=U+04A2
nU+FE20GU+FE21=U+04A3
nU+FE20gU+FE21=U+04A3
# DE-ACTIVATED CONVERSION OF YAKUT "NG/ng" DUE TO CONFLICTING ROMANIZATION
# NU+FE20GU+FE21=U+04A4
# NU+FE20gU+FE21=U+04A4
# nU+FE20GU+FE21=U+04A5
# nU+FE20gU+FE21=U+04A5
# DE-ACTIVATED CONVERSION OF CHUKCHI AND EVENKI "NG/ng" DUE TO CONFLICTING ROMANIZATION
# NU+FE20GU+FE21=U+04C7
# NU+FE20gU+FE21=U+04C7
# nU+FE20GU+FE21=U+04C8
# nU+FE20gU+FE21=U+04C8
N=U+041D
n=U+043D
# CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
OU+0304U+FE20TU+FE21=U+047E
OU+0304U+FE20tU+FE21=U+047E
OU+FE20U+0304TU+FE21=U+047E
OU+FE20U+0304tU+FE21=U+047E
OU+FE20TU+FE21=U+047E
OU+FE20tU+FE21=U+047E
oU+0304U+FE20tU+FE21=U+047F
oU+0304U+FE20TU+FE21=U+047F
oU+FE20U+0304tU+FE21=U+047F
oU+FE20U+0304TU+FE21=U+047F
oU+FE20tU+FE21=U+047F
oU+FE20TU+FE21=U+047F
# CONVERSION OF "O/o" WITH MACRON (0304)
OU+0304=U+04EA
oU+0304=U+04EB
# CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
OU+0307=U+04E8
oU+0307=U+04E9
# DE-ACTIVATED CONVERSION OF GAGAUZ, KOMI, AND MARI "O/o" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
# OU+0307=U+04E6
# oU+0307=U+04E7
# CONVERSION OF REMAINING LONE "O/o"
O=U+041E
o=U+043E
PU+FE20SU+FE21=U+0470
PU+FE20sU+FE21=U+0470
pU+FE20sU+FE21=U+0471
pU+FE20SU+FE21=U+0471
P=U+041F
p=U+043F
R=U+0420
r=U+0440
SHCH=U+0429
SHCh=U+0429
SHch=U+0429
Shch=U+0429
sHCH=U+0449
shCH=U+0449
shcH=U+0449
shch=U+0449
SH=U+0428
Sh=U+0428
sh=U+0448
sH=U+0448
TU+FE20SU+FE21U+0307=U+04B4
TU+FE20SU+0307U+FE21=U+04B4
TU+FE20sU+FE21U+0307=U+04B4
TU+FE20sU+0307U+FE21=U+04B4
tU+FE20SU+FE21U+0307=U+04B5
tU+FE20SU+0307U+FE21=U+04B5
tU+FE20sU+FE21U+0307=U+04B5
tU+FE20sU+0307U+FE21=U+04B5
TU+FE20SU+FE21=U+0426
TU+FE20sU+FE21=U+0426
tU+FE20sU+FE21=U+0446
tU+FE20SU+FE21=U+0446
S=U+0421
s=U+0441
T=U+0422
t=U+0442
# CONVERSION OF "U/u" WITH MACRON (0304), BREVE (0306), AND DOT ABOVE (0307)
UU+0304=U+04B0
uU+0304=U+04B1
# DE-ACTIVATED CONVERSION OF TAJIK LETTER "U/u" WITH MACRON DUE TO CONFLICTING ROMANIZATION
# UU+0304=U+04EE
# uU+0304=U+04EF
# Lowercase short U is U+045E (U+0454 is Ukrainian ie, the target of "e" with macron above)
UU+0306=U+040E
uU+0306=U+045E
UU+0307=U+04AE
uU+0307=U+04AF
# DE-ACTIVATED CONVERSION OF GAGAUZ AND MARI LETTER "O/o" WITH DOT ABOVE DUE TO CONFLICTING ROMANIZATION
# NOTE(review): U+04E6/U+04E7 are O WITH DIAERESIS; U WITH DIAERESIS (U+04F0/U+04F1) was probably intended here - confirm
# UU+0307=U+04E6
# uU+0307=U+04E7
# CONVERSION OF ESKIMO AND KARAKALPAK "W/w" THAT MAPS TO THE SAME CHARACTERS AS "U/u" WITH BREVE
W=U+040E
w=U+045E
U=U+0423
u=U+0443
FU+0307=U+0472
fU+0307=U+0473
F=U+0424
f=U+0444
CH=U+0427
Ch=U+0427
ch=U+0447
cH=U+0447
# CONVERSION OF "H/h" WITH DOT ABOVE (0307) TO CYRILLIC SHHA, USED IN MANY CENTRAL ASIAN LANGUAGES (NOT NORMALLY INITIALLY)
# NOTE(review): the original table labelled this entry "PALOCHKA (ASPIRATION SIGN)", but palochka is U+04C0/U+04CF - confirm
HU+0307=U+04BA
hU+0307=U+04BB
# DE-ACTIVATED CONVERSION OF TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
# HU+0307=U+04B2
# hU+0307=U+04B3
# DE-ACTIVATED CONVERSION OF ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) DUE TO CONFLICTING ROMANIZATION
# HU+0307=U+04FC
# hU+0307=U+04FD
YU+0307=U+04F8
yU+0307=U+04F9
Y=U+042B
y=U+044B
# this conversion shouldn't be needed, but does no harm
U+FE20=
# this conversion shouldn't be needed, but does no harm
U+FE21=
# this conversion is ambiguous - U+042C is also theoretically possible
U+0027=U+044C
# this conversion is ambiguous - U+042C is also theoretically possible
U+02B9=U+044C
# this conversion is ambiguous - U+042A is also theoretically possible
U+02BA=U+044A

[ScriptToRoman]
FieldsIncluded=100 110 111 130 240 241 242 243 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
SubfieldsAlwaysExcluded=uvxy0123456789
OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
# RDA boilerplate phrases not transliterated:
Place of publication not identified=Place of publication not identified
publisher not identified=publisher not identified
# CYRILLIC SCRIPT ALPHABETIC CHARACTERS TO BE CONVERTED
# Entries marked "one-way" romanize a letter whose reverse RomanToScript mapping is de-activated,
# so a roundtrip returns the more common Cyrillic letter instead.
U+042F=IU+FE20AU+FE21
U+044F=iU+FE20aU+FE21
U+04D8=AU+0306
U+04D9=aU+0306
# GAGAUZ AND MARI LETTER "A/a" WITH BREVE (one-way)
U+04D2=AU+0306
U+04D3=aU+0306
U+0410=A
U+0430=a
U+0411=B
U+0431=b
U+0474=VU+0307
U+0475=vU+0307
U+0412=V
U+0432=v
U+0492=Gh
U+0493=gh
# DE-ACTIVATED: duplicate keys for U+0494/U+0495; the "G/g" WITH ACUTE entries below match the active RomanToScript mapping
# U+0494=Gh
# U+0495=gh
U+0494=GU+0301
U+0495=gU+0301
U+049C=GU+0307
U+049D=gU+0307
U+0413=G
U+0433=g
U+0414=D
U+0434=d
# CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
U+0464=IU+FE20EU+FE21U+0304
U+0468=IU+FE20EU+FE21U+0328
U+0465=iU+FE20eU+FE21U+0304
U+0469=iU+FE20eU+FE21U+0328
U+0462=IU+FE20EU+FE21
U+0463=iU+FE20eU+FE21
# CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), AND OGONEK (0328)
U+0404=EU+0304
U+042D=EU+0307
U+0401=EU+0308
U+0466=EU+0328
U+0454=eU+0304
U+044D=eU+0307
U+0451=eU+0308
U+0467=eU+0328
# CONVERSION OF REMAINING LONE "E/e"
U+0415=E
U+0435=e
U+0416=Zh
U+0436=zh
U+0417=Z
U+0437=z
# CONVERSION OF "T/t" LIGATED OR BLENDED WITH "H/h"
U+0498=TU+FE20HU+FE21
U+0499=tU+FE20hU+FE21
U+04AA=Th
U+04AB=th
# CONVERSION OF "I/i" LIGATED TO "O/o" WITH OGONEK (0328)
U+046C=IU+FE20OU+FE21U+0328
U+046D=iU+FE20oU+FE21U+0328
# CONVERSION OF "I/i" LIGATED TO "U/u"
U+042E=IU+FE20UU+FE21
U+044E=iU+FE20uU+FE21
# CONVERSION OF "I/i" WITH MACRON (0304), BREVE (0306), AND CANDRABINDU (0310)
U+0406=IU+0304
U+0419=IU+0306
U+0408=IU+0310
U+0456=iU+0304
U+0439=iU+0306
U+0458=iU+0310
# CONVERSION OF REMAINING LONE "I/i"
U+0418=I
U+0438=i
U+0496=J
U+0497=j
# AZERI "J/j" (one-way); previously written with a stray "# " inside the value
U+04B8=J
U+04B9=j
# TAJIK "J/j" (one-way); previously written with a stray "# " inside the value
U+04B6=J
U+04B7=j
U+0425=Kh
U+0445=kh
U+046E=KU+FE20SU+FE21
U+046F=kU+FE20sU+FE21
U+04A0=Q
U+04A1=q
# KHANTY "Q/q" (one-way)
U+04C3=Q
U+04C4=q
U+041A=K
U+043A=k
U+041B=L
U+043B=l
U+041C=M
U+043C=m
U+04A2=NU+FE20GU+FE21
U+04A3=nU+FE20gU+FE21
# YAKUT "NG/ng" (one-way); previously written with a stray "# " inside the value
U+04A4=NU+FE20GU+FE21
U+04A5=nU+FE20gU+FE21
# CHUKCHI AND EVENKI "NG/ng" (one-way); previously written with a stray "# " inside the value
U+04C7=NU+FE20GU+FE21
U+04C8=nU+FE20gU+FE21
U+041D=N
U+043D=n
# CONVERSION OF "O/o" WITH MACRON (0304), LIGATED TO "T/t"
U+047E=OU+0304U+FE20TU+FE21
U+047F=oU+0304U+FE20tU+FE21
# CONVERSION OF "O/o" WITH MACRON (0304)
U+04EA=OU+0304
U+04EB=oU+0304
# CONVERSION OF "O/o" WITH DOT ABOVE (0307) USED IN MOST CENTRAL ASIAN LANGUAGES
U+04E8=OU+0307
U+04E9=oU+0307
# GAGAUZ, KOMI, AND MARI "O/o" WITH DOT ABOVE (0307) (one-way); previously written with a stray "# " inside the value
U+04E6=OU+0307
U+04E7=oU+0307
# CONVERSION OF REMAINING LONE "O/o"
U+041E=O
U+043E=o
U+0470=PU+FE20SU+FE21
U+0471=pU+FE20sU+FE21
U+041F=P
U+043F=p
U+0420=R
U+0440=r
U+0429=Shch
U+0449=shch
U+0428=Sh
U+0448=sh
U+04B4=TU+FE20SU+FE21U+0307
U+04B5=tU+FE20sU+FE21U+0307
U+0426=TU+FE20SU+FE21
U+0446=tU+FE20sU+FE21
U+0421=S
U+0441=s
U+0422=T
U+0442=t
# CONVERSION OF "U/u" WITH MACRON (0304), BREVE (0306), AND DOT ABOVE (0307)
U+04B0=UU+0304
U+04B1=uU+0304
# TAJIK LETTER "U/u" WITH MACRON (one-way); previously written with a stray "# " inside the value
U+04EE=UU+0304
U+04EF=uU+0304
# Lowercase short U is U+045E (U+0454 is Ukrainian ie, romanized above as "e" with macron)
U+040E=UU+0306
U+045E=uU+0306
U+04AE=UU+0307
U+04AF=uU+0307
# DE-ACTIVATED: duplicate keys for U+04E6/U+04E7, which are romanized above as "O/o" WITH DOT ABOVE
# NOTE(review): U WITH DIAERESIS (U+04F0/U+04F1) was probably the intended source letter - confirm
# U+04E6=UU+0307
# U+04E7=uU+0307
# DE-ACTIVATED: ESKIMO AND KARAKALPAK "W/w" duplicates the keys for short U, romanized above as "U/u" WITH BREVE
# U+040E=W
# U+045E=w
U+0423=U
U+0443=u
U+0472=FU+0307
U+0473=fU+0307
U+0424=F
U+0444=f
U+0427=Ch
U+0447=ch
# CONVERSION OF CYRILLIC SHHA TO "H/h" WITH DOT ABOVE (0307), USED IN MANY CENTRAL ASIAN LANGUAGES
# NOTE(review): the original table labelled this entry "PALOCHKA (ASPIRATION SIGN)", but palochka is U+04C0/U+04CF - confirm
U+04BA=HU+0307
U+04BB=hU+0307
# TAJIK AND UZBEK LETTER "H/h" WITH DOT ABOVE (0307) (one-way)
U+04B2=HU+0307
U+04B3=hU+0307
# ARCHAIC LETTER "H/h" WITH DOT ABOVE (0307) (one-way)
U+04FC=HU+0307
U+04FD=hU+0307
U+04F8=YU+0307
U+04F9=yU+0307
U+042B=Y
U+044B=y
# Soft sign -> prime and hard sign -> double prime. The capital forms (U+042C, U+042A) romanize
# identically; RomanToScript always restores the lowercase letter.
U+044C=U+02B9
U+042C=U+02B9
U+044A=U+02BA
U+042A=U+02BA