123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380 |
- # Original table by Randall K. Barry, 22 February 2021
- [General]
- Name=Slavonic
- # UNICODE CYRILLIC CHARACTER RANGE: U+0400-U+04FF
- [RomanToScript]
- FieldsIncluded=100 110 111 130 240 241 243 245 246 247 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
- SubfieldsAlwaysExcluded=uvxy0123456789
- OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
- # RDA boilerplate phrases not transliterated:
- Place of publication not identified=Place of publication not identified
- publisher not identified=publisher not identified
- date of publication not identified=date of publication not identified
- At head of title=At head of title
- Colophon=Colophon
- # COMMON COMBINING CHARACTERS (always follow a base letter):
- # combining grave U+0300
- # combining acute U+0301
- # combining circumflex U+0302
- # combining macron U+0304
- # combining breve U+0306
- # combining dot above U+0307
- # combining diaeresis U+0308
- # combining ring above U+030A
- # combining double acute U+030B
- # combining caron (hachek) U+030C
- # combining candrabindu U+0310
- # combining dot below U+0323
- # combining comma below U+0326 (Romanian, Latvian, Livonian)
- # combining cedilla U+0327 (French, Turkish, Azeri)
- # combining ogonek (hook) U+0328 (Polish, Lithuanian)
- # combining left ligature U+FE20 (Cyrillic transliteration)
- # combining right ligature U+FE21 (Cyrillic transliteration)
- # soft sign/prime (spacing) U+02B9 (Cyrillic transliteration)
- # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
- # ayn (spacing) U+02BB (Semitic and Caucasian languages)
- # alif (spacing) U+02BC (Semitic languages)
- # middle dot (spacing) U+00B7 (Catalan)
- # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
- # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
- IU+FE20AU+FE21=U+042F
- IU+FE20aU+FE21=U+042F
- iU+FE20aU+FE21=U+044F
- iU+FE20AU+FE21=U+044F
- # REMAINING LONE "A/a"
- A=U+0410
- a=U+0430
- B=U+0411
- b=U+0431
- VU+0307=U+0474
- vU+0307=U+0475
- V=U+0412
- v=U+0432
- GU+0301=U+0494
- gU+0301=U+0495
- G=U+0413
- g=U+0433
- D=U+0414
- d=U+0434
- # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
- IU+FE20EU+FE21U+0304=U+0464
- IU+FE20EU+0304U+FE21=U+0464
- IU+FE20eU+FE21U+0304=U+0464
- IU+FE20eU+0304U+FE21=U+0464
- IU+FE20EU+FE21U+0328=U+0468
- IU+FE20EU+0328U+FE21=U+0468
- IU+FE20eU+FE21U+0328=U+0468
- IU+FE20eU+0328U+FE21=U+0468
- iU+FE20eU+FE21U+0304=U+0465
- iU+FE20eU+0304U+FE21=U+0465
- iU+FE20EU+FE21U+0304=U+0465
- iU+FE20EU+0304U+FE21=U+0465
- iU+FE20eU+FE21U+0328=U+0469
- iU+FE20eU+0328U+FE21=U+0469
- iU+FE20EU+FE21U+0328=U+0469
- iU+FE20EU+0328U+FE21=U+0469
- IU+FE20EU+FE21=U+0462
- IU+FE20eU+FE21=U+0462
- iU+FE20eU+FE21=U+0463
- iU+FE20EU+FE21=U+0463
- # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
- EU+030C=U+0462
- EU+0304=U+0404
- EU+0307=U+042D
- EU+0308=U+0401
- EU+0328=U+0466
- eU+030C=U+0463
- eU+0304=U+0454
- eU+0307=U+044D
- eU+0308=U+0451
- eU+0328=U+0467
- # CONVERSION OF REMAINING LONE "E/e"
- E=U+0415
- e=U+0435
- ZH=U+0416
- Zh=U+0416
- zH=U+0436
- zh=U+0436
- Z=U+0417
- z=U+0437
- # CONVERSION OF "I/i" LIGATED TO "O/o" WITH OGONEK (0328)
- IU+FE20OU+FE21U+0328=U+046C
- IU+FE20OU+0328U+FE21=U+046C
- IU+FE20oU+FE21U+0328=U+046C
- IU+FE20oU+0328U+FE21=U+046C
- iU+FE20oU+FE21U+0328=U+046D
- iU+FE20oU+0328U+FE21=U+046D
- iU+FE20OU+FE21U+0328=U+046D
- iU+FE20OU+0328U+FE21=U+046D
- # CONVERSION OF "I/i" LIGATED TO "U/u"
- IU+FE20UU+FE21=U+042E
- IU+FE20uU+FE21=U+042E
- iU+FE20uU+FE21=U+044E
- iU+FE20UU+FE21=U+044E
- # CONVERSION OF "I/i" WITH MACRON (0304) AND BREVE (0306)
- IU+0304=U+0406
- IU+0306=U+0419
- iU+0304=U+0456
- iU+0306=U+0439
- # CONVERSION OF REMAINING LONE "I/i"
- I=U+0418
- i=U+0438
- KH=U+0425
- Kh=U+0425
- kh=U+0445
- kH=U+0445
- KU+FE20SU+FE21=U+046E
- KU+FE20sU+FE21=U+046E
- kU+FE20sU+FE21=U+046F
- kU+FE20SU+FE21=U+046F
- K=U+041A
- k=U+043A
- L=U+041B
- l=U+043B
- M=U+041C
- m=U+043C
- N=U+041D
- n=U+043D
- # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
- OU+0304U+FE20TU+FE21=U+047E
- OU+0304U+FE20tU+FE21=U+047E
- OU+FE20U+0304TU+FE21=U+047E
- OU+FE20U+0304tU+FE21=U+047E
- OU+FE20TU+FE21=U+047E
- OU+FE20tU+FE21=U+047E
- oU+0304U+FE20tU+FE21=U+047F
- oU+0304U+FE20TU+FE21=U+047F
- oU+FE20U+0304tU+FE21=U+047F
- oU+FE20U+0304TU+FE21=U+047F
- oU+FE20tU+FE21=U+047F
- oU+FE20TU+FE21=U+047F
- # CONVERSION OF "O/o" WITH MACRON (0304) AND OGONEK (0328)
- OU+0328=U+046A
- oU+0328=U+046B
- OU+0304=U+0460
- oU+0304=U+0461
- # CONVERSION OF REMAINING LONE "O/o"
- O=U+041E
- o=U+043E
- PU+FE20SU+FE21=U+0470
- PU+FE20sU+FE21=U+0470
- pU+FE20sU+FE21=U+0471
- pU+FE20SU+FE21=U+0471
- P=U+041F
- p=U+043F
- R=U+0420
- r=U+0440
- SHT=U+0429
- SHt=U+0429
- Sht=U+0429
- sHT=U+0449
- shT=U+0449
- sht=U+0449
- SH=U+0428
- Sh=U+0428
- sh=U+0448
- sH=U+0448
- TU+FE20SU+FE21=U+0426
- TU+FE20sU+FE21=U+0426
- tU+FE20sU+FE21=U+0446
- tU+FE20SU+FE21=U+0446
- S=U+0421
- s=U+0441
- T=U+0422
- t=U+0442
- UU+0304=U+0478
- uU+0304=U+0479
- U=U+0423
- u=U+0443
- FU+0307=U+0472
- fU+0307=U+0473
- F=U+0424
- f=U+0444
- CH=U+0427
- Ch=U+0427
- ch=U+0447
- cH=U+0447
- YU+0307=U+0476
- yU+0307=U+0477
- Y=U+042B
- y=U+044B
- # this conversion shouldn't be needed, but does no harm
- U+FE20=
- # this conversion shouldn't be needed, but does no harm
- U+FE21=
- # this conversion is ambiguous - U+042C is also theoretically possible
- U+0027=U+044C
- # this conversion is ambiguous - U+042C is also theoretically possible
- U+02B9=U+044C
- # this conversion is ambiguous - U+042A (uppercase hard sign) is also theoretically possible
- U+02BA=U+044A
- [ScriptToRoman]
- FieldsIncluded=100 110 111 130 240 241 242 243 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
- SubfieldsAlwaysExcluded=uvxy0123456789
- OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
- # RDA boilerplate phrases not transliterated:
- Place of publication not identified=Place of publication not identified
- publisher not identified=publisher not identified
- # CYRILLIC SCRIPT ALPHABETIC CHARACTERS TO BE CONVERTED
- # CONVERSION TO "I/i" LIGATED TO "A/a"
- U+042F=IU+FE20AU+FE21
- U+044F=iU+FE20aU+FE21
- U+0410=A
- U+0430=a
- U+0411=B
- U+0431=b
- U+0474=VU+0307
- U+0475=vU+0307
- U+0412=V
- U+0432=v
- U+0494=GU+0301
- U+0495=gU+0301
- U+0413=G
- U+0433=g
- U+0414=D
- U+0434=d
- # CONVERSION TO "I/i" LIGATED TO "E/e" WITH DIACRITICS
- U+0464=IU+FE20EU+FE21U+0304
- U+0468=IU+FE20EU+FE21U+0328
- U+0465=iU+FE20eU+FE21U+0304
- U+0469=iU+FE20eU+FE21U+0328
- # CONVERSION TO "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
- U+0462=EU+030C
- U+0404=EU+0304
- U+042D=EU+0307
- U+0401=EU+0308
- U+0466=EU+0328
- U+0463=eU+030C
- U+0454=eU+0304
- U+044D=eU+0307
- U+0451=eU+0308
- U+0467=eU+0328
- U+0415=E
- U+0435=e
- U+0416=Zh
- U+0436=zh
- U+0417=Z
- U+0437=z
- # CONVERSION TO "I/i" LIGATED TO "O/o" WITH OGONEK (0328)
- U+046C=IU+FE20OU+FE21U+0328
- U+046D=iU+FE20oU+FE21U+0328
- # CONVERSION TO "I/i" LIGATED TO "U/u"
- U+042E=IU+FE20UU+FE21
- U+044E=iU+FE20uU+FE21
- # CONVERSION TO "I/i" WITH MACRON (0304) AND BREVE (0306)
- U+0406=IU+0304
- U+0419=IU+0306
- U+0456=iU+0304
- U+0439=iU+0306
- # CONVERSION TO LONE "I/i"
- U+0418=I
- U+0438=i
- U+0425=Kh
- U+0445=kh
- U+046E=KU+FE20SU+FE21
- U+046F=kU+FE20sU+FE21
- U+041A=K
- U+043A=k
- U+041B=L
- U+043B=l
- U+041C=M
- U+043C=m
- U+041D=N
- U+043D=n
- # CONVERSION TO "O/o" WITH MACRON (0304) LIGATED TO "T/t"
- U+047E=OU+FE20U+0304tU+FE21
- U+047F=oU+FE20U+0304tU+FE21
- # CONVERSION TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
- U+046A=OU+0328
- U+046B=oU+0328
- U+0460=OU+0304
- U+0461=oU+0304
- # CONVERSION TO LONE "O/o"
- U+041E=O
- U+043E=o
- U+0470=PU+FE20SU+FE21
- U+0471=pU+FE20sU+FE21
- U+041F=P
- U+043F=p
- U+0420=R
- U+0440=r
- U+0429=Sht
- U+0449=sht
- U+0428=Sh
- U+0448=sh
- U+0426=TU+FE20SU+FE21
- U+0446=tU+FE20sU+FE21
- U+0421=S
- U+0441=s
- U+0422=T
- U+0442=t
- U+0478=UU+0304
- U+0479=uU+0304
- U+0423=U
- U+0443=u
- U+0472=FU+0307
- U+0473=fU+0307
- U+0424=F
- U+0444=f
- U+0427=Ch
- U+0447=ch
- U+0476=YU+0307
- U+0477=yU+0307
- # Uppercase hard sign (ambiguously maps to one Latin character)
- U+042A=U+02BA
- # Lowercase hard sign (ambiguously maps to one Latin character)
- U+044A=U+02BA
- # Uppercase soft sign (ambiguously maps to one Latin character)
- U+042C=U+02B9
- # Lowercase soft sign (ambiguously maps to one Latin character)
- U+044C=U+02B9
- U+042B=Y
- U+044B=y
|