SlavonicRomanization.cfg 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. # Original table by Randall K. Barry, 22 February 2021
  2. [General]
  3. Name=Slavonic
  4. # UNICODE CYRILLIC CHARACTER RANGE: U+0400-U+04FF
  5. [RomanToScript]
  6. FieldsIncluded=100 110 111 130 240 241 243 245 246 247 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  7. SubfieldsAlwaysExcluded=uvxy0123456789
  8. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  9. # RDA boilerplate phrases not transliterated:
  10. Place of publication not identified=Place of publication not identified
  11. publisher not identified=publisher not identified
  12. date of publication not identified=date of publication not identified
  13. At head of title=At head of title
  14. Colophon=Colophon
  15. # COMMON COMBINING CHARACTERS (always follow a base letter):
  16. # combining grave U+0300
  17. # combining acute U+0301
  18. # combining circumflex U+0302
  19. # combining macron U+0304
  20. # combining breve U+0306
  21. # combining dot above U+0307
  22. # combining diaeresis U+0308
  23. # combining ring above U+030A
  24. # combining double acute U+030B
  25. # combining caron (hachek) U+030C
  26. # combining candrabindu U+0310
  27. # combining dot below U+0323
  28. # combining comma below U+0326 (Romanian, Latvian, Livonian)
  29. # combining cedilla U+0327 (French, Turkish, Azeri)
  30. # combining ogonek (hook) U+0328 (Polish, Lithuanian)
  31. # combining left ligature U+FE20 (Cyrillic transliteration)
  32. # combining right ligature U+FE21 (Cyrillic transliteration)
  33. # soft sign/prime (spacing) U+02B9 (Cyrillic transliteration)
  34. # hard sign/double prime (spacing) U+02BA (Cyrillic transliteration)
  35. # ayn (spacing) U+02BB (Semitic and Caucasian languages)
  36. # alif (spacing) U+02BC (Semitic languages)
  37. # middle dot (spacing) U+00B7 (Catalan)
  38. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  39. # CONVERSION OF "I/i" LIGATED TO "A/a" (all capitalization patterns)
  40. IU+FE20AU+FE21=U+042F
  41. IU+FE20aU+FE21=U+042F
  42. iU+FE20aU+FE21=U+044F
  43. iU+FE20AU+FE21=U+044F
  44. # REMAINING LONE "A/a"
  45. A=U+0410
  46. a=U+0430
  47. B=U+0411
  48. b=U+0431
  49. VU+0307=U+0474
  50. vU+0307=U+0475
  51. V=U+0412
  52. v=U+0432
  53. GU+0301=U+0494
  54. gU+0301=U+0495
  55. G=U+0413
  56. g=U+0433
  57. D=U+0414
  58. d=U+0434
  59. # CONVERSION OF "I/i" LIGATED TO "E/e", SOME WITH MACRON (0304) AND OGONEK (0328)
  60. IU+FE20EU+FE21U+0304=U+0464
  61. IU+FE20EU+0304U+FE21=U+0464
  62. IU+FE20eU+FE21U+0304=U+0464
  63. IU+FE20eU+0304U+FE21=U+0464
  64. IU+FE20EU+FE21U+0328=U+0468
  65. IU+FE20EU+0328U+FE21=U+0468
  66. IU+FE20eU+FE21U+0328=U+0468
  67. IU+FE20eU+0328U+FE21=U+0468
  68. iU+FE20eU+FE21U+0304=U+0465
  69. iU+FE20eU+0304U+FE21=U+0465
  70. iU+FE20EU+FE21U+0304=U+0465
  71. iU+FE20EU+0304U+FE21=U+0465
  72. iU+FE20eU+FE21U+0328=U+0469
  73. iU+FE20eU+0328U+FE21=U+0469
  74. iU+FE20EU+FE21U+0328=U+0469
  75. iU+FE20EU+0328U+FE21=U+0469
  76. IU+FE20EU+FE21=U+0462
  77. IU+FE20eU+FE21=U+0462
  78. iU+FE20eU+FE21=U+0463
  79. iU+FE20EU+FE21=U+0463
  80. # CONVERSION OF "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
  81. EU+030C=U+0462
  82. EU+0304=U+0404
  83. EU+0307=U+042D
  84. EU+0308=U+0401
  85. EU+0328=U+0466
  86. eU+030C=U+0463
  87. eU+0304=U+0454
  88. eU+0307=U+044D
  89. eU+0308=U+0451
  90. eU+0328=U+0467
  91. # CONVERSION OF REMAINING LONE "E/e"
  92. E=U+0415
  93. e=U+0435
  94. ZH=U+0416
  95. Zh=U+0416
  96. zH=U+0436
  97. zh=U+0436
  98. Z=U+0417
  99. z=U+0437
  100. # CONVERSION OF "I/i" LIGATED TO "O/o" WITH OGONEK (0328)
  101. IU+FE20OU+FE21U+0328=U+046C
  102. IU+FE20OU+0328U+FE21=U+046C
  103. IU+FE20oU+FE21U+0328=U+046C
  104. IU+FE20oU+0328U+FE21=U+046C
  105. iU+FE20oU+FE21U+0328=U+046D
  106. iU+FE20oU+0328U+FE21=U+046D
  107. iU+FE20OU+FE21U+0328=U+046D
  108. iU+FE20OU+0328U+FE21=U+046D
  109. # CONVERSION OF "I/i" LIGATED TO "U/u"
  110. IU+FE20UU+FE21=U+042E
  111. IU+FE20uU+FE21=U+042E
  112. iU+FE20uU+FE21=U+044E
  113. iU+FE20UU+FE21=U+044E
  114. # CONVERSION OF "I/i" WITH MACRON (0304) AND BREVE (0306)
  115. IU+0304=U+0406
  116. IU+0306=U+0419
  117. iU+0304=U+0456
  118. iU+0306=U+0439
  119. # CONVERSION OF REMAINING LONE "I/i"
  120. I=U+0418
  121. i=U+0438
  122. KH=U+0425
  123. Kh=U+0425
  124. kh=U+0445
  125. kH=U+0445
  126. KU+FE20SU+FE21=U+046E
  127. KU+FE20sU+FE21=U+046E
  128. kU+FE20sU+FE21=U+046F
  129. kU+FE20SU+FE21=U+046F
  130. K=U+041A
  131. k=U+043A
  132. L=U+041B
  133. l=U+043B
  134. M=U+041C
  135. m=U+043C
  136. N=U+041D
  137. n=U+043D
  138. # CONVERSION OF "O/o" WITH OR WITHOUT MACRON (0304), LIGATED TO "T/t"
  139. OU+0304U+FE20TU+FE21=U+047E
  140. OU+0304U+FE20tU+FE21=U+047E
  141. OU+FE20U+0304TU+FE21=U+047E
  142. OU+FE20U+0304tU+FE21=U+047E
  143. OU+FE20TU+FE21=U+047E
  144. OU+FE20tU+FE21=U+047E
  145. oU+0304U+FE20tU+FE21=U+047F
  146. oU+0304U+FE20TU+FE21=U+047F
  147. oU+FE20U+0304tU+FE21=U+047F
  148. oU+FE20U+0304TU+FE21=U+047F
  149. oU+FE20tU+FE21=U+047F
  150. oU+FE20TU+FE21=U+047F
  151. # CONVERSION OF "O/o" WITH MACRON (0304) AND OGONEK (0328)
  152. OU+0328=U+046A
  153. oU+0328=U+046B
  154. OU+0304=U+0460
  155. oU+0304=U+0461
  156. # CONVERSION OF REMAINING LONE "O/o"
  157. O=U+041E
  158. o=U+043E
  159. PU+FE20SU+FE21=U+0470
  160. PU+FE20sU+FE21=U+0470
  161. pU+FE20sU+FE21=U+0471
  162. pU+FE20SU+FE21=U+0471
  163. P=U+041F
  164. p=U+043F
  165. R=U+0420
  166. r=U+0440
  167. SHT=U+0429
  168. SHt=U+0429
  169. Sht=U+0429
  170. sHT=U+0449
  171. shT=U+0449
  172. sht=U+0449
  173. SH=U+0428
  174. Sh=U+0428
  175. sh=U+0448
  176. sH=U+0448
  177. TU+FE20SU+FE21=U+0426
  178. TU+FE20sU+FE21=U+0426
  179. tU+FE20sU+FE21=U+0446
  180. tU+FE20SU+FE21=U+0446
  181. S=U+0421
  182. s=U+0441
  183. T=U+0422
  184. t=U+0442
  185. UU+0304=U+0478
  186. uU+0304=U+0479
  187. U=U+0423
  188. u=U+0443
  189. FU+0307=U+0472
  190. fU+0307=U+0473
  191. F=U+0424
  192. f=U+0444
  193. CH=U+0427
  194. Ch=U+0427
  195. ch=U+0447
  196. cH=U+0447
  197. YU+0307=U+0476
  198. yU+0307=U+0477
  199. Y=U+042B
  200. y=U+044B
  201. # this conversion shouldn't be needed, but does no harm
  202. U+FE20=
  203. # this conversion shouldn't be needed, but does no harm
  204. U+FE21=
  205. # this conversion is ambiguous - U+042C is also theoretically possible
  206. U+0027=U+044C
  207. # this conversion is ambiguous - U+042C is also theoretically possible
  208. U+02B9=U+044C
  209. # this conversion is ambiguous - U+042A is also theoretically possible
  210. U+02BA=U+044A
  211. [ScriptToRoman]
  212. FieldsIncluded=100 110 111 130 240 241 242 243 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  213. SubfieldsAlwaysExcluded=uvxy0123456789
  214. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 240/k 240/l 240/s 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  215. # RDA boilerplate phrases not transliterated:
  216. Place of publication not identified=Place of publication not identified
  217. publisher not identified=publisher not identified
  218. # CYRILLIC SCRIPT ALPHABETIC CHARACTERS TO BE CONVERTED
  219. # CONVERSION TO "I/i" LIGATED TO "A/a"
  220. U+042F=IU+FE20AU+FE21
  221. U+044F=iU+FE20aU+FE21
  222. U+0410=A
  223. U+0430=a
  224. U+0411=B
  225. U+0431=b
  226. U+0474=VU+0307
  227. U+0475=vU+0307
  228. U+0412=V
  229. U+0432=v
  230. U+0494=GU+0301
  231. U+0495=gU+0301
  232. U+0413=G
  233. U+0433=g
  234. U+0414=D
  235. U+0434=d
  236. # CONVERSION TO "I/i" LIGATED TO "E/e" WITH DIACRITICS
  237. U+0464=IU+FE20EU+FE21U+0304
  238. U+0468=IU+FE20EU+FE21U+0328
  239. U+0465=iU+FE20eU+FE21U+0304
  240. U+0469=iU+FE20eU+FE21U+0328
  241. # CONVERSION TO "E/e" WITH MACRON (0304), DOT ABOVE (0307), DIAERESIS (0308), OGONEK (0328), & CARON (030C)
  242. U+0462=EU+030C
  243. U+0404=EU+0304
  244. U+042D=EU+0307
  245. U+0401=EU+0308
  246. U+0466=EU+0328
  247. U+0463=eU+030C
  248. U+0454=eU+0304
  249. U+044D=eU+0307
  250. U+0451=eU+0308
  251. U+0467=eU+0328
  252. U+0415=E
  253. U+0435=e
  254. U+0416=Zh
  255. U+0436=zh
  256. U+0417=Z
  257. U+0437=z
  258. # CONVERSION TO "I/i" LIGATED TO "O/o" WITH OGONEK (0328)
  259. U+046C=IU+FE20OU+FE21U+0328
  260. U+046D=iU+FE20oU+FE21U+0328
  261. # CONVERSION TO "I/i" LIGATED TO "U/u"
  262. U+042E=IU+FE20UU+FE21
  263. U+044E=iU+FE20uU+FE21
  264. # CONVERSION TO "I/i" WITH MACRON (0304) AND BREVE (0306)
  265. U+0406=IU+0304
  266. U+0419=IU+0306
  267. U+0456=iU+0304
  268. U+0439=iU+0306
  269. # CONVERSION TO LONE "I/i"
  270. U+0418=I
  271. U+0438=i
  272. U+0425=Kh
  273. U+0445=kh
  274. U+046E=KU+FE20SU+FE21
  275. U+046F=kU+FE20sU+FE21
  276. U+041A=K
  277. U+043A=k
  278. U+041B=L
  279. U+043B=l
  280. U+041C=M
  281. U+043C=m
  282. U+041D=N
  283. U+043D=n
  284. # CONVERSION TO "O/o" WITH MACRON (0304) LIGATED TO "T/t"
  285. U+047E=OU+FE20U+0304tU+FE21
  286. U+047F=oU+FE20U+0304tU+FE21
  287. # CONVERSION TO "O/o" WITH MACRON (0304) AND OGONEK (0328)
  288. U+046A=OU+0328
  289. U+046B=oU+0328
  290. U+0460=OU+0304
  291. U+0461=oU+0304
  292. # CONVERSION TO LONE "O/o"
  293. U+041E=O
  294. U+043E=o
  295. U+0470=PU+FE20SU+FE21
  296. U+0471=pU+FE20sU+FE21
  297. U+041F=P
  298. U+043F=p
  299. U+0420=R
  300. U+0440=r
  301. U+0429=Sht
  302. U+0449=sht
  303. U+0428=Sh
  304. U+0448=sh
  305. U+0426=TU+FE20SU+FE21
  306. U+0446=tU+FE20sU+FE21
  307. U+0421=S
  308. U+0441=s
  309. U+0422=T
  310. U+0442=t
  311. U+0478=UU+0304
  312. U+0479=uU+0304
  313. U+0423=U
  314. U+0443=u
  315. U+0472=FU+0307
  316. U+0473=fU+0307
  317. U+0424=F
  318. U+0444=f
  319. U+0427=Ch
  320. U+0447=ch
  321. U+0476=YU+0307
  322. U+0477=yU+0307
  323. # Uppercase hard sign (upper- and lowercase hard signs both map to U+02BA, so case is not recoverable)
  324. U+042A=U+02BA
  325. # Lowercase hard sign (upper- and lowercase hard signs both map to U+02BA, so case is not recoverable)
  326. U+044A=U+02BA
  327. # Uppercase soft sign (upper- and lowercase soft signs both map to U+02B9, so case is not recoverable)
  328. U+042C=U+02B9
  329. # Lowercase soft sign (upper- and lowercase soft signs both map to U+02B9, so case is not recoverable)
  330. U+044C=U+02B9
  331. U+042B=Y
  332. U+044B=y