mongolian_mongol_bichig.yml 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. general:
  2. name: Mongolian (Mongol bichig)
  3. roman_to_script:
  4. ignore:
  5. - "at head of title"
  6. - "colophon"
  7. - "date of publication not identified"
  8. - "place of publication not identified"
  9. - "publisher not identified"
  10. # NOTE There is ambiguity about ignoring these
  11. # words. Note that the single-character Roman
  12. # numerals are not included on purpose.
  13. # Ideally the source editors should use the
  14. # dedicated U+2160–U+216F (uppercase Roman
  15. # numerals) and/or U+2170–U+217F (lowercase Roman
  16. # numerals) ranges to avoid this ambiguity.
  17. # TODO implement regular expressions for ignore patterns.
  18. #- re: "I{2,3}"
  19. #- re: "I(V|X)"
  20. #- re: "LI{,3}"
  21. #- re: "LI?(V|X)"
  22. #- re: "L(V|X{1,3})I{,3}"
  23. #- re: "LX{1,3}I?V"
  24. #- re: "LX{1,3}VI{,3}"
  25. #- re: "(V|X{1,3})I{,3}"
  26. #- re: "X{1,3}I{,3}"
  27. #- re: "X{1,3}I(V|X)"
  28. #- re: "X{1,3}VI{,3}"
  29. - "II"
  30. - "III"
  31. - "IV"
  32. - "IX"
  33. - "LI"
  34. - "LII"
  35. - "LIII"
  36. - "LIV"
  37. - "LIX"
  38. - "LV"
  39. - "LVI"
  40. - "LVII"
  41. - "LVIII"
  42. - "LX"
  43. - "LXI"
  44. - "LXII"
  45. - "LXIII"
  46. - "LXIV"
  47. - "LXIX"
  48. - "LXV"
  49. - "LXVI"
  50. - "LXVII"
  51. - "LXVIII"
  52. - "LXX"
  53. - "LXXI"
  54. - "LXXII"
  55. - "LXXIII"
  56. - "LXXIV"
  57. - "LXXIX"
  58. - "LXXV"
  59. - "LXXVI"
  60. - "LXXVII"
  61. - "LXXVIII"
  62. - "LXXX"
  63. - "LXXXI"
  64. - "LXXXII"
  65. - "LXXXIII"
  66. - "LXXXIV"
  67. - "LXXXIX"
  68. - "LXXXV"
  69. - "LXXXVI"
  70. - "LXXXVII"
  71. - "LXXXVIII"
  72. - "VI"
  73. - "VII"
  74. - "VIII"
  75. - "XI"
  76. - "XII"
  77. - "XIII"
  78. - "XIV"
  79. - "XIX"
  80. - "XL"
  81. - "XLI"
  82. - "XLII"
  83. - "XLIII"
  84. - "XLIV"
  85. - "XLIX"
  86. - "XLV"
  87. - "XLVI"
  88. - "XLVII"
  89. - "XLVIII"
  90. - "XV"
  91. - "XVI"
  92. - "XVII"
  93. - "XVIII"
  94. - "XX"
  95. - "XXI"
  96. - "XXII"
  97. - "XXIII"
  98. - "XXIV"
  99. - "XXIX"
  100. - "XXV"
  101. - "XXVI"
  102. - "XXVII"
  103. - "XXVIII"
  104. - "XXX"
  105. - "XXXI"
  106. - "XXXII"
  107. - "XXXIII"
  108. - "XXXIV"
  109. - "XXXIX"
  110. - "XXXV"
  111. - "XXXVI"
  112. - "XXXVII"
  113. - "XXXVIII"
  114. - "and one other"
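  115. - "and ([a-z]+ )?others"  # NOTE(review): written as a regex, but the TODO above says regex ignore patterns are not implemented yet -- confirm this entry can ever match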
  116. - "et al."
  117. map:
  118. "\u002DA": "\u180E\u1820"
  119. "\u002Da": "\u180E\u1820"
  120. "A": "\u1820"
  121. "a": "\u1820"
  122. "\u002DE": "\u180E\u1821"
  123. "\u002De": "\u180E\u1821"
  124. "\u002D": "\u202F"
  125. "E\u0307": "\u1827"
  126. "e\u0307": "\u1827"
  127. "E": "\u1821"
  128. "e": "\u1821"
  129. "\u002DI": "\u180E\u1822"
  130. "\u002Di": "\u180E\u1822"
  131. "I": "\u1822"
  132. "i": "\u1822"
  133. "O\u0307": "\u1825"
  134. "o\u0307": "\u1825"
  135. "O": "\u1823"
  136. "o": "\u1823"
  137. "U\u0307": "\u1826"
  138. "u\u0307": "\u1826"
  139. "U": "\u1824"
  140. "u": "\u1824"
  141. "NG": "\u1829"
  142. # this conversion should not be needed, but does no harm
  143. "nG": "\u1829"
  144. "ng": "\u1829"
  145. "N": "\u1828"
  146. "n": "\u1828"
  147. "B": "\u182A"
  148. "b": "\u182A"
  149. "P": "\u182B"
  150. "p": "\u182B"
  151. "Q": "\u182C"
  152. "q": "\u182C"
  153. "KH": "\u183B"
  154. "Kh": "\u183B"
  155. # this conversion should not be needed, but does no harm
  156. "kH": "\u183B"
  157. "kh": "\u183B"
  158. "K\u0307": "\u183A"
  159. "k\u0307": "\u183A"
  160. "K": "\u182C"
  161. "k": "\u182C"
  162. "G\u0307": "\u182D"
  163. "g\u0307": "\u182D"
  164. "G": "\u182D"
  165. "g": "\u182D"
  166. "M": "\u182E"
  167. "m": "\u182E"
  168. "LH": "\u1840"
  169. "Lh": "\u1840"
  170. # this conversion should not be needed, but does no harm
  171. "lH": "\u1840"
  172. "lh": "\u1840"
  173. "L": "\u182F"
  174. "l": "\u182F"
  175. "TS\u0307": "\u183C"
  176. # this conversion should not be needed, but does no harm
  177. "Ts\u0307": "\u183C"
  178. # this conversion should not be needed, but does no harm
  179. "tS\u0307": "\u183C"
  180. "ts\u0307": "\u183C"
  181. "S\u0301": "\u1831"
  182. "s\u0301": "\u1831"
  183. "S": "\u1830"
  184. "s": "\u1830"
  185. "T": "\u1832"
  186. "t": "\u1832"
  187. "D": "\u1833"
  188. "d": "\u1833"
  189. "J": "\u1835"
  190. "j": "\u1835"
  191. "Y": "\u1836"
  192. "y": "\u1836"
  193. "V": "\u1838"
  194. "v": "\u1838"
  195. "W": "\u1838"
  196. "w": "\u1838"
  197. "F": "\u1839"
  198. "f": "\u1839"
  199. "ZR": "\u183F"
  200. # this conversion should not be needed, but does no harm
  201. "Zr": "\u183F"
  202. # this conversion should not be needed, but does no harm
  203. "zR": "\u183F"
  204. "zr": "\u183F"
  205. "R": "\u1837"
  206. "r": "\u1837"
  207. "ZH": "\u1841"
  208. "Zh": "\u1841"
  209. # this conversion should not be needed, but does no harm
  210. "zH": "\u1841"
  211. "zh": "\u1841"
  212. "CH": "\u1842"
  213. "Ch": "\u1842"
  214. # this conversion should not be needed, but does no harm
  215. "cH": "\u1842"
  216. "ch": "\u1842"
  217. # this is a Buryat letter
  218. "C\u0307": "\u1878"
  219. "c\u0307": "\u1878"
  220. "C": "\u1834"
  221. "c": "\u1834"
  222. "H": "\u183E"
  223. "h": "\u183E"
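  224. "-": "\u180E"  # NOTE(review): "-" is the same key as "\u002D", which is mapped to "\u202F" earlier in this map; duplicate key with a conflicting value -- confirm which mapping is intended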
  225. script_to_roman:
  226. map:
  227. # ga
  228. "\u182D\u1820": "g\u0307a"
  229. # go
  230. "\u182D\u1823": "g\u0307o"
  231. # gu
  232. "\u182D\u1824": "g\u0307u"
  233. # ag
  234. "\u1820\u182D": "ag\u0307"
  235. # og
  236. "\u1823\u182D": "og\u0307"
  237. # ug
  238. "\u1824\u182D": "ug\u0307"
  239. # ge
  240. "\u182D\u1821": "ge"
  241. # gi
  242. "\u182D\u1822": "gi"
  243. # goe
  244. "\u182D\u1825": "go\u0307"
  245. # gue
  246. "\u182D\u1826": "gu\u0307"
  247. # gee
  248. "\u182D\u1827": "ge\u0307"
  249. # eg
  250. "\u1821\u182D": "eg"
  251. # ig
  252. "\u1822\u182D": "ig"
  253. # oeg
  254. "\u1825\u182D": "o\u0307g"
  255. # ueg
  256. "\u1826\u182D": "u\u0307g"
  257. # eeg
  258. "\u1827\u182D": "e\u0307g"
  259. # qa
  260. "\u182C\u1820": "qa"
  261. # qo
  262. "\u182C\u1823": "qo"
  263. # qu
  264. "\u182C\u1824": "qu"
  265. # aq (should not occur)
  266. "\u1820\u182C": "aq"
  267. # oq (should not occur)
  268. "\u1823\u182C": "oq"
  269. # uq (should not occur)
  270. "\u1824\u182C": "uq"
  271. # ke
  272. "\u182C\u1821": "ke"
  273. # ki
  274. "\u182C\u1822": "ki"
  275. # koe
  276. "\u182C\u1825": "ko\u0307"
  277. # kue
  278. "\u182C\u1826": "ku\u0307"
  279. # kee
  280. "\u182C\u1827": "ke\u0307"
  281. # ek (should not occur)
  282. "\u1821\u182C": "ek"
  283. # ik (should not occur)
  284. "\u1822\u182C": "ik"
  285. # oek (should not occur)
  286. "\u1825\u182C": "o\u0307k"
  287. # uek (should not occur)
  288. "\u1826\u182C": "u\u0307k"
  289. # eek (should not occur)
  290. "\u1827\u182C": "e\u0307k"
  291. # non-connecting vowel a
  292. "\u180E\u1820": "\u002Da"
  293. # non-connecting vowel e
  294. "\u180E\u1821": "\u002De"
  295. # non-connecting vowel i
  296. "\u180E\u1822": "\u002Di"
  297. # Other Mongolian vowel separators to hyphen
  298. "\u180E": "\u002D"
  299. # Narrow no-break space to hyphen
  300. "\u202F": "\u002D"
  301. # Mongolian punctuation marks and digits
  302. "\u1801": "..."
  303. "\u1802": ","
  304. "\u1803": "."
  305. "\u1804": ":"
  306. "\u1805": "*"
  307. "\u1806": "-"
  308. "\u1807": "\u0020"
  309. "\u1808": ","
  310. "\u1809": "."
  311. "\u180A": "-"
  312. "\u1810": "0"
  313. "\u1811": "1"
  314. "\u1812": "2"
  315. "\u1813": "3"
  316. "\u1814": "4"
  317. "\u1815": "5"
  318. "\u1816": "6"
  319. "\u1817": "7"
  320. "\u1818": "8"
  321. "\u1819": "9"
  322. # Mongolian vowels NOT associated with g/g+dot or k/q
  323. "\u1820": "a"
  324. "\u1821": "e"
  325. "\u1822": "i"
  326. "\u1823": "o"
  327. "\u1824": "u"
  328. "\u1825": "o\u0307"
  329. "\u1826": "u\u0307"
  330. "\u1827": "e\u0307"
  331. "\u1828": "n"
  332. "\u1829": "ng"
  333. "\u182A": "b"
  334. "\u182B": "p"
  335. "\u182C": "q"
  336. "\u182D": "g\u0307"
  337. "\u182E": "m"
  338. "\u182F": "l"
  339. "\u1830": "s"
  340. "\u1831": "s\u0301"
  341. "\u1832": "t"
  342. "\u1833": "d"
  343. "\u1834": "c"
  344. "\u1835": "j"
  345. "\u1836": "y"
  346. "\u1837": "r"
  347. "\u1838": "v"
  348. "\u1839": "f"
  349. "\u183A": "k\u0307"
  350. "\u183B": "kh"
  351. "\u183C": "ts\u0307"
  352. "\u183D": "z"
  353. "\u183E": "h\u0307"
  354. "\u183F": "zr"
  355. "\u1840": "lh"
  356. "\u1841": "zh"
  357. "\u1842": "ch"
  358. "\u1878": "c\u0307"