123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
- general:
- name: Mongolian (Mongol bichig)
- roman_to_script:
- ignore:
- - "at head of title"
- - "colophon"
- - "date of publication not identified"
- - "place of publication not identified"
- - "publisher not identified"
- # NOTE There is ambiguity about ignoring these
- # words. Note that the single-character Roman
- # numerals are intentionally not included.
- # Ideally the source editors should use the
- # dedicated U+2160..U+216F (uppercase Roman
- # numerals) and/or U+2170..U+217F (lowercase Roman
- # numerals) ranges to avoid this ambiguity.
- # TODO implement regular expressions for ignore patterns.
- #- re: "I{2,3}"
- #- re: "I(V|X)"
- #- re: "LI{,3}"
- #- re: "LI?(V|X)"
- #- re: "L(V|X{1,3})I{,3}"
- #- re: "LX{1,3}I?V"
- #- re: "LX{1,3}VI{,3}"
- #- re: "(V|X{1,3})I{,3}"
- #- re: "X{1,3}I{,3}"
- #- re: "X{1,3}I(V|X)"
- #- re: "X{1,3}VI{,3}"
- - "II"
- - "III"
- - "IV"
- - "IX"
- - "LI"
- - "LII"
- - "LIII"
- - "LIV"
- - "LIX"
- - "LV"
- - "LVI"
- - "LVII"
- - "LVIII"
- - "LX"
- - "LXI"
- - "LXII"
- - "LXIII"
- - "LXIV"
- - "LXIX"
- - "LXV"
- - "LXVI"
- - "LXVII"
- - "LXVIII"
- - "LXX"
- - "LXXI"
- - "LXXII"
- - "LXXIII"
- - "LXXIV"
- - "LXXIX"
- - "LXXV"
- - "LXXVI"
- - "LXXVII"
- - "LXXVIII"
- - "LXXX"
- - "LXXXI"
- - "LXXXII"
- - "LXXXIII"
- - "LXXXIV"
- - "LXXXIX"
- - "LXXXV"
- - "LXXXVI"
- - "LXXXVII"
- - "LXXXVIII"
- - "VI"
- - "VII"
- - "VIII"
- - "XI"
- - "XII"
- - "XIII"
- - "XIV"
- - "XIX"
- - "XL"
- - "XLI"
- - "XLII"
- - "XLIII"
- - "XLIV"
- - "XLIX"
- - "XLV"
- - "XLVI"
- - "XLVII"
- - "XLVIII"
- - "XV"
- - "XVI"
- - "XVII"
- - "XVIII"
- - "XX"
- - "XXI"
- - "XXII"
- - "XXIII"
- - "XXIV"
- - "XXIX"
- - "XXV"
- - "XXVI"
- - "XXVII"
- - "XXVIII"
- - "XXX"
- - "XXXI"
- - "XXXII"
- - "XXXIII"
- - "XXXIV"
- - "XXXIX"
- - "XXXV"
- - "XXXVI"
- - "XXXVII"
- - "XXXVIII"
- - "and one other"
- - "and ([a-z]+ )?others"
- - "et al."
- map:
- "\u002DA": "\u180E\u1820"
- "\u002Da": "\u180E\u1820"
- "A": "\u1820"
- "a": "\u1820"
- "\u002DE": "\u180E\u1821"
- "\u002De": "\u180E\u1821"
- "\u002D": "\u202F"
- "E\u0307": "\u1827"
- "e\u0307": "\u1827"
- "E": "\u1821"
- "e": "\u1821"
- "\u002DI": "\u180E\u1822"
- "\u002Di": "\u180E\u1822"
- "I": "\u1822"
- "i": "\u1822"
- "O\u0307": "\u1825"
- "o\u0307": "\u1825"
- "O": "\u1823"
- "o": "\u1823"
- "U\u0307": "\u1826"
- "u\u0307": "\u1826"
- "U": "\u1824"
- "u": "\u1824"
- "NG": "\u1829"
- # this conversion should not be needed, but does no harm
- "nG": "\u1829"
- "ng": "\u1829"
- "N": "\u1828"
- "n": "\u1828"
- "B": "\u182A"
- "b": "\u182A"
- "P": "\u182B"
- "p": "\u182B"
- "Q": "\u182C"
- "q": "\u182C"
- "KH": "\u183B"
- "Kh": "\u183B"
- # this conversion should not be needed, but does no harm
- "kH": "\u183B"
- "kh": "\u183B"
- "K\u0307": "\u183A"
- "k\u0307": "\u183A"
- "K": "\u182C"
- "k": "\u182C"
- "G\u0307": "\u182D"
- "g\u0307": "\u182D"
- "G": "\u182D"
- "g": "\u182D"
- "M": "\u182E"
- "m": "\u182E"
- "LH": "\u1840"
- "Lh": "\u1840"
- # this conversion should not be needed, but does no harm
- "lH": "\u1840"
- "lh": "\u1840"
- "L": "\u182F"
- "l": "\u182F"
- "TS\u0307": "\u183C"
- # this conversion should not be needed, but does no harm
- "Ts\u0307": "\u183C"
- # this conversion should not be needed, but does no harm
- "tS\u0307": "\u183C"
- "ts\u0307": "\u183C"
- "S\u0301": "\u1831"
- "s\u0301": "\u1831"
- "S": "\u1830"
- "s": "\u1830"
- "T": "\u1832"
- "t": "\u1832"
- "D": "\u1833"
- "d": "\u1833"
- "J": "\u1835"
- "j": "\u1835"
- "Y": "\u1836"
- "y": "\u1836"
- "V": "\u1838"
- "v": "\u1838"
- "W": "\u1838"
- "w": "\u1838"
- "F": "\u1839"
- "f": "\u1839"
- "ZR": "\u183F"
- # this conversion should not be needed, but does no harm
- "Zr": "\u183F"
- # this conversion should not be needed, but does no harm
- "zR": "\u183F"
- "zr": "\u183F"
- "R": "\u1837"
- "r": "\u1837"
- "ZH": "\u1841"
- "Zh": "\u1841"
- # this conversion should not be needed, but does no harm
- "zH": "\u1841"
- "zh": "\u1841"
- "CH": "\u1842"
- "Ch": "\u1842"
- # this conversion should not be needed, but does no harm
- "cH": "\u1842"
- "ch": "\u1842"
- # this is a Buryat letter
- "C\u0307": "\u1878"
- "c\u0307": "\u1878"
- "C": "\u1834"
- "c": "\u1834"
- "H": "\u183E"
- "h": "\u183E"
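- # NOTE(review): "-" is the same key as "\u002D", which is already mapped earlier in this map (to "\u202F"). Duplicate mapping keys are not valid YAML and loaders typically keep only one of them; confirm which target is intended.
- "-": "\u180E"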
- script_to_roman:
- map:
- # ga
- "\u182D\u1820": "g\u0307a"
- # go
- "\u182D\u1823": "g\u0307o"
- # gu
- "\u182D\u1824": "g\u0307u"
- # ag
- "\u1820\u182D": "ag\u0307"
- # og
- "\u1823\u182D": "og\u0307"
- # ug
- "\u1824\u182D": "ug\u0307"
- # ge
- "\u182D\u1821": "ge"
- # gi
- "\u182D\u1822": "gi"
- # goe
- "\u182D\u1825": "go\u0307"
- # gue
- "\u182D\u1826": "gu\u0307"
- # gee
- "\u182D\u1827": "ge\u0307"
- # eg
- "\u1821\u182D": "eg"
- # ig
- "\u1822\u182D": "ig"
- # oeg
- "\u1825\u182D": "o\u0307g"
- # ueg
- "\u1826\u182D": "u\u0307g"
- # eeg
- "\u1827\u182D": "e\u0307g"
- # qa
- "\u182C\u1820": "q\u0307a"
- # qo
- "\u182C\u1823": "q\u0307o"
- # qu
- "\u182C\u1824": "q\u0307u"
- # aq (should not occur)
- "\u1820\u182C": "aq"
- # oq (should not occur)
- "\u1823\u182C": "oq"
- # uq (should not occur)
- "\u1824\u182C": "uq"
- # ke
- "\u182C\u1821": "ke"
- # ki
- "\u182C\u1822": "ki"
- # koe
- "\u182C\u1825": "ko\u0307"
- # kue
- "\u182C\u1826": "ku\u0307"
- # kee
- "\u182C\u1827": "ke\u0307"
- # ek (should not occur)
- "\u1821\u182C": "ek"
- # ik (should not occur)
- "\u1822\u182C": "ik"
- # oek (should not occur)
- "\u1825\u182C": "o\u0307k"
- # uek (should not occur)
- "\u1826\u182C": "u\u0307k"
- # eek (should not occur)
- "\u1827\u182C": "e\u0307k"
- # non-connecting vowel a
- "\u180E\u1820": "\u002Da"
- # non-connecting vowel e
- "\u180E\u1821": "\u002De"
- # non-connecting vowel i
- "\u180E\u1822": "\u002Di"
- # Any other Mongolian vowel separator (not followed by a, e or i) to hyphen
- "\u180E": "\u002D"
- # Narrow no-break space to hyphen
- "\u202F": "\u002D"
- # Mongolian punctuation marks and digits
- "\u1801": "..."
- "\u1802": ","
- "\u1803": "."
- "\u1804": ":"
- "\u1805": "*"
- "\u1806": "-"
- "\u1807": "\u0020"
- "\u1808": ","
- "\u1809": "."
- "\u180A": "-"
- "\u1810": "0"
- "\u1811": "1"
- "\u1812": "2"
- "\u1813": "3"
- "\u1814": "4"
- "\u1815": "5"
- "\u1816": "6"
- "\u1817": "7"
- "\u1818": "8"
- "\u1819": "9"
- # Single Mongolian letters (vowels and consonants) not covered by the g/g+dot or k/q pairs above
- "\u1820": "a"
- "\u1821": "e"
- "\u1822": "i"
- "\u1823": "o"
- "\u1824": "u"
- "\u1825": "o\u0307"
- "\u1826": "u\u0307"
- "\u1827": "e\u0307"
- "\u1828": "n"
- "\u1829": "ng"
- "\u182A": "b"
- "\u182B": "p"
- "\u182C": "q"
- "\u182D": "g\u0307"
- "\u182E": "m"
- "\u182F": "l"
- "\u1830": "s"
- "\u1831": "s\u0301"
- "\u1832": "t"
- "\u1833": "d"
- "\u1834": "c"
- "\u1835": "j"
- "\u1836": "y"
- "\u1837": "r"
- "\u1838": "v"
- "\u1839": "f"
- "\u183A": "k\u0307"
- "\u183B": "kh"
- "\u183C": "ts\u0307"
- "\u183D": "z"
- "\u183E": "h\u0307"
- "\u183F": "zr"
- "\u1840": "lh"
- "\u1841": "zh"
- "\u1842": "ch"
- "\u1878": "c\u0307"
|