---
# Transliteration table between Romanized Mongolian and the traditional
# Mongolian script (Mongol bichig, Unicode block U+1800-U+18AF).
#
# Layout:
#   general.name            - human-readable name of this table.
#   roman_to_script.ignore  - Roman strings listed to be left untransliterated.
#   roman_to_script.map     - Roman token -> Mongolian script code point(s).
#   script_to_roman.map     - Mongolian script code point(s) -> Roman token.
#
# All non-ASCII characters are written as \uXXXX escapes, so every key and
# value that contains one must stay double-quoted.

general:
  name: Mongolian (Mongol bichig)

roman_to_script:
  ignore:
    # Cataloguing boilerplate phrases.
    - "at head of title"
    - "colophon"
    - "date of publication not identified"
    - "place of publication not identified"
    - "publisher not identified"

    # NOTE There is ambiguity about ignoring these words. The
    # single-character Roman numerals are left out on purpose. Ideally the
    # source editors should use the dedicated U+2160-U+216F (upper case
    # Roman numerals) and/or U+2170-U+217F (lower case Roman numerals)
    # ranges to avoid this ambiguity.
    # TODO implement regular expressions for ignore patterns.
    # - re: "I{2,3}"
    # - re: "I(V|X)"
    # - re: "LI{,3}"
    # - re: "LI?(V|X)"
    # - re: "L(V|X{1,3})I{,3}"
    # - re: "LX{1,3}I?V"
    # - re: "LX{1,3}VI{,3}"
    # - re: "(V|X{1,3})I{,3}"
    # - re: "X{1,3}I{,3}"
    # - re: "X{1,3}I(V|X)"
    # - re: "X{1,3}VI{,3}"
    - "II"
    - "III"
    - "IV"
    - "IX"
    - "LI"
    - "LII"
    - "LIII"
    - "LIV"
    - "LIX"
    - "LV"
    - "LVI"
    - "LVII"
    - "LVIII"
    - "LX"
    - "LXI"
    - "LXII"
    - "LXIII"
    - "LXIV"
    - "LXIX"
    - "LXV"
    - "LXVI"
    - "LXVII"
    - "LXVIII"
    - "LXX"
    - "LXXI"
    - "LXXII"
    - "LXXIII"
    - "LXXIV"
    - "LXXIX"
    - "LXXV"
    - "LXXVI"
    - "LXXVII"
    - "LXXVIII"
    - "LXXX"
    - "LXXXI"
    - "LXXXII"
    - "LXXXIII"
    - "LXXXIV"
    - "LXXXIX"
    - "LXXXV"
    - "LXXXVI"
    - "LXXXVII"
    - "LXXXVIII"
    - "VI"
    - "VII"
    - "VIII"
    - "XI"
    - "XII"
    - "XIII"
    - "XIV"
    - "XIX"
    - "XL"
    - "XLI"
    - "XLII"
    - "XLIII"
    - "XLIV"
    - "XLIX"
    - "XLV"
    - "XLVI"
    - "XLVII"
    - "XLVIII"
    - "XV"
    - "XVI"
    - "XVII"
    - "XVIII"
    - "XX"
    - "XXI"
    - "XXII"
    - "XXIII"
    - "XXIV"
    - "XXIX"
    - "XXV"
    - "XXVI"
    - "XXVII"
    - "XXVIII"
    - "XXX"
    - "XXXI"
    - "XXXII"
    - "XXXIII"
    - "XXXIV"
    - "XXXIX"
    - "XXXV"
    - "XXXVI"
    - "XXXVII"
    - "XXXVIII"

    # "et al."-style phrases.
    - "and one other"
    # NOTE(review): the next entry is written as a regular expression, but
    # the TODO above says regex ignore patterns are not implemented yet, so
    # it is presumably matched literally - confirm against the loader.
    - "and ([a-z]+ )?others"
    - "et al."

  map:
    # Vowels. A hyphen directly before a/e/i yields U+180E followed by the
    # vowel; dotted vowels (combining U+0307) have their own code points.
    "\u002DA": "\u180E\u1820"
    "\u002Da": "\u180E\u1820"
    "A": "\u1820"
    "a": "\u1820"
    "\u002DE": "\u180E\u1821"
    "\u002De": "\u180E\u1821"
    "E\u0307": "\u1827"
    "e\u0307": "\u1827"
    "E": "\u1821"
    "e": "\u1821"
    "\u002DI": "\u180E\u1822"
    "\u002Di": "\u180E\u1822"
    "I": "\u1822"
    "i": "\u1822"
    "O\u0307": "\u1825"
    "o\u0307": "\u1825"
    "O": "\u1823"
    "o": "\u1823"
    "U\u0307": "\u1826"
    "u\u0307": "\u1826"
    "U": "\u1824"
    "u": "\u1824"

    # Consonants. Digraphs are listed in every case combination.
    "NG": "\u1829"
    "Ng": "\u1829"
    # this conversion should not be needed, but does no harm
    "nG": "\u1829"
    "ng": "\u1829"
    "N": "\u1828"
    "n": "\u1828"
    "B": "\u182A"
    "b": "\u182A"
    "P": "\u182B"
    "p": "\u182B"
    "Q": "\u182C"
    "q": "\u182C"
    "KH": "\u183B"
    "Kh": "\u183B"
    # this conversion should not be needed, but does no harm
    "kH": "\u183B"
    "kh": "\u183B"
    "K\u0307": "\u183A"
    "k\u0307": "\u183A"
    # K and Q share the same target letter.
    "K": "\u182C"
    "k": "\u182C"
    # Dotted and plain G share the same target letter.
    "G\u0307": "\u182D"
    "g\u0307": "\u182D"
    "G": "\u182D"
    "g": "\u182D"
    "M": "\u182E"
    "m": "\u182E"
    "LH": "\u1840"
    "Lh": "\u1840"
    # this conversion should not be needed, but does no harm
    "lH": "\u1840"
    "lh": "\u1840"
    "L": "\u182F"
    "l": "\u182F"
    "TS\u0307": "\u183C"
    # this conversion should not be needed, but does no harm
    "Ts\u0307": "\u183C"
    # this conversion should not be needed, but does no harm
    "tS\u0307": "\u183C"
    "ts\u0307": "\u183C"
    "S\u0301": "\u1831"
    "s\u0301": "\u1831"
    "S": "\u1830"
    "s": "\u1830"
    "T": "\u1832"
    "t": "\u1832"
    "D": "\u1833"
    "d": "\u1833"
    "J": "\u1835"
    "j": "\u1835"
    "Y": "\u1836"
    "y": "\u1836"
    # V and W share the same target letter.
    "V": "\u1838"
    "v": "\u1838"
    "W": "\u1838"
    "w": "\u1838"
    "F": "\u1839"
    "f": "\u1839"
    "ZR": "\u183F"
    # this conversion should not be needed, but does no harm
    "Zr": "\u183F"
    # this conversion should not be needed, but does no harm
    "zR": "\u183F"
    "zr": "\u183F"
    "R": "\u1837"
    "r": "\u1837"
    "ZH": "\u1841"
    "Zh": "\u1841"
    # this conversion should not be needed, but does no harm
    "zH": "\u1841"
    "zh": "\u1841"
    "CH": "\u1842"
    "Ch": "\u1842"
    # this conversion should not be needed, but does no harm
    "cH": "\u1842"
    "ch": "\u1842"
    # this is a Buryat letter
    "C\u0307": "\u1878"
    "c\u0307": "\u1878"
    "C": "\u1834"
    "c": "\u1834"
    "H": "\u183E"
    "h": "\u183E"

    # Any hyphen not consumed by the "-a" / "-e" / "-i" entries above.
    # NOTE(review): this key used to be defined twice - once as "\u002D"
    # with value "\u202F" and once as "-" with value "\u180E". Both
    # spellings are the same key; duplicate keys are invalid YAML and
    # last-wins loaders silently keep the later value, which is the one
    # retained here. Confirm whether "\u202F" was the intended target.
    "-": "\u180E"

script_to_roman:
  map:
    # U+182D next to a back vowel (a, o, u) romanizes as g with dot above.
    # ga
    "\u182D\u1820": "g\u0307a"
    # go
    "\u182D\u1823": "g\u0307o"
    # gu
    "\u182D\u1824": "g\u0307u"
    # ag
    "\u1820\u182D": "ag\u0307"
    # og
    "\u1823\u182D": "og\u0307"
    # ug
    "\u1824\u182D": "ug\u0307"

    # U+182D next to e, i or a dotted vowel romanizes as plain g.
    # ge
    "\u182D\u1821": "ge"
    # gi
    "\u182D\u1822": "gi"
    # goe
    "\u182D\u1825": "go\u0307"
    # gue
    "\u182D\u1826": "gu\u0307"
    # gee
    "\u182D\u1827": "ge\u0307"
    # eg
    "\u1821\u182D": "eg"
    # ig
    "\u1822\u182D": "ig"
    # oeg
    "\u1825\u182D": "o\u0307g"
    # ueg
    "\u1826\u182D": "u\u0307g"
    # eeg
    "\u1827\u182D": "e\u0307g"

    # U+182C before a back vowel (a, o, u).
    # NOTE(review): these three emit q with dot above, while the reverse
    # table has no "q\u0307" key and the single-letter fallback below is
    # plain "q" - confirm the dot is intended.
    # qa
    "\u182C\u1820": "q\u0307a"
    # qo
    "\u182C\u1823": "q\u0307o"
    # qu
    "\u182C\u1824": "q\u0307u"
    # aq (should not occur)
    "\u1820\u182C": "aq"
    # oq (should not occur)
    "\u1823\u182C": "oq"
    # uq (should not occur)
    "\u1824\u182C": "uq"

    # U+182C next to e, i or a dotted vowel romanizes as k.
    # ke
    "\u182C\u1821": "ke"
    # ki
    "\u182C\u1822": "ki"
    # koe
    "\u182C\u1825": "ko\u0307"
    # kue
    "\u182C\u1826": "ku\u0307"
    # kee
    "\u182C\u1827": "ke\u0307"
    # ek (should not occur)
    "\u1821\u182C": "ek"
    # ik (should not occur)
    "\u1822\u182C": "ik"
    # oek (should not occur)
    "\u1825\u182C": "o\u0307k"
    # uek (should not occur)
    "\u1826\u182C": "u\u0307k"
    # eek (should not occur)
    "\u1827\u182C": "e\u0307k"

    # non-connecting vowel a
    "\u180E\u1820": "\u002Da"
    # non-connecting vowel e
    "\u180E\u1821": "\u002De"
    # non-connecting vowel i
    "\u180E\u1822": "\u002Di"
    # Any other Mongolian vowel separator to a bare hyphen
    "\u180E": "\u002D"
    # Narrow no-break space to hyphen
    "\u202F": "\u002D"

    # Punctuation and separators
    "\u1801": "..."
    "\u1802": ","
    "\u1803": "."
    "\u1804": ":"
    "\u1805": "*"
    "\u1806": "-"
    "\u1807": "\u0020"
    "\u1808": ","
    "\u1809": "."
    "\u180A": "-"

    # Digits
    "\u1810": "0"
    "\u1811": "1"
    "\u1812": "2"
    "\u1813": "3"
    "\u1814": "4"
    "\u1815": "5"
    "\u1816": "6"
    "\u1817": "7"
    "\u1818": "8"
    "\u1819": "9"

    # Single letters NOT covered by the g / k / q digraphs above:
    # vowels first, then consonants.
    "\u1820": "a"
    "\u1821": "e"
    "\u1822": "i"
    "\u1823": "o"
    "\u1824": "u"
    "\u1825": "o\u0307"
    "\u1826": "u\u0307"
    "\u1827": "e\u0307"
    "\u1828": "n"
    "\u1829": "ng"
    "\u182A": "b"
    "\u182B": "p"
    "\u182C": "q"
    "\u182D": "g\u0307"
    "\u182E": "m"
    "\u182F": "l"
    "\u1830": "s"
    "\u1831": "s\u0301"
    "\u1832": "t"
    "\u1833": "d"
    "\u1834": "c"
    "\u1835": "j"
    "\u1836": "y"
    "\u1837": "r"
    "\u1838": "v"
    "\u1839": "f"
    "\u183A": "k\u0307"
    "\u183B": "kh"
    "\u183C": "ts\u0307"
    "\u183D": "z"
    "\u183E": "h\u0307"
    "\u183F": "zr"
    "\u1840": "lh"
    "\u1841": "zh"
    "\u1842": "ch"
    "\u1878": "c\u0307"