|
@@ -8,111 +8,104 @@ general:
|
|
|
roman_to_script:
|
|
|
|
|
|
map:
|
|
|
+ "\u0020latin": "\u0020\u182F\u1820\u1832\u180B\u1822\u1828"
|
|
|
+ "mate\u0307riyal": "\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F"
|
|
|
"\u002Daca": "\u202F\u1820\u1834\u1820"
|
|
|
- "\u002DA": "\u180E\u1820"
|
|
|
"\u002Da": "\u180E\u1820"
|
|
|
- "A": "\u1820"
|
|
|
"a": "\u1820"
|
|
|
"\u002Dece": "\u202F\u1821\u1834\u1821"
|
|
|
- "\u002DE": "\u180E\u1821"
|
|
|
"\u002De": "\u180E\u1821"
|
|
|
"\u002D": "\u202F"
|
|
|
- "E\u0307": "\u1827"
|
|
|
"e\u0307": "\u1827"
|
|
|
- "E": "\u1821"
|
|
|
"e": "\u1821"
|
|
|
- "\u002DI": "\u180E\u1822"
|
|
|
- "\u002Di": "\u180E\u1822"
|
|
|
- "I": "\u1822"
|
|
|
"i": "\u1822"
|
|
|
- "O\u0307": "\u1825"
|
|
|
"o\u0307": "\u1825"
|
|
|
- "O": "\u1823"
|
|
|
"o": "\u1823"
|
|
|
- "U\u0307": "\u1826"
|
|
|
"u\u0307": "\u1826"
|
|
|
- "U": "\u1824"
|
|
|
"u": "\u1824"
|
|
|
- "NG": "\u1829"
|
|
|
- "nG": "\u1829"
|
|
|
+ # Feminine g control when followed by l
|
|
|
+ "nggl": "\u1829\u182D\u180D\u182F"
|
|
|
+ "ng\u0307": "\u1828\u182D"
|
|
|
"ng": "\u1829"
|
|
|
- "N": "\u1828"
|
|
|
"n": "\u1828"
|
|
|
- "B": "\u182A"
|
|
|
"b": "\u182A"
|
|
|
- "P": "\u182B"
|
|
|
"p": "\u182B"
|
|
|
- "Q": "\u182C"
|
|
|
"q": "\u182C"
|
|
|
- "KH": "\u183B"
|
|
|
- "Kh": "\u183B"
|
|
|
- "kH": "\u183B"
|
|
|
"kh": "\u183B"
|
|
|
- "K\u0307": "\u183A"
|
|
|
"k\u0307": "\u183A"
|
|
|
- "K": "\u182C"
|
|
|
"k": "\u182C"
|
|
|
- "G\u0307": "\u182D"
|
|
|
"g\u0307": "\u182D"
|
|
|
- "G": "\u182D"
|
|
|
"g": "\u182D"
|
|
|
- "M": "\u182E"
|
|
|
"m": "\u182E"
|
|
|
- "LH": "\u1840"
|
|
|
- "Lh": "\u1840"
|
|
|
- "lH": "\u1840"
|
|
|
"lh": "\u1840"
|
|
|
- "L": "\u182F"
|
|
|
"l": "\u182F"
|
|
|
- "TS\u0307": "\u183C"
|
|
|
- "Ts\u0307": "\u183C"
|
|
|
- "tS\u0307": "\u183C"
|
|
|
"ts\u0307": "\u183C"
|
|
|
- "S\u0301": "\u1831"
|
|
|
"s\u0301": "\u1831"
|
|
|
- "S": "\u1830"
|
|
|
"s": "\u1830"
|
|
|
- "T": "\u1832"
|
|
|
+ "t'": "\u1832\u180B"
|
|
|
"t": "\u1832"
|
|
|
- "D": "\u1833"
|
|
|
+ "d'": "\u1833\u180B"
|
|
|
"d": "\u1833"
|
|
|
"J": "\u1835"
|
|
|
"j": "\u1835"
|
|
|
- "Y": "\u1836"
|
|
|
"y": "\u1836"
|
|
|
- "V": "\u1838"
|
|
|
"v": "\u1838"
|
|
|
- "W": "\u1838"
|
|
|
"w": "\u1838"
|
|
|
- "F": "\u1839"
|
|
|
"f": "\u1839"
|
|
|
- "ZR": "\u183F"
|
|
|
- "Zr": "\u183F"
|
|
|
- "zR": "\u183F"
|
|
|
"zr": "\u183F"
|
|
|
- "R": "\u1837"
|
|
|
"r": "\u1837"
|
|
|
- "ZH": "\u1841"
|
|
|
- "Zh": "\u1841"
|
|
|
- "zH": "\u1841"
|
|
|
"zh": "\u1841"
|
|
|
- "Z": "\u183D"
|
|
|
"z": "\u183D"
|
|
|
- "CH": "\u1842"
|
|
|
- "Ch": "\u1842"
|
|
|
- "cH": "\u1842"
|
|
|
- "ch": "\u1842"
|
|
|
- # this is a Buryat letter
|
|
|
- "C\u0307": "\u1878"
|
|
|
- "c\u0307": "\u1878"
|
|
|
- "C": "\u1834"
|
|
|
+ "h\u0307": "\u1842"
|
|
|
"c": "\u1834"
|
|
|
- "H": "\u183E"
|
|
|
"h": "\u183E"
|
|
|
- "-": "\u180E"
|
|
|
+ # Double hyphen: kept in data
|
|
|
+ "\u002D\u002D": "\u002D\u002D"
|
|
|
+ # Mongolian ellipsis
|
|
|
+ "\u002E\u002E\u002E": "\u1801"
|
|
|
+ # Comma at end of subfield
|
|
|
+ "\u002C\u0020\u2021": "\u002C\u0020\u2021"
|
|
|
+ # Mongolian comma
|
|
|
+ "\u002C": "\u1802"
|
|
|
+ # Mongolian full stop
|
|
|
+ "\u002E\u002E": "\u1803"
|
|
|
+ # Mongolian four dots (chapter end)
|
|
|
+ "\u002B": "\u1805"
|
|
|
+ # Mongolian soft hyphen
|
|
|
+ "\u0020\u002D\u0020": "\u1806"
|
|
|
+ # Mongolian nirugu (letter extender added to initial ending in a full stop)
|
|
|
+ "\u002E\u0020": "\u180A\u0020"
|
|
|
+ # Mongolian Free Variation Separator One (FVS1) apostrophe used after t and d
|
|
|
+ "\u0027": "\u180B"
|
|
|
+ # Mongolian Free Variation Separator Two (FVS2) quotation mark used to force final alternate letter shape
|
|
|
+ "\u0022": "\u180C"
|
|
|
+ # Mongolian Free Variation Separator Three (FVS3) grave used to force intermediate alternate letter shape
|
|
|
+ "\u0060": "\u180D"
|
|
|
+ # Mongolian Vowel Separator (MVS) low line used as an unambiguous final vowel separator
|
|
|
+ "\u005F": "\u180E"
|
|
|
+ # Narrow No-Break Space (NNBSP) hyphen used before Mongolian grammatical endings
|
|
|
+ "\u002D": "\u202F"
|
|
|
+ "\u003C\u003C": "\u300A"
|
|
|
+ "\u003E\u003E": "\u300B"
|
|
|
+ # Middle dot; asterisk used to separate parts of one person's name (clan * forename)
|
|
|
+ "\u002A": "\u00B7"
|
|
|
+ "0": "\u1810"
|
|
|
+ "1": "\u1811"
|
|
|
+ "2": "\u1812"
|
|
|
+ "3": "\u1813"
|
|
|
+ "4": "\u1814"
|
|
|
+ "5": "\u1815"
|
|
|
+ "6": "\u1816"
|
|
|
+ "7": "\u1817"
|
|
|
+ "8": "\u1818"
|
|
|
+ "9": "\u1819"
|
|
|
|
|
|
script_to_roman:
|
|
|
map:
|
|
|
+ # Middle dot; asterisk used to separate parts of one person's name (clan * forename)
|
|
|
+ "\u00B7": "\u002A"
|
|
|
+ "\u0020\u182F\u1820\u1832\u180B\u1822\u1828": "\u0020latin"
|
|
|
+ "\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F": "mate\u0307riyal"
|
|
|
# ga
|
|
|
"\u182D\u1820": "g\u0307a"
|
|
|
# go
|
|
@@ -137,8 +130,6 @@ script_to_roman:
|
|
|
"\u182D\u1827": "ge\u0307"
|
|
|
# eg
|
|
|
"\u1821\u182D": "eg"
|
|
|
- # ig
|
|
|
- "\u1822\u182D": "ig"
|
|
|
# oeg
|
|
|
"\u1825\u182D": "o\u0307g"
|
|
|
# ueg
|
|
@@ -146,17 +137,11 @@ script_to_roman:
|
|
|
# eeg
|
|
|
"\u1827\u182D": "e\u0307g"
|
|
|
# qa
|
|
|
- "\u182C\u1820": "q\u0307a"
|
|
|
+ "\u182C\u1820": "qa"
|
|
|
# qo
|
|
|
- "\u182C\u1823": "q\u0307o"
|
|
|
+ "\u182C\u1823": "qo"
|
|
|
# qu
|
|
|
- "\u182C\u1824": "q\u0307u"
|
|
|
- # aq (should not occur)
|
|
|
- "\u1820\u182C": "aq"
|
|
|
- # oq (should not occur)
|
|
|
- "\u1823\u182C": "oq"
|
|
|
- # uq (should not occur)
|
|
|
- "\u1824\u182C": "uq"
|
|
|
+ "\u182C\u1824": "qu"
|
|
|
# ke
|
|
|
"\u182C\u1821": "ke"
|
|
|
# ki
|
|
@@ -167,37 +152,36 @@ script_to_roman:
|
|
|
"\u182C\u1826": "ku\u0307"
|
|
|
# kee
|
|
|
"\u182C\u1827": "ke\u0307"
|
|
|
- # ek (should not occur)
|
|
|
- "\u1821\u182C": "ek"
|
|
|
- # ik should not occur)
|
|
|
- "\u1822\u182C": "ik"
|
|
|
- # oek (should not occur)
|
|
|
- "\u1825\u182C": "o\u0307k"
|
|
|
- # uek (should not occur)
|
|
|
- "\u1826\u182C": "o\u0307k"
|
|
|
- # eek should not occur)
|
|
|
- "\u1827\u182C": "e\u0307k"
|
|
|
- # non-connecting vowel a
|
|
|
- "\u180E\u1820": "\u002Da"
|
|
|
- # non-connecting vowel e
|
|
|
- "\u180E\u1821": "\u002De"
|
|
|
- # non-connectubg vowel i
|
|
|
- "\u180E\u1822": "\u002Di"
|
|
|
- # Other Mongolian vowel separators to hyphen
|
|
|
- "\u180E": "\u002De"
|
|
|
- # Narrow no-break space to hyphen
|
|
|
- "\u202F": "\u002D"
|
|
|
- # Other Mongolian vowel NOT associated with g or k/q
|
|
|
- "\u1801": "..."
|
|
|
- "\u1802": ","
|
|
|
- "\u1803": "."
|
|
|
- "\u1804": ":"
|
|
|
- "\u1805": "*"
|
|
|
- "\u1806": "-"
|
|
|
+ # Double hyphen: kept in data
|
|
|
+ "\u002D\u002D": "\u002D\u002D"
|
|
|
+ # Mongolian ellipsis
|
|
|
+ "\u1801": "\u002E\u002E\u002E"
|
|
|
+ # Mongolian comma
|
|
|
+ "\u1802": "\u002C"
|
|
|
+ # Mongolian full stop
|
|
|
+ "\u1803": "\u002E\u002E"
|
|
|
+ # Mongolian colon
|
|
|
+ "\u1804": "\u003A"
|
|
|
+ # Mongolian four dots (chapter end)
|
|
|
+ "\u1805": "\u002B"
|
|
|
+ # Mongolian soft hyphen
|
|
|
+ "\u1806": "\u0020\u002D\u0020"
|
|
|
"\u1807": "\u0020"
|
|
|
- "\u1808": ","
|
|
|
- "\u1809": "."
|
|
|
- "\u180A": "-"
|
|
|
+ "\u1808": "\u002C"
|
|
|
+ "\u1809": "\u002E"
|
|
|
+ # Mongolian nirugu (letter extender to force initial form rather than isolated form in initials)
|
|
|
+ "\u180A\u0020": "\u002E\u0020"
|
|
|
+ # Mongolian Free Variation Separator One (FVS1) apostrophe used after t and d
|
|
|
+ "\u180B": "\u0027"
|
|
|
+ # Mongolian Free Variation Separator Two (FVS2) quotation mark used to force final alternate letter shape
|
|
|
+ "\u180C": "\u0022"
|
|
|
+ # Mongolian Free Variation Separator Three (FVS3) grave used to force intermediate alternate letter shape
|
|
|
+ "\u180D": "\u0060"
|
|
|
+ # Mongolian Vowel Separator (MVS) converts to hyphen as the vowel separator
|
|
|
+ "\u180E": "\u002D"
|
|
|
+ # Mongolian Free Variation Separator Four (FVS4) [not currently used]
|
|
|
+ "\u180F": "\u005B\u003F\u005B"
|
|
|
+ "\u180A": "."
|
|
|
"\u1810": "0"
|
|
|
"\u1811": "1"
|
|
|
"\u1812": "2"
|
|
@@ -219,10 +203,12 @@ script_to_roman:
|
|
|
"\u1827": "e\u0307"
|
|
|
"\u1828": "n"
|
|
|
"\u1829": "ng"
|
|
|
+ # Feminine g control when followed by l
|
|
|
+ "\u1829\u182D\u180D\u182F": "ngg`l"
|
|
|
"\u182A": "b"
|
|
|
"\u182B": "p"
|
|
|
"\u182C": "q"
|
|
|
- "\u182D": "g\u0307"
|
|
|
+ "\u182D": "g"
|
|
|
"\u182E": "m"
|
|
|
"\u182F": "l"
|
|
|
"\u1830": "s"
|
|
@@ -239,9 +225,13 @@ script_to_roman:
|
|
|
"\u183B": "kh"
|
|
|
"\u183C": "ts\u0307"
|
|
|
"\u183D": "z"
|
|
|
- "\u183E": "h\u0307"
|
|
|
+ "\u183E": "h"
|
|
|
"\u183F": "zr"
|
|
|
"\u1840": "lh"
|
|
|
"\u1841": "zh"
|
|
|
- "\u1842": "ch"
|
|
|
- "\u1878": "c\u0307"
|
|
|
+ "\u1842": "h\u0307"
|
|
|
+ # Narrow No-Break Space (NNBSP) converts to hyphen before Mongolian grammatical endings
|
|
|
+ "\u202F": "\u002D"
|
|
|
+ # Double angle brackets (U+300A, U+300B) convert to doubled less-than and greater-than signs
|
|
|
+ "\u300A": "\u003C\u003C"
|
|
|
+ "\u300B": "\u003E\u003E"
|