소스 검색

Merge pull request #234 from lcnetdev/updates_mongolian_mongol_bichig

updates mongolian_mongol_bichig table mappings
Stefano Cossu 2 주 전
부모
커밋
ebc1902c6c
1개의 변경된 파일, 94줄 추가 및 104줄 삭제
  1. 94 104
      scriptshifter/tables/data/mongolian_mongol_bichig.yml

+ 94 - 104
scriptshifter/tables/data/mongolian_mongol_bichig.yml

@@ -8,111 +8,104 @@ general:
 roman_to_script:
 
   map:
+    "\u0020latin": "\u0020\u182F\u1820\u1832\u180B\u1822\u1828"
+    "mate\u0307riyal": "\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F"
     "\u002Daca": "\u202F\u1820\u1834\u1820"
-    "\u002DA": "\u180E\u1820"
     "\u002Da": "\u180E\u1820"
-    "A": "\u1820"
     "a": "\u1820"
     "\u002Dece": "\u202F\u1821\u1834\u1821"
-    "\u002DE": "\u180E\u1821"
     "\u002De": "\u180E\u1821"
     "\u002D": "\u202F"
-    "E\u0307": "\u1827"
     "e\u0307": "\u1827"
-    "E": "\u1821"
     "e": "\u1821"
-    "\u002DI": "\u180E\u1822"
-    "\u002Di": "\u180E\u1822"
-    "I": "\u1822"
     "i": "\u1822"
-    "O\u0307": "\u1825"
     "o\u0307": "\u1825"
-    "O": "\u1823"
     "o": "\u1823"
-    "U\u0307": "\u1826"
     "u\u0307": "\u1826"
-    "U": "\u1824"
     "u": "\u1824"
-    "NG": "\u1829"
-    "nG": "\u1829"
+    # Feminine g control when followed by l
+    "nggl": "\u1829\u182D\u180D\u182F"
+    "ng\u0307": "\u1828\u182D"
     "ng": "\u1829"
-    "N": "\u1828"
     "n": "\u1828"
-    "B": "\u182A"
     "b": "\u182A"
-    "P": "\u182B"
     "p": "\u182B"
-    "Q": "\u182C"
     "q": "\u182C"
-    "KH": "\u183B"
-    "Kh": "\u183B"
-    "kH": "\u183B"
     "kh": "\u183B"
-    "K\u0307": "\u183A"
     "k\u0307": "\u183A"
-    "K": "\u182C"
     "k": "\u182C"
-    "G\u0307": "\u182D"
     "g\u0307": "\u182D"
-    "G": "\u182D"
     "g": "\u182D"
-    "M": "\u182E"
     "m": "\u182E"
-    "LH": "\u1840"
-    "Lh": "\u1840"
-    "lH": "\u1840"
     "lh": "\u1840"
-    "L": "\u182F"
     "l": "\u182F"
-    "TS\u0307": "\u183C"
-    "Ts\u0307": "\u183C"
-    "tS\u0307": "\u183C"
     "ts\u0307": "\u183C"
-    "S\u0301": "\u1831"
     "s\u0301": "\u1831"
-    "S": "\u1830"
     "s": "\u1830"
-    "T": "\u1832"
+    "t'": "\u1832\u180B"
     "t": "\u1832"
-    "D": "\u1833"
+    "d'": "\u1833\u180B"
     "d": "\u1833"
     "J": "\u1835"
     "j": "\u1835"
-    "Y": "\u1836"
     "y": "\u1836"
-    "V": "\u1838"
     "v": "\u1838"
-    "W": "\u1838"
     "w": "\u1838"
-    "F": "\u1839"
     "f": "\u1839"
-    "ZR": "\u183F"
-    "Zr": "\u183F"
-    "zR": "\u183F"
     "zr": "\u183F"
-    "R": "\u1837"
     "r": "\u1837"
-    "ZH": "\u1841"
-    "Zh": "\u1841"
-    "zH": "\u1841"
     "zh": "\u1841"
-    "Z": "\u183D"
     "z": "\u183D"
-    "CH": "\u1842"
-    "Ch": "\u1842"
-    "cH": "\u1842"
-    "ch": "\u1842"
-    # this is a Buryat letter
-    "C\u0307": "\u1878"
-    "c\u0307": "\u1878"
-    "C": "\u1834"
+    "h\u0307": "\u1842"
     "c": "\u1834"
-    "H": "\u183E"
     "h": "\u183E"
-    "-": "\u180E"
+    # Double hyphen: kept in data
+    "\u002D\u002D": "\u002D\u002D"
+    # Mongolian ellipsis
+    "\u002E\u002E\u002E": "\u1801"
+    # Comma at end of subfield
+    "\u002C\u0020\u2021": "\u002C\u0020\u2021"
+    # Mongolian comma
+    "\u002C": "\u1802"
+    # Mongolian full stop
+    "\u002E\u002E": "\u1803"
+    # Mongolian four dots (chapter end)
+    "\u002B": "\u1805"
+    # Mongolian soft hyphen
+    "\u0020\u002D\u0020": "\u1806"
+    # Mongolian nirugu (letter extender added to initial ending in a full stop)
+    "\u002E\u0020": "\u180A\u0020"
+    # Mongolian Free Variation Separator One (FVS1) apostrophe used after t and d
+    "\u0027": "\u180B"
+    # Mongolian Free Variation Separator Two (FVS2) quotation mark used to force final alternate letter shape
+    "\u0022": "\u180C"
+    # Mongolian Free Variation Separator Three (FVS3) grave used to force intermediate alternate letter shape
+    "\u0060": "\u180D"
+    # Mongolian Vowel Separator (MVS) low line used as an unambiguous final vowel separator
+    "\u005F": "\u180E"
+    # Narrow No-Break Space (NNBSP) hyphen used before Mongolian grammatical endings
+    "\u002D": "\u202F"
+    "\u003C\u003C": "\u300A"
+    "\u003E\u003E": "\u300B"
+    # Middle dot; asterisk used to separate parts of one person's name (clan * forename)
+    "\u002A": "\u00B7"
+    "0": "\u1810"
+    "1": "\u1811"
+    "2": "\u1812"
+    "3": "\u1813"
+    "4": "\u1814"
+    "5": "\u1815"
+    "6": "\u1816"
+    "7": "\u1817"
+    "8": "\u1818"
+    "9": "\u1819"
 
 script_to_roman:
   map:
+    # Middle dot; asterisk used to separate parts of one person's name (clan * forename)
+    "\u00B7": "\u002A"
+    "\u0020\u182F\u1820\u1832\u180B\u1822\u1828": "\u0020latin"
+    "\u182E\u1820\u1832\u180B\u1827\u1837\u1822\u1836\u1820\u182F": "mate\u0307riyal"
     # ga
     "\u182D\u1820": "g\u0307a"
     # go
@@ -137,8 +130,6 @@ script_to_roman:
     "\u182D\u1827": "ge\u0307"
     # eg
     "\u1821\u182D": "eg"
-    # ig
-    "\u1822\u182D": "ig"
     # oeg
     "\u1825\u182D": "o\u0307g"
     # ueg
@@ -146,17 +137,11 @@ script_to_roman:
     # eeg
     "\u1827\u182D": "e\u0307g"
     # qa
-    "\u182C\u1820": "q\u0307a"
+    "\u182C\u1820": "qa"
     # qo
-    "\u182C\u1823": "q\u0307o"
+    "\u182C\u1823": "qo"
     # qu
-    "\u182C\u1824": "q\u0307u"
-    # aq (should not occur)
-    "\u1820\u182C": "aq"
-    # oq (should not occur)
-    "\u1823\u182C": "oq"
-    # uq (should not occur)
-    "\u1824\u182C": "uq"
+    "\u182C\u1824": "qu"
     # ke
     "\u182C\u1821": "ke"
     # ki
@@ -167,37 +152,36 @@ script_to_roman:
     "\u182C\u1826": "ku\u0307"
     # kee
     "\u182C\u1827": "ke\u0307"
-    # ek (should not occur)
-    "\u1821\u182C": "ek"
-    # ik should not occur)
-    "\u1822\u182C": "ik"
-    # oek (should not occur)
-    "\u1825\u182C": "o\u0307k"
-    # uek (should not occur)
-    "\u1826\u182C": "o\u0307k"
-    # eek should not occur)
-    "\u1827\u182C": "e\u0307k"
-    # non-connecting vowel a
-    "\u180E\u1820": "\u002Da"
-    # non-connecting vowel e
-    "\u180E\u1821": "\u002De"
-    # non-connectubg vowel i
-    "\u180E\u1822": "\u002Di"
-    # Other Mongolian vowel separators to hyphen
-    "\u180E": "\u002De"
-    # Narrow no-break space to hyphen
-    "\u202F": "\u002D"
-    # Other Mongolian vowel NOT associated with g or k/q
-    "\u1801": "..."
-    "\u1802": ","
-    "\u1803": "."
-    "\u1804": ":"
-    "\u1805": "*"
-    "\u1806": "-"
+    # Double hyphen: kept in data
+    "\u002D\u002D": "\u002D\u002D"
+    # Mongolian ellipsis
+    "\u1801": "\u002E\u002E\u002E"
+    # Mongolian comma
+    "\u1802": "\u002C"
+    # Mongolian full stop
+    "\u1803": "\u002E\u002E"
+    # Mongolian colon
+    "\u1804": "\u003A"
+    # Mongolian four dots (chapter end)
+    "\u1805": "\u002B"
+    # Mongolian soft hyphen
+    "\u1806": "\u0020\u002D\u0020"
     "\u1807": "\u0020"
-    "\u1808": ","
-    "\u1809": "."
-    "\u180A": "-"
+    "\u1808": "\u002C"
+    "\u1809": "\u002E"
+    # Mongolian nirugu (letter extender to force initial form rather than isolated form in initials)
+    "\u180A\u0020": "\u002E\u0020"
+    # Mongolian Free Variation Separator One (FVS1) apostrophe used after t and d
+    "\u180B": "\u0027"
+    # Mongolian Free Variation Separator Two (FVS2) quotation mark used to force final alternate letter shape
+    "\u180C": "\u0022"
+    # Mongolian Free Variation Separator Three (FVS3) grave used to force intermediate alternate letter shape
+    "\u180D": "\u0060"
+    # Mongolian Vowel Separator (MVS) converts to hyphen as the vowel separator
+    "\u180E": "\u002D"
+    # Mongolian Free Variation Separator Four (FVS4) [not currently used]
+    "\u180F": "\u005B\u003F\u005B"
+    "\u180A": "."
     "\u1810": "0"
     "\u1811": "1"
     "\u1812": "2"
@@ -219,10 +203,12 @@ script_to_roman:
     "\u1827": "e\u0307"
     "\u1828": "n"
     "\u1829": "ng"
+    # Feminine g control when followed by l
+    "\u1829\u182D\u180D\u182F": "ngg`l"
     "\u182A": "b"
     "\u182B": "p"
     "\u182C": "q"
-    "\u182D": "g\u0307"
+    "\u182D": "g"
     "\u182E": "m"
     "\u182F": "l"
     "\u1830": "s"
@@ -239,9 +225,13 @@ script_to_roman:
     "\u183B": "kh"
     "\u183C": "ts\u0307"
     "\u183D": "z"
-    "\u183E": "h\u0307"
+    "\u183E": "h"
     "\u183F": "zr"
     "\u1840": "lh"
     "\u1841": "zh"
-    "\u1842": "ch"
-    "\u1878": "c\u0307"
+    "\u1842": "h\u0307"
+    # Narrow No-Break Space (NNBSP) converts to hyphen before Mongolian grammatical endings
+    "\u202F": "\u002D"
+    # Double angle brackets (U+300A, U+300B) convert to doubled less-than / greater-than signs
+    "\u300A": "\u003C\u003C"
+    "\u300B": "\u003E\u003E"