Browse Source

Update Cyrillic scripts.

Stefano Cossu 1 year ago
parent
commit
018d3b9e8a

+ 46 - 0
transliterator/tables/data/azerbaijani.yml

@@ -0,0 +1,46 @@
+general:
+  name: Azerbaijani (Cyrillic)  # Same display name as the azerbaijani entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0306 breve, U+0307 dot above, U+0310 candrabindu
+    "A\u0306": "\u04D8"  # Ә schwa
+    "a\u0306": "\u04D9"  # ә
+    # Included to handle typos
+    "GH": "\u0492"  # Ғ ghe with stroke
+    "Gh": "\u0492"
+    "gh": "\u0493"  # ғ
+    # Included to handle typos
+    "gH": "\u0493"
+    "I\u0310": "\u0408"  # Ј je
+    "i\u0310": "\u0458"  # ј
+    "G\u0307": "\u049C"  # Ҝ ka with vertical stroke
+    "g\u0307": "\u049D"  # ҝ
+    "O\u0307": "\u04E8"  # Ө barred o
+    "o\u0307": "\u04E9"  # ө
+    "U\u0307": "\u04AE"  # Ү straight u
+    "u\u0307": "\u04AF"  # ү
+    "H\u0307": "\u04BA"  # Һ shha
+    "h\u0307": "\u04BB"  # һ
+    "J": "\u04B8"  # Ҹ che with vertical stroke
+    "j": "\u04B9"  # ҹ
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0306 breve, U+0307 dot above, U+0310 candrabindu
+    "\u04D8": "A\u0306"  # Ә schwa
+    "\u04D9": "a\u0306"  # ә
+    "\u0492": "Gh"  # Ғ ghe with stroke
+    "\u0493": "gh"  # ғ
+    "\u0408": "I\u0310"  # Ј je
+    "\u0458": "i\u0310"  # ј
+    "\u049C": "G\u0307"  # Ҝ ka with vertical stroke
+    "\u049D": "g\u0307"  # ҝ
+    "\u04E8": "O\u0307"  # Ө barred o
+    "\u04E9": "o\u0307"  # ө
+    "\u04AE": "U\u0307"  # Ү straight u
+    "\u04AF": "u\u0307"  # ү
+    "\u04BA": "H\u0307"  # Һ shha
+    "\u04BB": "h\u0307"  # һ
+    "\u04B8": "J"  # Ҹ che with vertical stroke
+    "\u04B9": "j"  # ҹ

+ 17 - 1
transliterator/tables/data/index.yml

@@ -9,10 +9,12 @@
 
 armenian:
   name: Armenian
+azerbaijani:
+  name: Azerbaijani (Cyrillic)
 asian_cyrillic:
   name: Asian Cyrillic
   description: >
-    Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Azeri, Balkar, Bashkir, Buryat, Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Kazakh, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Kyrgyz, Lak, Lapp, Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mongolian, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor, Tabasaran, Tajik, Tat, Tatar, Turkmen, Tuva, Udekhe, Udmurt, Uzbek, Yakut.
+    Multi-purpose transliteration for non-Slavic Cyrillic scripts: Abaza, Abkhaz, Adygei, Aisor, Altai, Avar, Balkar, Bashkir, Buryat, Chechen, Chukchi, Chuvash, Dargwa, Dungan, Eskimo, Even, Evenki, Gagauz, Ingush, Inuit, Kabardian, Kalmyk, Karachay, Karachay-Balkar, Karakalpak, Karelian, Khakass, Khanty, Komi, Komi-Permyak, Koryak, Kumyk, Lak, Lapp, Lezghian, Lithuanian, Mansi, Mari, Moldovan, Molodstov, Mordvin, Nanai, Nenets, Nivkh, Nogai, Ossetic, Permyak, Romanian, Romany, Selkup, Shor, Tabasaran, Tat, Tuva, Udekhe, Udmurt, Yakut.
 belarusian:
   name: Belarusian
 bulgarian:
@@ -27,9 +29,23 @@ georgian:
   name: Georgian
 greek:
   name: Greek (classic)
+kazakh:
+  name: Kazakh (Cyrillic)
+kyrgyz:
+  name: Kyrgyz (Cyrillic)
+mongolian:
+  name: Mongolian (Cyrillic)
 russian:
   name: Russian
 serbian_macedonian:
   name: Serbian and Macedonian
+tajik:
+  name: Tajik (Cyrillic)
+tatar:
+  name: Tatar (Cyrillic)
+turkmen:
+  name: Turkmen (Cyrillic)
 ukrainian:
   name: Ukrainian
+uzbek:
+  name: Uzbek (Cyrillic)

+ 54 - 0
transliterator/tables/data/kazakh.yml

@@ -0,0 +1,54 @@
+general:
+  name: Kazakh (Cyrillic)  # Display name; keep identical to the kazakh entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0304 macron, U+0306 breve, U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "A\u0306": "\u04D8"  # Ә schwa
+    "a\u0306": "\u04D9"  # ә
+    # Included to handle typos
+    "GH": "\u0492"  # Ғ ghe with stroke
+    "Gh": "\u0492"
+    "gh": "\u0493"  # ғ
+    # Included to handle typos
+    "gH": "\u0493"
+    "I\u0304": "\u0406"  # І (U+0406); the Kazakh alphabet has no Ј (U+0408)
+    "i\u0304": "\u0456"  # і (U+0456)
+    "Q": "\u049A"  # Қ ka with descender
+    "q": "\u049B"  # қ
+    "N\uFE20G\uFE21": "\u04A2"  # Ң en with descender
+    # Included to handle typos
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"  # ң
+    # Included to handle typos
+    "n\uFE20G\uFE21": "\u04A3"
+    "O\u0307": "\u04E8"  # Ө barred o
+    "o\u0307": "\u04E9"  # ө
+    "U\u0304": "\u04B0"  # Ұ straight u with stroke
+    "u\u0304": "\u04B1"  # ұ
+    "U\u0307": "\u04AE"  # Ү straight u
+    "u\u0307": "\u04AF"  # ү
+    "H\u0307": "\u04BA"  # Һ shha
+    "h\u0307": "\u04BB"  # һ
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:
+    "\u04D8": "A\u0306"  # Ә schwa
+    "\u04D9": "a\u0306"  # ә
+    "\u0492": "Gh"  # Ғ ghe with stroke
+    "\u0493": "gh"  # ғ
+    "\u0406": "I\u0304"  # І (U+0406); the Kazakh alphabet has no Ј (U+0408)
+    "\u0456": "i\u0304"  # і (U+0456)
+    "\u049A": "Q"  # Қ ka with descender
+    "\u049B": "q"  # қ
+    "\u04A2": "N\uFE20G\uFE21"  # Ң en with descender
+    "\u04A3": "n\uFE20g\uFE21"  # ң
+    "\u04E8": "O\u0307"  # Ө barred o
+    "\u04E9": "o\u0307"  # ө
+    "\u04B0": "U\u0304"  # Ұ straight u with stroke
+    "\u04B1": "u\u0304"  # ұ
+    "\u04AE": "U\u0307"  # Ү straight u
+    "\u04AF": "u\u0307"  # ү
+    "\u04BA": "H\u0307"  # Һ shha
+    "\u04BB": "h\u0307"  # һ

+ 26 - 0
transliterator/tables/data/kyrgyz.yml

@@ -0,0 +1,26 @@
+general:
+  name: Kyrgyz (Cyrillic)  # Display name; keep identical to the kyrgyz entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "N\uFE20G\uFE21": "\u04A2"  # Ң en with descender
+    # Included to handle typos
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"  # ң
+    # Included to handle typos
+    "n\uFE20G\uFE21": "\u04A3"
+    "O\u0307": "\u04E8"  # Ө barred o
+    "o\u0307": "\u04E9"  # ө
+    "U\u0307": "\u04AE"  # Ү straight u
+    "u\u0307": "\u04AF"  # ү
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "\u04A2": "N\uFE20G\uFE21"  # Ң en with descender
+    "\u04A3": "n\uFE20g\uFE21"  # ң
+    "\u04E8": "O\u0307"  # Ө barred o
+    "\u04E9": "o\u0307"  # ө
+    "\u04AE": "U\u0307"  # Ү straight u
+    "\u04AF": "u\u0307"  # ү

+ 30 - 0
transliterator/tables/data/mongolian.yml

@@ -0,0 +1,30 @@
+general:
+  name: Mongolian (Cyrillic)  # Same display name as the mongolian entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "N\uFE20G\uFE21": "\u04A2"  # Ң en with descender
+    # Included to handle typos
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"  # ң
+    # Included to handle typos
+    "n\uFE20G\uFE21": "\u04A3"
+    "O\u0307": "\u04E8"  # Ө barred o
+    "o\u0307": "\u04E9"  # ө
+    "U\u0307": "\u04AE"  # Ү straight u
+    "u\u0307": "\u04AF"  # ү
+    "H\u0307": "\u04BA"  # Һ shha
+    "h\u0307": "\u04BB"  # һ
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "\u04A2": "N\uFE20G\uFE21"  # Ң en with descender
+    "\u04A3": "n\uFE20g\uFE21"  # ң
+    "\u04E8": "O\u0307"  # Ө barred o
+    "\u04E9": "o\u0307"  # ө
+    "\u04AE": "U\u0307"  # Ү straight u
+    "\u04AF": "u\u0307"  # ү
+    "\u04BA": "H\u0307"  # Һ shha
+    "\u04BB": "h\u0307"  # һ

+ 40 - 0
transliterator/tables/data/tajik.yml

@@ -0,0 +1,40 @@
+general:
+  name: Tajik (Cyrillic)  # Display name; keep identical to the tajik entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0304 macron, U+0307 dot above
+    # Included to handle typos
+    "GH": "\u0492"  # Ғ ghe with stroke
+    "Gh": "\u0492"
+    "gh": "\u0493"  # ғ
+    # Included to handle typos
+    "gH": "\u0493"
+    "I\u0304": "\u04E2"  # Ӣ i with macron
+    "i\u0304": "\u04E3"  # ӣ
+    "Q": "\u049A"  # Қ ka with descender
+    "q": "\u049B"  # қ
+    "U\u0304": "\u04EE"  # Ӯ u with macron
+    "u\u0304": "\u04EF"  # ӯ
+    "H\u0307": "\u04B2"  # Ҳ ha with descender
+    "h\u0307": "\u04B3"  # ҳ
+    "J": "\u04B6"  # Ҷ che with descender
+    "j": "\u04B7"  # ҷ
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0304 macron, U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "\u0492": "Gh"  # Ғ ghe with stroke
+    "\u0493": "gh"  # ғ
+    "\u04E2": "I\u0304"  # Ӣ i with macron
+    "\u04E3": "i\u0304"  # ӣ
+    "\u049A": "Q"  # Қ ka with descender
+    "\u049B": "q"  # қ
+    "\u04A2": "N\uFE20G\uFE21"  # Ң; NOTE(review): no roman_to_script counterpart in this table — confirm intentional
+    "\u04A3": "n\uFE20g\uFE21"  # ң; NOTE(review): same as above
+    "\u04EE": "U\u0304"  # Ӯ u with macron
+    "\u04EF": "u\u0304"  # ӯ
+    "\u04B2": "H\u0307"  # Ҳ ha with descender
+    "\u04B3": "h\u0307"  # ҳ
+    "\u04B6": "J"  # Ҷ che with descender
+    "\u04B7": "j"  # ҷ

+ 50 - 0
transliterator/tables/data/tatar.yml

@@ -0,0 +1,50 @@
+general:
+  name: Tatar (Cyrillic)  # Display name; keep identical to the tatar entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0306 breve, U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "A\u0306": "\u04D8"  # Ә schwa
+    "a\u0306": "\u04D9"  # ә
+    "J": "\u0496"  # Җ zhe with descender
+    "j": "\u0497"  # җ
+    "N\uFE20G\uFE21": "\u04A2"  # Ң en with descender
+    # Included to handle typos
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"  # ң
+    # Included to handle typos
+    "n\uFE20G\uFE21": "\u04A3"
+    "O\u0307": "\u04E8"  # Ө barred o
+    "o\u0307": "\u04E9"  # ө
+    "U\u0307": "\u04AE"  # Ү straight u
+    "u\u0307": "\u04AF"  # ү
+    "H\u0307": "\u04BA"  # Һ shha
+    "h\u0307": "\u04BB"  # һ
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0306 breve, U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "\u04D8": "A\u0306"  # Ә schwa
+    "\u04D9": "a\u0306"  # ә
+    # Included to normalize alternate character
+    "\u04D2": "A\u0306"  # Ӓ a with diaeresis
+    # Included to normalize alternate character
+    "\u04D3": "a\u0306"  # ӓ
+    "\u0496": "J"  # Җ zhe with descender
+    "\u0497": "j"  # җ
+    "\u04A2": "N\uFE20G\uFE21"  # Ң en with descender
+    "\u04A3": "n\uFE20g\uFE21"  # ң
+    "\u04E8": "O\u0307"  # Ө barred o
+    "\u04E9": "o\u0307"  # ө
+    # Included to normalize alternate character
+    "\u04E6": "O\u0307"  # Ӧ o with diaeresis
+    # Included to normalize alternate character
+    "\u04E7": "o\u0307"  # ӧ
+    "\u04AE": "U\u0307"  # Ү straight u
+    "\u04AF": "u\u0307"  # ү
+    # Included to normalize alternate character
+    "\u04F0": "U\u0307"  # Ӱ u with diaeresis
+    # Included to normalize alternate character
+    "\u04F1": "u\u0307"  # ӱ
+    "\u04BA": "H\u0307"  # Һ shha
+    "\u04BB": "h\u0307"  # һ

+ 34 - 0
transliterator/tables/data/turkmen.yml

@@ -0,0 +1,34 @@
+general:
+  name: Turkmen (Cyrillic)  # Display name; keep identical to the turkmen entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0306 breve, U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "J": "\u0496"  # Җ zhe with descender
+    "j": "\u0497"  # җ
+    "N\uFE20G\uFE21": "\u04A2"  # Ң en with descender
+    # Included to handle typos
+    "N\uFE20g\uFE21": "\u04A2"
+    "n\uFE20g\uFE21": "\u04A3"  # ң
+    # Included to handle typos
+    "n\uFE20G\uFE21": "\u04A3"
+    "O\u0307": "\u04E8"  # Ө barred o
+    "o\u0307": "\u04E9"  # ө
+    "U\u0307": "\u04AE"  # Ү straight u
+    "u\u0307": "\u04AF"  # ү
+    "A\u0306": "\u04D8"  # Ә schwa
+    "a\u0306": "\u04D9"  # ә
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0306 breve, U+0307 dot above, U+FE20/U+FE21 ligature halves
+    "\u0496": "J"  # Җ zhe with descender
+    "\u0497": "j"  # җ
+    "\u04A2": "N\uFE20G\uFE21"  # Ң en with descender
+    "\u04A3": "n\uFE20g\uFE21"  # ң
+    "\u04E8": "O\u0307"  # Ө barred o
+    "\u04E9": "o\u0307"  # ө
+    "\u04AE": "U\u0307"  # Ү straight u
+    "\u04AF": "u\u0307"  # ү
+    "\u04D8": "A\u0306"  # Ә schwa
+    "\u04D9": "a\u0306"  # ә

+ 35 - 0
transliterator/tables/data/uzbek.yml

@@ -0,0 +1,35 @@
+general:
+  name: Uzbek (Cyrillic)  # Display name; keep identical to the uzbek entry in index.yml
+  parents:
+    - _cyrillic_base  # presumably the base table that the maps below extend — TODO confirm
+    - _ignore_base  # NOTE(review): no other table in this commit lists this parent — confirm it is intended
+
+roman_to_script:  # Latin romanization -> Cyrillic letter
+  map:  # U+0306 breve, U+0307 dot above
+    # Included to handle typos
+    "GH": "\u0492"  # Ғ ghe with stroke
+    "Gh": "\u0492"
+    "gh": "\u0493"  # ғ
+    # Included to handle typos
+    "gH": "\u0493"
+    "Q": "\u049A"  # Қ ka with descender
+    "q": "\u049B"  # қ
+    "U\u0306": "\u040E"  # Ў short u
+    "u\u0306": "\u045E"  # ў
+    "H\u0307": "\u04B2"  # Ҳ ha with descender
+    "h\u0307": "\u04B3"  # ҳ
+
+script_to_roman:  # Cyrillic letter -> Latin romanization
+  map:  # U+0306 breve, U+0307 dot above
+    "\u0492": "Gh"  # Ғ ghe with stroke
+    "\u0493": "gh"  # ғ
+    "\u049A": "Q"  # Қ ka with descender
+    "\u049B": "q"  # қ
+    # Included to normalize alternate character
+    "\u04C3": "Q"  # Ӄ ka with hook
+    # Included to normalize alternate character
+    "\u04C4": "q"  # ӄ
+    "\u040E": "U\u0306"  # Ў short u
+    "\u045E": "u\u0306"  # ў
+    "\u04B2": "H\u0307"  # Ҳ ha with descender
+    "\u04B3": "h\u0307"  # ҳ