Procházet zdrojové kódy

Add R2S section to Cyrillic overrides.

Stefano Cossu před 2 roky
rodič
revize
130cb80816

+ 0 - 27
transliterator/tables/data/_cyrillic_base.yml

@@ -123,44 +123,24 @@ roman_to_script:
     "b": "\u0431"
     "V": "\u0412"
     "v": "\u0432"
-    "H": "\u0413"
-    "h": "\u0433"
-    "G": "\u0490"
-    "g": "\u0491"
     "D": "\u0414"
     "d": "\u0434"
     "E": "\u0415"
     "e": "\u0435"
-    "Z\uFE20H\uFE21": "\u0416"
     # this conversion shouldn't be needed, but does no harm
-    "Z\uFE20h\uFE21": "\u0416"
     "Z": "\u0417"
-    "z\uFE20h\uFE21": "\u0436"
     "z": "\u0437"
-    "Y": "\u0418"
-    "y": "\u0438"
     "I\u0306": "\u0419"
-    "I\u0308": "\u0407"
-    "I\uFE20E\uFE21": "\u0404"
     # this conversion shouldn't be needed, but does no harm
-    "I\uFE20e\uFE21": "\u0404"
-    "I\uFE20O\uFE21": "\u0401"
-    # this conversion shouldn't be needed, but does no harm
-    "I\uFE20o\uFE21": "\u0401"
     "I\uFE20U\uFE21": "\u042E"
     # this conversion shouldn't be needed, but does no harm
     "I\uFE20u\uFE21": "\u042E"
     "I\uFE20A\uFE21": "\u042F"
     # this conversion shouldn't be needed, but does no harm
     "I\uFE20a\uFE21": "\u042F"
-    "I": "\u0406"
     "i\u0306": "\u0439"
-    "i\u0308": "\u0457"
-    "i\uFE20e\uFE21": "\u0454"
-    "i\uFE20o\uFE21": "\u0451"
     "i\uFE20u\uFE21": "\u044E"
     "i\uFE20a\uFE21": "\u044F"
-    "i": "\u0456"
     # this conversion shouldn't be needed, but does no harm
     "KH": "\u0425"
     "Kh": "\u0425"
@@ -180,21 +160,14 @@ roman_to_script:
     "R": "\u0420"
     "r": "\u0440"
     # this conversion shouldn't be needed, but does no harm
-    "SHCH": "\u0429"
-    "Shch": "\u0429"
     # this conversion shouldn't be needed, but does no harm
     "SH": "\u0428"
     "Sh": "\u0428"
     "S": "\u0421"
-    "shch": "\u0449"
     "sh": "\u0448"
     "s": "\u0441"
     # this conversion shouldn't be needed, but does no harm
-    "T\uFE20S\uFE21": "\u0426"
-    # this conversion shouldn't be needed, but does no harm
-    "T\uFE20s\uFE21": "\u0426"
     "T": "\u0422"
-    "t\uFE20s\uFE21": "\u0446"
     "t": "\u0442"
     "U": "\u0423"
     "u": "\u0443"

+ 40 - 0
transliterator/tables/data/belorusian.yml

@@ -2,6 +2,46 @@ general:
   name: Belorusian
   inherits: _cyrillic_base
 
+roman_to_script:  # Belorusian R2S entries; general.inherits is _cyrillic_base
+  map:
+    "H": "\u0413"
+    "h": "\u0433"
+    "G": "\u0490"
+    "g": "\u0491"
+    "E\u0307": "\u042D"
+    "e\u0307": "\u044D"
+    "e": "\u0435"
+    "Z\uFE20H\uFE21": "\u0416"
+    # The following conversion shouldn't be needed, but does no harm.
+    "Z\uFE20h\uFE21": "\u0416"
+    "z\uFE20h\uFE21": "\u0436"
+    "z": "\u0437"
+    "I\uFE20O\uFE21": "\u0401"
+    # The following conversion shouldn't be needed, but does no harm.
+    "I\uFE20o\uFE21": "\u0401"
+    "I": "\u0406"
+    "i\uFE20o\uFE21": "\u0451"
+    "i": "\u0456"
+    # Obsolete single-letter forms (\u0429 / \u0449), left commented out:
+    # "SHCH": "\u0429"
+    # "Shch": "\u0429"
+    # "shch": "\u0449"
+    # Current two-letter forms. The output case follows the input case, so
+    # the all-caps key yields two capitals (\u0428\u0427), not \u0428\u0447.
+    # The all-caps conversion shouldn't be needed, but does no harm.
+    "SHCH": "\u0428\u0427"
+    "Shch": "\u0428\u0447"
+    "shch": "\u0448\u0447"
+    # The following conversion shouldn't be needed, but does no harm.
+    "TS": "\u0426"
+    "Ts": "\u0426"
+    "ts": "\u0446"
+    "U\u0306": "\u040E"
+    "u\u0306": "\u045E"
+    "Y": "\u042B"
+    "y": "\u044B"
+    "\u0027": "\u044C"
+
 script_to_roman:
   map:
     "\u0401": "I\uFE20O\uFE21"

+ 56 - 0
transliterator/tables/data/russian.yml

@@ -2,6 +2,62 @@ general:
   name: Russian
   inherits: _cyrillic_base
 
+roman_to_script:  # Russian R2S entries; general.inherits is _cyrillic_base
+  map:
+    "G": "\u0413"
+    "g": "\u0433"
+    "E\u0307": "\u042D"
+    "E\u0308": "\u0401"
+    # The following conversion results in a non-MARC-8 character.
+    "E\u0328": "\u0466"
+    "E": "\u0415"
+    "e\u0307": "\u044D"
+    "e\u0308": "\u0451"
+    # The following conversion results in a non-MARC-8 character.
+    "e\u0328": "\u0467"
+    # The following conversion shouldn't be needed, but does no harm.
+    "ZH": "\u0416"
+    "Zh": "\u0416"
+    "Z": "\u0417"
+    "zh": "\u0436"
+    "I\u0304": "\u0406"
+    "I\uFE20E\uFE21": "\u0462"
+    # The following conversion shouldn't be needed, but does no harm.
+    "I\uFE20e\uFE21": "\u0462"
+    "I": "\u0418"
+    "i\u0304": "\u0456"
+    "i\uFE20e\uFE21": "\u0463"
+    "i": "\u0438"
+    # The following conversion results in a non-MARC-8 character.
+    "O\u0307": "\u04E8"
+    # The following conversion results in a non-MARC-8 character.
+    "o\u0307": "\u04E9"
+    # The following conversion shouldn't be needed, but does no harm.
+    "SHCH": "\u0429"
+    "Shch": "\u0429"
+    "shch": "\u0449"
+    "T\uFE20S\uFE21": "\u0426"
+    # The following conversion shouldn't be needed, but does no harm.
+    "T\uFE20s\uFE21": "\u0426"
+    "t\uFE20s\uFE21": "\u0446"
+    # The following conversion results in a non-MARC-8 character.
+    "U\u0307": "\u04AE"
+    # The following conversion results in a non-MARC-8 character.
+    "u\u0307": "\u04AF"
+    "F\u0307": "\u0472"
+    "f\u0307": "\u0473"
+    # The following conversion results in a non-MARC-8 character.
+    "H\u0307": "\u04BA"
+    # The following conversion results in a non-MARC-8 character.
+    "h\u0307": "\u04BB"
+    "Y\u0307": "\u0474"
+    "Y": "\u042B"
+    "y\u0307": "\u0475"
+    "y": "\u044B"
+    "\u0027": "\u044C"
+    # The following conversion is ambiguous: \u044C is also theoretically
+    # possible as the target.
+    "\u02BA": "\u044A"
+
 script_to_roman:
   map:
     "\u0401": "E\u0308"

+ 36 - 0
transliterator/tables/data/ukrainian.yml

@@ -2,6 +2,42 @@ general:
   name: Ukrainian
   inherits: _cyrillic_base
 
+roman_to_script:  # Ukrainian R2S entries; general.inherits is _cyrillic_base
+  map:
+    "H": "\u0413"
+    "h": "\u0433"
+    "G": "\u0490"
+    "g": "\u0491"
+    "E": "\u0415"
+    "Z\uFE20H\uFE21": "\u0416"
+    # The following conversion shouldn't be needed, but does no harm.
+    "Z\uFE20h\uFE21": "\u0416"
+    "Z": "\u0417"
+    "z\uFE20h\uFE21": "\u0436"
+    "Y": "\u0418"
+    "y": "\u0438"
+    "I\u0308": "\u0407"
+    "I\uFE20E\uFE21": "\u0404"
+    # The following conversion shouldn't be needed, but does no harm.
+    "I\uFE20e\uFE21": "\u0404"
+    "I\uFE20O\uFE21": "\u0401"
+    # The following conversion shouldn't be needed, but does no harm.
+    "I\uFE20o\uFE21": "\u0401"
+    "I": "\u0406"
+    "i\u0308": "\u0457"
+    "i\uFE20e\uFE21": "\u0454"
+    "i\uFE20o\uFE21": "\u0451"
+    "i": "\u0456"
+    # The following conversion shouldn't be needed, but does no harm.
+    "SHCH": "\u0429"
+    "Shch": "\u0429"
+    "shch": "\u0449"
+    # The following conversion shouldn't be needed, but does no harm.
+    "T\uFE20S\uFE21": "\u0426"
+    # The following conversion shouldn't be needed, but does no harm.
+    "T\uFE20s\uFE21": "\u0426"
+    "t\uFE20s\uFE21": "\u0446"
+
 script_to_roman:
   map:
     "\u0401": "I\uFE20O\uFE21"