Browse Source

Greek: more mapping fixes.

scossu 1 year ago
parent
commit
1344f083e9
2 changed files with 86 additions and 47 deletions
  1. 85 46
      scriptshifter/tables/data/greek_classical.yml
  2. 1 1
      tests/data/script_samples/greek.csv

+ 85 - 46
scriptshifter/tables/data/greek_classical.yml

@@ -14,6 +14,7 @@ script_to_roman:
   normalize:
     # Alpha
     "\u03B1":
+      - "\u03AC"
       - "\u1F00"
       - "\u1F02"
       - "\u1F04"
@@ -30,7 +31,6 @@ script_to_roman:
       - "\u1FB3"
       - "\u1FB4"
       - "\u1FB7"
-      - "\u1FBC"
     "\u0391":
       - "\u1F08"
       - "\u1F0A"
@@ -41,6 +41,7 @@ script_to_roman:
       - "\u1F8E"
       - "\u1FB8"
       - "\u1FB9"
+      - "\u1FBC"
     # Rough alpha
     "\u1F01":
       - "\u03B1\u0314"
@@ -62,6 +63,7 @@ script_to_roman:
       - "\u1F0F"
     # Epsilon
     "\u03B5":
+      - "\u03AD"
       - "\u1F10"
       - "\u1F12"
       - "\u1F14"
@@ -82,6 +84,7 @@ script_to_roman:
       - "\u1F1D"
     # Eta
     "\u03B7":
+      - "\u03AE"
       - "\u1F20"
       - "\u1F22"
       - "\u1F24"
@@ -92,6 +95,11 @@ script_to_roman:
       - "\u1F92"
       - "\u1F94"
       - "\u1F96"
+      - "\u1FC2"
+      - "\u1FC3"
+      - "\u1FC4"
+      - "\u1FC6"
+      - "\u1FC7"
     "\u0397":
       - "\u1F28"
       - "\u1F2A"
@@ -101,10 +109,6 @@ script_to_roman:
       - "\u1F9A"
       - "\u1F9C"
       - "\u1F9E"
-      - "\u1FC2"
-      - "\u1FC3"
-      - "\u1FC4"
-      - "\u1FC7"
       - "\u1FCC"
     # Rough eta
     "\u1F21":
@@ -128,6 +132,7 @@ script_to_roman:
     # Iota
     "\u03B9":
       - "\u0390"
+      - "\u03AF"
       - "\u03CA"
       - "\u1F30"
       - "\u1F32"
@@ -139,6 +144,7 @@ script_to_roman:
       - "\u1FD1"
       - "\u1FD2"
       - "\u1FD3"
+      - "\u1FD6"
       - "\u1FD7"
     "\u0399":
       - "\u03AA"
@@ -162,6 +168,7 @@ script_to_roman:
         # ὶ
     # Omicron
     "\u03BF":
+      - "\u03CC"
       - "\u1F40"
       - "\u1F42"
       - "\u1F44"
@@ -193,6 +200,7 @@ script_to_roman:
     "\u03C5":
       - "\u03B0"
       - "\u03CB"
+      - "\u03CD"
       - "\u1F50"
       - "\u1F52"
       - "\u1F54"
@@ -223,6 +231,7 @@ script_to_roman:
       - "\u1F5F"
     # Omega
     "\u03C9":
+      - "\u03CE"
       - "\u1F60"
       - "\u1F62"
       - "\u1F64"
@@ -236,6 +245,7 @@ script_to_roman:
       - "\u1FF2"
       - "\u1FF3"
       - "\u1FF4"
+      - "\u1FF6"
       - "\u1FF7"
     "\u03A9":
       - "\u1F68"
@@ -290,7 +300,7 @@ script_to_roman:
     "\u201C": "\"\u0332"
     "\u201D": "\"\u0333"
     "\u2018": "'\u0332"
-    "\u2019": "'\u0333"
+    #"\u2019": "'\u0333"
     "\u2116": "No\u0332"
     # "\u0300": ""
     # "\u0301": ""
@@ -319,12 +329,13 @@ script_to_roman:
     "\u0384": " \u0301"
     "\u0385": " \u0308\u0301"
     "\u0386\u0314": "Ha\u0301"
+    "\u0386\u1F31": "Hai"
     "\u0386": "A\u0301"
     "\u0387": ";\u0333"
     "\u0388\u0314": "He\u0301"
     "\u0388": "E\u0301"
-    "\u0389\u0314": "E\u0304\u0301"
-    "\u0389": "E\u0304\u0301"
+    "\u0389\u0314": "\u0112\u0301"
+    "\u0389": "\u0112\u0301"
     "\u038A\u0314": "Hi\u0301"
     "\u038A": "I\u0301"
     # \u038B reserved
@@ -334,7 +345,7 @@ script_to_roman:
     "\u038E\u0314": "Hy\u0301"
     "\u038E": "Y\u0301"
     "\u038F\u0314": "Ho\u0301"
-    "\u038F": "O\u0304\u0301"
+    "\u038F": "\u014C\u0301"
     "\u0390": "i\u0308\u0301"
     "\u1F09": "Ha"
     "\u0391\u03C5": "Au"
@@ -346,8 +357,10 @@ script_to_roman:
     "\u0395\u03C5": "Eu"
     "\u0395": "E"
     "\u0396": "Z"
-    "\u1F29": "He\u0304"
-    "\u0397": "E\u0304"
+    "\u1F29": "H\u0113"
+    "\u0397": "\u0112"
+    "\u0397\u03C5": "\u0112u"
+    "\u0397\u1F51": "H\u0113u"
     "\u0398": "Th"
     "\u1F39": "Hi"
     "\u0399\u03C5": "Iu"
@@ -370,23 +383,28 @@ script_to_roman:
     "\u03A4": "T"
     "\u1F59": "Hy"
     "\u03A5": "Y"
+    "\u03A5\u03B9": "Ui"
+    "\u03A5\u1F31": "Hui"
     "\u03A6": "Ph"
     "\u03A7": "Ch"
     "\u03A8": "Ps"
-    "\u1F69": "Ho\u0304"
-    "\u03A9": "O\u0304"
+    "\u1F69": "H\u014D"
+    "\u03A9": "\u014C"
+    "\u03A9\u03C5": "\u014Cu"
     "\u03AA": "I\u0308"
     "\u03AB": "Y\u0308"
     "\u03AC\u0314": "ha\u0301"
     "\u03AC": "a\u0301"
     "\u03AD\u0314": "he\u0301"
     "\u03AD": "e\u0301"
-    "\u03AE\u0314": "he\u0304\u0301"
-    "\u03AE": "e\u0304\u0301"
+    "\u03AE\u0314": "h\u0113\u0301"
+    "\u03AE": "\u0113\u0301"
     "\u03AF\u0314": "hi\u0301"
     "\u03AF": "i\u0301"
     "\u03B0": "y\u0308\u0301"
     "\u1F01": "ha"
+    "\u1F01\u1F31": "hai"
+    "\u1F01\u03C5": "hau"
     "\u03B1\u03C5": "au"
     "\u03B1": "a"
     "\u03B2": "b"
@@ -398,10 +416,13 @@ script_to_roman:
     "\u03B4": "d"
     "\u1F11": "he"
     "\u03B5\u03C5": "eu"
+    "\u03B5\u1F51": "heu"
     "\u03B5": "e"
     "\u03B6": "z"
-    "\u1F21": "he\u0304"
-    "\u03B7": "e\u0304"
+    "\u1F21": "h\u0113"
+    "\u03B7": "\u0113"
+    "\u03B7\u03C5": "\u0113u"
+    "\u03B7\u1F51": "h\u0113u"
     "\u03B8": "th"
     "\u1F31": "hi"
     "\u03B9\u03C5": "iu"
@@ -424,16 +445,19 @@ script_to_roman:
     "\u03C4": "t"
     "\u1F51": "hy"
     "\u03C5": "y"
+    "\u03C5\u03B9": "ui"
+    "\u03C5\u1F31": "hui"
     "\u03C6": "ph"
     "\u03C7": "ch"
     "\u03C8": "ps"
-    "\u1F61": "ho\u0304"
-    "\u03C9": "o\u0304"
+    "\u1F61": "h\u014D"
+    "\u03C9": "\u014D"
+    "\u03C9\u03C5": "\u014Du"
     "\u03CA": "i\u0308"
     "\u03CB": "y\u0308"
     "\u03CC": "o\u0301"
     "\u03CD": "y\u0301"
-    "\u03CE": "o\u0304\u0301"
+    "\u03CE": "\u014D\u0301"
     "\u03CF": "K\u0326"
     "\u03D0": "b\u0333"
     "\u03D1": "t\u0333h\u0333"
@@ -443,12 +467,14 @@ script_to_roman:
     "\u03D5": "p\u0333h\u0333"
     "\u03D6": "p\u0333"
     "\u03D7": "k\u0326"
-    "\u03D8": "Q"
-    "\u03D9": "q"
+    "\u03D8": ""
+    "\u03D9": ""
     "\u03DA": "6\u0333"
     "\u03DB": "6\u0332"
-    "\u03DC": "G\u0332"
-    "\u03DD": "g\u0332"
+    #"\u03DC": "G\u0332"
+    "\u03DC": "W"
+    #"\u03DD": "g\u0332"
+    "\u03DD": "w"
     "\u03DE": "K\u0324"
     "\u03DF": "k\u0324"
     "\u03E0": "s\uFE20s\uFE21"
@@ -469,14 +495,14 @@ script_to_roman:
     "\u03EF": "t\u0323i"
     "\u03F0": "k\u0332"
     "\u03F1": "r\u0332"
-    "\u03F2": "s\u0332"
+    "\u03F2": "s"
     "\u03F3": "j"
     "\u03F4": "T\u0333H\u0333"
     "\u03F5": "e\u0332"
     "\u03F6": "e\u0333"
     "\u03F7": "S\uFE20H\uFE21"
     "\u03F8": "s\uFE20h\uFE21"
-    "\u03F9": "S\u0332"
+    "\u03F9": "S"
     "\u03FA": "S\u0323"
     "\u03FB": "s\u0323"
     "\u03FC": "r\u0333"
@@ -520,28 +546,35 @@ script_to_roman:
       "eu": "\u03B5\u03C5"
       "E\u0301": "\u0388"
       "e\u0301": "\u03AD"
-      "E\u0304\u0301": "\u0389\u0314"
-      "e\u0304\u0301": "\u03AE"
-      "E\u0304\u0301": "\u0389"
-      "E\u0304": "\u0397"
-      "e\u0304": "\u03B7"
+      "\u0112\u0301": "\u0389\u0314"
+      "\u0113\u0301": "\u03AE"
+      "\u0112\u0301": "\u0389"
+      "\u0112": "\u0397"
+      "\u0112u": "\u0397\u03C5"
+      "\u0113": "\u03B7"
+      "\u0113u": "\u03B7\u03C5"
+      "h\u0113u": "\u03B7\u1F51"
       "e\u0332": "\u03F5"
       "e\u0333": "\u03F6"
       "F": "\u03E4"
       "f": "\u03E5"
-      "G\u0332": "\u03DC"
-      "g\u0332": "\u03DD"
+      #"G\u0332": "\u03DC"
+      "W": "\u03DC"
+      #"g\u0332": "\u03DD"
+      "w": "\u03DD"
       "Ha\u0301": "\u0386\u0314"
       "ha\u0301": "\u03AC\u0314"
       "Ha": "\u1F09"
       "ha": "\u03B1\u0314"
       "A": "\u0391"
       "a": "\u03B1"
-      "he\u0304\u0301": "\u03AE\u0314"
+      "h\u0113\u0301": "\u03AE\u0314"
       "He\u0301": "\u0388\u0314"
       "he\u0301": "\u03AD\u0314"
-      "He\u0304": "\u1F29"
-      "he\u0304": "\u1F21"
+      "H\u0113": "\u1F29"
+      "H\u0113u": "\u1F29\u03C5"
+      "h\u0113": "\u1F21"
+      "h\u0113u": "\u1F21\u03C5"
       "He": "\u1F19"
       "he": "\u1F11"
       "E": "\u0395"
@@ -552,8 +585,8 @@ script_to_roman:
       "hi": "\u1F31"
       "Ho\u0301": "\u038F\u0314"
       "Ho\u0301": "\u038C\u0314"
-      "ho\u0304": "\u1F61"
-      "Ho\u0304": "\u1F69"
+      "h\u014D": "\u1F61"
+      "H\u014D": "\u1F69"
       "Ho": "\u1F49"
       "ho": "\u1F41"
       "H\u0307": "\u03E8"
@@ -600,10 +633,12 @@ script_to_roman:
       "ou": "\u03BF\u03C5"
       "O\u0301": "\u038C"
       "o\u0301": "\u03CC"
-      "O\u0304\u0301": "\u038F"
-      "o\u0304\u0301": "\u03CE"
-      "O\u0304": "\u03A9"
-      "o\u0304": "\u03C9"
+      "\u014C\u0301": "\u038F"
+      "\u014D\u0301": "\u03CE"
+      "\u014C": "\u03A9"
+      "\u014Cu": "\u03A9\u03C5"
+      "\u014D": "\u03C9"
+      "\u014Du": "\u03C9\u03C5"
       "O": "\u039F"
       "o": "\u03BF"
       "Ph": "\u03A6"
@@ -614,8 +649,8 @@ script_to_roman:
       "p\u0333": "\u03D6"
       "P": "\u03A0"
       "p": "\u03C0"
-      "Q": "\u03D8"
-      "q": "\u03D9"
+      "": "\u03D8"
+      "": "\u03D9"
       "Rh": "\u1FEC"
       "rh": "\u1FE5"
       "r\u0332": "\u03F1"
@@ -632,8 +667,8 @@ script_to_roman:
       "s\u030C": "\u03E3"
       "S\u0323": "\u03FA"
       "s\u0323": "\u03FB"
-      "S\u0332": "\u03F9"
-      "s\u0332": "\u03F2"
+      "S": "\u03F9"
+      "s": "\u03F2"
       "S": "\u03A3"
       "%s": "\u03C2"
       "s": "\u03C3"
@@ -668,6 +703,10 @@ script_to_roman:
       "y\u0308": "\u03CB"
       "Y\u0333": "\u03D2"
       "Y": "\u03A5"
+      "Ui": "\u03A5\u03B9"
+      "Hui": "\u03A5\u1F31"
       "y": "\u03C5"
+      "ui": "\u03C5\u03B9"
+      "hui": "\u03C5\u1F31"
       "Z": "\u0396"
       "z": "\u03B6"

+ 1 - 1
tests/data/script_samples/greek.csv

@@ -20,7 +20,7 @@ greek_classical,Ἐγχειρίδιον ἁρμονικῆς,Encheiridion harmon
 greek_classical,ἄλαϲτα δὲ ϝέργα πάθον κακὰ μηϲαμένοι,alasta de werga pathon kaka mēsamenoi,,
 greek_classical,Δαμαρέτα τ’ ἐρατά τε Ϝιανθεμίϲ,Damareta t’ erata te Wianthemis,,
 greek_classical,ξένϝος,xenwos,,
-greek_classical,Πάτροϙλος,Patrolos,,
+greek_classical,Πάτροϙλος,Patrolos,,
 greek_modern,"Ἐτήσια ἔκθεσις / Κυπριακὴ Δημοκρατία, Ὑπουργεῖον Ἐργασίας καὶ Κοινωνικῶν Ἀσφαλίσεων","Etēsia ekthesis / Kypriakē Dēmokratia, Hypourgeion Ergasias kai Koinōnikōn Asphaliseōn",,
 greek_modern,"Ετήσια έκθεση / Κυπριακή Δημοκρατία, Υπουργείο Εργασίας και Κοινωνικών Ασφαλίσεων","Etēsia ekthesē / Kypriakē Dēmokratia, Hypourgeio Ergasias kai Koinōnikōn Asphaliseōn",,
 greek_modern,Ελληνικό Ίδρυμα Ευρωπαϊκής και Εξωτερικής Πολιτικής,Hellēniko Hidryma Eurōpaikēs kai Exōterikēs Politikēs,,