Jelajahi Sumber

Fix EOF numerals parsing; enable modern Greek tests.

scossu 1 tahun lalu
induk
komit
27e3609169

+ 2 - 1
scriptshifter/hooks/greek/__init__.py

@@ -134,7 +134,8 @@ def parse_numeral(ctx):
                                 f"{cur} is not a valid digit character "
                                 f"at place #{4 - i} in a numeral.")
 
-                    return  # Continue normal parsing.
+                    ctx.cur += 1
+                    return CONT  # Continue normal parsing.
 
         ctx.cur += 1
         return CONT

+ 0 - 134
scriptshifter/tables/data/greek_ancient.yml

@@ -1,134 +0,0 @@
-general:
-  name: Greek (ancient and medieval)
-  notes:
-    - Compiled based on https://www.loc.gov/catdir/cpso/romanization/greek.pdf
-    - >
-      "The document notes: \"The h is supplied as appropriate when the rough
-        breathing does not appear in the Greek text (for example, when the text
-        is in all capitals, or, in Modern Greek, when the text is in monotonic
-        orthography).\" This is not implemented as it would assume that a
-        specific word be known to have a rough breathing."
-
-  parents:
-    - _ignore_base
-
-script_to_roman:
-  hooks:
-    post_config:
-      -
-        - greek.normalize_diacritics
-  map:
-    "Α": "A"
-    "Αυ": "Au"
-    "Β": "B"
-    "Γ": "G"
-    "Γκ": "Gk"
-    "Δ": "D"
-    "Ε": "E"
-    "Ευ": "Eu"
-    "Ζ": "Z"
-    "Η": "Ē"
-    "Ηυ": "Ēu"
-    "Θ": "Th"
-    "Ι": "I"
-    "Κ": "K"
-    "Λ": "L"
-    "Μ": "M"
-    "Μπ": "B"
-    "Ν": "N"
-    "Ντ": "D̲"
-    "Ξ": "X"
-    "Ο": "O"
-    "Ου": "Ou"
-    "Π": "P"
-    "Ρ": "R"
-    "Ῥ": "Rh"
-    "Σ": "S"
-    "C": "S"
-    "Τ": "T"
-    "Υ": "Y"
-    "Υι": "Ui"
-    "Φ": "Ph"
-    "Χ": "Ch"
-    "Ψ": "Ps"
-    "Ω": "Ō"
-    "Ωυ": "Ōu"
-    "α": "a"
-    "αυ": "au"
-    "β": "b"
-    "γ": "g"
-    "γγ": "ng"
-    "γκ": "gk"
-    "γξ": "nx"
-    "γχ": "nch"
-    "δ": "d"
-    "ε": "e"
-    "ευ": "eu"
-    "ζ": "z"
-    "η": "ē"
-    "ηυ": "ēu"
-    "θ": "th"
-    "ι": "i"
-    "κ": "k"
-    "λ": "l"
-    "μ": "m"
-    "μπ": "b"  # initially
-    "μπ": "mp" # medially & finally TODO hook
-    "ν": "n"
-    "ντ": "d̲"  # initially
-    "ντ": "nt" # medially & finally TODO hook
-    "ξ": "x"
-    "ο": "o"
-    "ου": "ou"
-    "π": "p"
-    "ρ": "r"
-    "ῥ": "rh"
-    "σ": "s"
-    "ϲ": "s"
-    "ς": "s"
-    "τ": "t"
-    "υ": "y"
-    "υι": "ui"
-    "φ": "ph"
-    "χ": "ch"
-    "ψ": "ps"
-    "ω": "ō"
-    "ωυ": "ōu"
-
-    # Dasia
-    "ἁ": "ha"
-    "ἁυ": "hau"
-    "Ἁ": "Ha"
-    "Ἁυ": "Hau"
-    "ἑ": "he"
-    "ἑυ": "heu"
-    "Ἑ": "He"
-    "Ἑυ": "Heu"
-    "ἡ": "hē"
-    "ἡυ": "hēu"
-    "Ἡ": "Hē"
-    "Ἡυ": "Hēu"
-    "ἱ": "hi"
-    "Ἱ": "Hi"
-    "ὁ": "ho"
-    "ὁυ": "Hou"
-    "Ὁ": "Ho"
-    "Ὁυ": "Hou"
-    "ὑ": "hy"
-    "ὑι": "hui"
-    "Ὑ": "Hy"
-    "Ὑι": "Hui"
-    "ὡ": "hō"
-    "ὡυ": "hōu"
-    "Ὡ": "Hō"
-    "Ὡυ": "Hōu"
-
-    # Archaic symbols
-    "Ϝ": "W"
-    "Ϙ": "Ḳ"
-    "ϝ": "w"
-    "ϙ": "ḳ"
-
-    # Punctuation
-    "·": ";"
-    ";": "?"

+ 11 - 18
scriptshifter/tables/data/greek_classical.yml

@@ -1,8 +1,7 @@
 general:
-  name: Greek (ancient and medieval)
+  name: Classical Greek (ancient and medieval)
   notes:
     - Compiled based on https://www.loc.gov/catdir/cpso/romanization/greek.pdf
-
   parents:
     - _ignore_base
 
@@ -15,7 +14,6 @@ script_to_roman:
   normalize:
     # Alpha
     "\u03B1":
-      - "\u03AC"
       - "\u1F00"
       - "\u1F02"
       - "\u1F04"
@@ -23,7 +21,6 @@ script_to_roman:
       - "\u1F70"
       - "\u1F71"
     "\u0391":
-      - "\u0386"
       - "\u1F08"
       - "\u1F0A"
       - "\u1F0C"
@@ -40,14 +37,12 @@ script_to_roman:
       - "\u1F0F"
     # Epsilon
     "\u03B5":
-      - "\u03AD"
       - "\u1F10"
       - "\u1F12"
       - "\u1F14"
       - "\u1F72"
       - "\u1F73"
     "\u0395":
-      - "\u0388"
       - "\u1F18"
       - "\u1F1A"
       - "\u1F1C"
@@ -62,7 +57,6 @@ script_to_roman:
       - "\u1F1D"
     # Eta
     "\u03B7":
-      - "\u03AE"
       - "\u1F20"
       - "\u1F22"
       - "\u1F24"
@@ -70,7 +64,6 @@ script_to_roman:
       - "\u1F74"
       - "\u1F75"
     "\u0397":
-      - "\u0389"
       - "\u1F28"
       - "\u1F2A"
       - "\u1F2C"
@@ -88,7 +81,6 @@ script_to_roman:
       - "\u1F2F"
     # Iota
     "\u03B9":
-      - "\u03AF"
       - "\u1F30"
       - "\u1F32"
       - "\u1F34"
@@ -96,7 +88,6 @@ script_to_roman:
       - "\u1F76"
       - "\u1F77"
     "\u0399":
-      - "\u038A"
       - "\u1F38"
       - "\u1F3A"
       - "\u1F3C"
@@ -115,14 +106,12 @@ script_to_roman:
         # ὶ
     # Omicron
     "\u03BF":
-      - "\u03CC"
       - "\u1F40"
       - "\u1F42"
       - "\u1F44"
       - "\u1F78"
       - "\u1F79"
     "\u039F":
-      - "\u038C"
       - "\u1F48"
       - "\u1F4A"
       - "\u1F4C"
@@ -135,6 +124,15 @@ script_to_roman:
       - "\u1F49"
       - "\u1F4B"
       - "\u1F4D"
+    # Rho
+    "\u03C1":
+      - "\u1FE4"
+    "\u03C1\u0314":
+      - "\u1FE5"
+    # Rough Rho
+    "\u03A1\u0314":
+      - "\u1FEC"
+
     # Upsilon
     "\u03C5":
       - "\u03CD"
@@ -145,8 +143,7 @@ script_to_roman:
       - "\u1FE6"
       - "\u1F7A"
       - "\u1F7B"
-    "\u03A5":
-      - "\u038E"
+    #"\u03A5":
         # NOTE: Capital upsilon + psili seems to be absent from the Unicode table.
     # Rough Upsilon
     "\u03C5\u0314":
@@ -161,7 +158,6 @@ script_to_roman:
       - "\u1F5F"
     # Omega
     "\u03C9":
-      - "\u03CE"
       - "\u1F60"
       - "\u1F62"
       - "\u1F64"
@@ -169,7 +165,6 @@ script_to_roman:
       - "\u1F7C"
       - "\u1F7D"
     "\u03A9":
-      - "\u038F"
       - "\u1F68"
       - "\u1F6A"
       - "\u1F6C"
@@ -191,10 +186,8 @@ script_to_roman:
     "":
       - "\u0342"
       - "\u0343"
-      - "\u0344"
       - "\u0345"
       - "\u037A"
-      - "\u0384"
       - "\u1FBD"
       - "\u1FBE"
       - "\u1FBF"

+ 39 - 0
scriptshifter/tables/data/greek_modern.yml

@@ -4,6 +4,45 @@ general:
     - greek_classical
 
 script_to_roman:
+  hooks:
+    begin_input_token:
+      -
+        - greek.parse_numeral
+
+  normalize:
+    "\u03B1":
+      - "\u03AC"
+    "\u0391":
+      - "\u0386"
+    "\u03B5":
+      - "\u03AD"
+    "\u0395":
+      - "\u0388"
+    "\u03B7":
+      - "\u03AE"
+    "\u0397":
+      - "\u0389"
+    "\u03B9":
+      - "\u03AF"
+    "\u0399":
+      - "\u038A"
+    "\u03BF":
+      - "\u03CC"
+    "\u039F":
+      - "\u038C"
+    "\u03C5":
+      - "\u03CD"
+    "\u03A5":
+      - "\u038E"
+    "\u03C9":
+      - "\u03CE"
+    "\u03A9":
+      - "\u038F"
+    "":
+      - "\u0344"
+      - "\u0384"
+      - "\u0385"
+
   map:
     "\u0392": "V"
     "\u03B2": "v"

+ 34 - 34
tests/data/script_samples/greek.csv

@@ -11,44 +11,44 @@ greek_classical,Λητοῦς καὶ Διὸς υἱός,Lētous kai Dios huios
 greek_classical,ὑϊκὸν πάσχειν,hyikon paschein,,
 greek_classical,εἶπε πρὸς τὸν ἄνδρα τὸν ἑωυτῆς,eipe pros ton andra ton heōutēs,,
 greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron,,
-greek_classical,Τοῦ Κατὰ πασῶν αἱρέσεων ἐλέγχου βιβλίον α,Tou Kata pasōn haireseōn elenchou biblion 1,,
+greek_classical,Τοῦ Κατὰ πασῶν αἱρέσεων ἐλέγχου βιβλίον αʹ,Tou Kata pasōn haireseōn elenchou biblion 1,,
 greek_classical,καλὸν κἀγαθόν,kalon kagathon,,
 greek_classical,ᾤχοντο θοἰμάτιον λαβόντες μου,ōchonto thoimation labontes mou,,
 greek_classical,Περὶ ἰλίγγων,Peri ilingōn,,
 greek_classical,ὅτε τ’ ἴαχε σάλπιγξ,hote t’ iache salpinx,,
 greek_classical,Ἐγχειρίδιον ἁρμονικῆς,Encheiridion harmonikēs,,
-greek_classical,ἄλαϲτα δὲ ϝέργα πάθον κακὰ μηϲαμένο,alasta de werga pathon kaka mēsamenoi,,
-greek_classical,Δαμαρέτα τ’ ἐρατά τε Ϝιανθεμί,Damareta t’ erata te Wianthemis,,
+greek_classical,ἄλαϲτα δὲ ϝέργα πάθον κακὰ μηϲαμένοι,alasta de werga pathon kaka mēsamenoi,,
+greek_classical,Δαμαρέτα τ’ ἐρατά τε Ϝιανθεμίϲ,Damareta t’ erata te Wianthemis,,
 greek_classical,ξένϝος,xenwos,,
 greek_classical,Πάτροϙλος,Patroḳlos,,
-,"Ἐτήσια ἔκθεσις / Κυπριακὴ Δημοκρατία, Ὑπουργεῖον Ἐργασίας καὶ Κοινωνικῶν Ἀσφαλίσεων","Etēsia ekthesis / Kypriakē Dēmokratia, Hypourgeion Ergasias kai Koinōnikōn Asphaliseōn",,
-,"Ετήσια έκθεση / Κυπριακή Δημοκρατία, Υπουργείο Εργασίας και Κοινωνικών Ασφαλίσεων","Etēsia ekthesē / Kypriakē Dēmokratia, Hypourgeio Ergasias kai Koinōnikōn Asphaliseōn",,
-,Ελληνικό Ίδρυμα Ευρωπαϊκής και Εξωτερικής Πολιτικής,Hellēniko Hidryma Eurōpaikēs kai Exōterikēs Politikēs,,
-,Ελευθέριος Δ. Παυλίδης,Eleutherios D. Paulidēs,,
-,Ορθόδοξος Αυτοκέφαλος Εκκλησία της Αλβανίας,Orthodoxos Autokephalos Ekklēsia tēs Alvanias,,
-,Βίος και πολιτεία του Αλέξη Ζορμπά,Vios kai politeia tou Alexē Zormpa,,
-,Βίος καὶ πολιτεία τοῦ Ἀλέξη Ζορμπᾶ,Vios kai politeia tou Alexē Zormpa,,
-,Λασκαρίνα Μπουμπουλίνα,Laskarina Boumpoulina,,
-,Νταίηβιντ Μίτσελ,D̲aiēvint Mitsel,,
-,Τζαίημς Τζόυς,Tzaiēms Tzoys,,
-,Ἡ κοινωνιολογία τοῦ ρεμπέτικου,Hē koinōniologia tou rempetikou,,
-,Βίλλυ Μπραντ,Villy Brant,,
-,Μπραντ Πιτ,Brant Pit,,
-,Γιάκομπ Φίλιπ Φαλμεράυερ,Giakomp Philip Phalmerayer,,
-,Σαρλ Ογκουστίν ντε Κουλόμπ,Sarl Onkoustin de Koulomp,,
-,Λαμπέρτο Ντίνι,Lamperto D̲ini,,
-,Τζωρτζ Χέρμπερτ Ουώκερ Μπους,Tzōrtz Chermpert Ouōker Bous,,
-,Ουίνστων Τσώρτσιλ,Ouinstōn Tsōrtsil,,
-,Παγκόσμιο Κέντρο Εμπορίου,Pankosmio Kentro Emporiou,,
-,Φαίδων Γκιζίκης,Phaidōn Gkizikēs,,
-,Γκέτεμποργκ,Gketemporgk,,
-,Ουάσιγκτον,Ouasinkton,,
-,Ουάσινγκτον,Ouasinnkton,,
-,Αεροδρόμιο Ρόναλντ Ρέιγκαν της Ουάσινγκτον,Aerodromio Ronalnt Reinkan tēs Ouasinnkton,,
-,Ντμίτρι Ιβάνοβιτς Μεντελέγιεφ,D̲mitri Ivanovits Mentelegieph,,
-,Άγγελος Σταύρου Βλάχος,Angelos Staurou Vlachos,,
-,ΟΔΗΓΟΣ ΜΑΡΚΕΤΙΝΓΚ ΕΛΛΑΔΟΣ / Ἑλληνικό Ἰνστιτοῦτο Μάρκετινγκ τῆς Ἑλληνικῆς Ἑταιρίας Διοικήσεως Ἐπιχειρήσεων,Hodēgos marketingk Hellados / Hellēniko Institouto Marketingk tēs Hellēnikēs Hetairias Dioikēseōs Epicheirēseōn,,
-,Σάλπιγξ Ἑλληνική,Salpinx Hellēnikē,,
-,Μπιντπάϋ,Bintpay,,
-,Η υιοθεσία ενηλίκων,Hē huiothesia enēlikōn,,
-,οι Άρπυιες,hoi Harpuies,,
+greek_modern,"Ἐτήσια ἔκθεσις / Κυπριακὴ Δημοκρατία, Ὑπουργεῖον Ἐργασίας καὶ Κοινωνικῶν Ἀσφαλίσεων","Etēsia ekthesis / Kypriakē Dēmokratia, Hypourgeion Ergasias kai Koinōnikōn Asphaliseōn",,
+greek_modern,"Ετήσια έκθεση / Κυπριακή Δημοκρατία, Υπουργείο Εργασίας και Κοινωνικών Ασφαλίσεων","Etēsia ekthesē / Kypriakē Dēmokratia, Hypourgeio Ergasias kai Koinōnikōn Asphaliseōn",,
+greek_modern,Ελληνικό Ίδρυμα Ευρωπαϊκής και Εξωτερικής Πολιτικής,Hellēniko Hidryma Eurōpaikēs kai Exōterikēs Politikēs,,
+greek_modern,Ελευθέριος Δ. Παυλίδης,Eleutherios D. Paulidēs,,
+greek_modern,Ορθόδοξος Αυτοκέφαλος Εκκλησία της Αλβανίας,Orthodoxos Autokephalos Ekklēsia tēs Alvanias,,
+greek_modern,Βίος και πολιτεία του Αλέξη Ζορμπά,Vios kai politeia tou Alexē Zormpa,,
+greek_modern,Βίος καὶ πολιτεία τοῦ Ἀλέξη Ζορμπᾶ,Vios kai politeia tou Alexē Zormpa,,
+greek_modern,Λασκαρίνα Μπουμπουλίνα,Laskarina Boumpoulina,,
+greek_modern,Νταίηβιντ Μίτσελ,D̲aiēvint Mitsel,,
+greek_modern,Τζαίημς Τζόυς,Tzaiēms Tzoys,,
+greek_modern,Ἡ κοινωνιολογία τοῦ ρεμπέτικου,Hē koinōniologia tou rempetikou,,
+greek_modern,Βίλλυ Μπραντ,Villy Brant,,
+greek_modern,Μπραντ Πιτ,Brant Pit,,
+greek_modern,Γιάκομπ Φίλιπ Φαλμεράυερ,Giakomp Philip Phalmerayer,,
+greek_modern,Σαρλ Ογκουστίν ντε Κουλόμπ,Sarl Onkoustin de Koulomp,,
+greek_modern,Λαμπέρτο Ντίνι,Lamperto D̲ini,,
+greek_modern,Τζωρτζ Χέρμπερτ Ουώκερ Μπους,Tzōrtz Chermpert Ouōker Bous,,
+greek_modern,Ουίνστων Τσώρτσιλ,Ouinstōn Tsōrtsil,,
+greek_modern,Παγκόσμιο Κέντρο Εμπορίου,Pankosmio Kentro Emporiou,,
+greek_modern,Φαίδων Γκιζίκης,Phaidōn Gkizikēs,,
+greek_modern,Γκέτεμποργκ,Gketemporgk,,
+greek_modern,Ουάσιγκτον,Ouasinkton,,
+greek_modern,Ουάσινγκτον,Ouasinnkton,,
+greek_modern,Αεροδρόμιο Ρόναλντ Ρέιγκαν της Ουάσινγκτον,Aerodromio Ronalnt Reinkan tēs Ouasinnkton,,
+greek_modern,Ντμίτρι Ιβάνοβιτς Μεντελέγιεφ,D̲mitri Ivanovits Mentelegieph,,
+greek_modern,Άγγελος Σταύρου Βλάχος,Angelos Staurou Vlachos,,
+greek_modern,ΟΔΗΓΟΣ ΜΑΡΚΕΤΙΝΓΚ ΕΛΛΑΔΟΣ / Ἑλληνικό Ἰνστιτοῦτο Μάρκετινγκ τῆς Ἑλληνικῆς Ἑταιρίας Διοικήσεως Ἐπιχειρήσεων,Hodēgos marketingk Hellados / Hellēniko Institouto Marketingk tēs Hellēnikēs Hetairias Dioikēseōs Epicheirēseōn,,
+greek_modern,Σάλπιγξ Ἑλληνική,Salpinx Hellēnikē,,
+greek_modern,Μπιντπάϋ,Bintpay,,
+greek_modern,Η υιοθεσία ενηλίκων,Hē huiothesia enēlikōn,,
+greek_modern,οι Άρπυιες,hoi Harpuies,,