Browse Source

Fix Greek numerals logic; add test strings.

scossu 11 months ago
parent
commit
7f1c33f8ef

+ 54 - 16
scriptshifter/hooks/greek/__init__.py

@@ -77,12 +77,17 @@ def parse_numeral(ctx):
     characters mixed with letter characters without a space. Therefore,
     "͵ακακαα" would transliterate "1021kaa", and "͵αακαα", "1001kaa".
     """
-    # Parse thousands.
+    # Parse ≥1000.
     if ctx.src[ctx.cur] == THOUSANDS_PREFIX:
         tk = ctx.src[ctx.cur + 1]
 
         try:
-            ctx.dest_ls.append(str(DIGITS[4][tk]))
+            # Exception for 2-letter digit.
+            if ctx.src[ctx.cur + 1: ctx.cur + 3] == "στ":
+                ctx.dest_ls.append(str(DIGITS[4]["στ"]))
+                ctx.cur += 1
+            else:
+                ctx.dest_ls.append(str(DIGITS[4][tk]))
             ctx.cur += 2
 
         except KeyError:
@@ -104,8 +109,13 @@ def parse_numeral(ctx):
                 ext[ext_cur] = str(DIGITS[3 - i][ctx.src[ctx.cur]])
                 ctx.cur += 1
             except KeyError:
-                # If the number char is not in the correct position, pad with 0
-                continue
+                # Exception for 2-letter digit.
+                if i == 2 and ctx.src[ctx.cur: ctx.cur + 2] == "στ":
+                    ext[ext_cur] = "6"
+                    ctx.cur += 2
+                else:
+                    # If the char is not in the correct position, pad with 0.
+                    continue
             finally:
                 ext_cur += 1
         ctx.dest_ls.extend(ext)
@@ -119,23 +129,51 @@ def parse_numeral(ctx):
     # transliterated characters.
     if ctx.src[ctx.cur] == NUM_SUFFIX:
         # Move back up to 3 positions.
-        for i in range(1, 4):
-            cur = ctx.cur - i
+        offset = 0  # Added offset if στ is found.
+        parsed = 0  # Parsed numeral to replace the alpha characters.
+        breakout = False  # Break out of i loop.
+
+        i = 1  # Current position in the numeral. 1 = units, 2 = tens, etc.
+        mark_pos = ctx.cur  # Mark this position to resume parsing later.
+        while i < 4:
+            if breakout:
+                break
+            cur = ctx.cur - i - offset
             if cur >= 0:
                 num_tk = ctx.src[cur]  # Number to be parsed
-                if ctx.src[cur] in DIGITS[i]:
-                    # Not yet reached word boundary.
-                    ctx.dest_ls[-i] = str(DIGITS[i][num_tk])
-                else:
-                    if ctx.src[cur] != " ":  # Word boundary.
-                        # Something's wrong.
+                # Exception for στ. Scan one character farther left.
+                if ctx.src[cur - 1:cur + 1] == "στ":
+                    num_tk = "στ"
+                    offset = 1
+                for j in range(i, 4):
+                    i = j
+                    if num_tk in DIGITS[j]:
+                        # Not yet reached word boundary.
+                        parsed += DIGITS[j][num_tk] * 10 ** (j - 1)
+                        break
+
+                    if num_tk == " " or cur == 0:  # Word boundary.
+                        breakout = True
+                        break
+
+                    # If we got here we tried all positions without finding a
+                    # match. Something's wrong.
+                    if j == 3:
+                        #     continue
                         ctx.warnings.append(
-                                f"Character `{ctx.src[cur] }` at position "
+                                f"Character `{num_tk}` at position "
                                 f"{cur} is not a valid digit character "
                                 f"at place #{4 - i} in a numeral.")
 
-                    ctx.cur += 1
-                    return CONT  # Continue normal parsing.
+                    # ctx.cur += 1 + offset
+                    # return CONT  # Continue normal parsing.
+            i += 1
+
+        if parsed > 0:
+            ctx.dest_ls = (
+                    ctx.dest_ls[:mark_pos - len(str(parsed)) - offset]
+                    + [str(parsed)])
+
+        ctx.cur = mark_pos + 1  # Skip past numeral suffix.
 
-        ctx.cur += 1
         return CONT

+ 2 - 0
scriptshifter/tables/data/greek_classical.yml

@@ -344,6 +344,7 @@ script_to_roman:
     "\u037C": "(."
     "\u037D": ".)"
     "\u037E": "?\u0333"
+    ";": "?"
     "\u037F": "J"
     # \u0380 reserved
     # \u0381 reserved
@@ -594,6 +595,7 @@ script_to_roman:
       ".)\u0333": "\u03FF"
       ".)": "\u037D"
       "?\u0333": "\u037E"
+      "?": "\u037E"
       "\"\u0332": "\u201C"
       "\"\u0333": "\u201D"
       "'\u0332": "\u2018"

+ 4 - 1
tests/data/script_samples/greek.csv

@@ -10,7 +10,7 @@ greek_classical,ἀΰπνους νύκτας ἴαυον,aypnous nyktas iauon,,
 greek_classical,Λητοῦς καὶ Διὸς υἱός,Lētous kai Dios huios,,
 greek_classical,ὑϊκὸν πάσχειν,hyikon paschein,,
 greek_classical,εἶπε πρὸς τὸν ἄνδρα τὸν ἑωυτῆς,eipe pros ton andra ton heōutēs,,
-greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron,,
+greek_classical,τί τοῦδ’ ἂν εὕρημ’ ηὗρον εὐτυχέστερον;,ti toud’ an heurēm’ hēuron eutychesteron?,,
 greek_classical,Τοῦ Κατὰ πασῶν αἱρέσεων ἐλέγχου βιβλίον αʹ,Tou Kata pasōn haireseōn elenchou biblion 1,,
 greek_classical,καλὸν κἀγαθόν,kalon kagathon,,
 greek_classical,ᾤχοντο θοἰμάτιον λαβόντες μου,ōchonto thoimation labontes mou,,
@@ -21,6 +21,9 @@ greek_classical,ἄλαϲτα δὲ ϝέργα πάθον κακὰ μηϲαμέ
 greek_classical,Δαμαρέτα τ’ ἐρατά τε Ϝιανθεμίϲ,Damareta t’ erata te Wianthemis,,
 greek_classical,ξένϝος,xenwos,,
 greek_classical,Πάτροϙλος,Patroḳlos,,
+greek_classical,"λβʹ. Ἐπεὶ δὲ ἡ τύχη κράτιστον ἐπὶ πάντα τὰ ἀνθρώπεια, μηδὲ Ἡλιόδωρος ἀπαξιούσθω σοφιστῶν κύκλου παράδοξον ἀγώνισμα τύχης γενόμενος·","32. Epei de hē tychē kratiston epi panta ta anthrōpeia, mēde Hēliodōros apaxiousthō sophistōn kyklou paradoxon agōnisma tychēs genomenos",,
+greek_classical,"κζʹ. Μὴ δεύτερα τῶν προειρημένων σοφιστῶν μηδὲ Ἱππόδρομόν τις ἡγείσθω τὸν Θετταλόν, τῶν μὲν γὰρ βελτίων φαίνεται, τῶν δὲ οὐκ οἶδα ὅ τι λείπεται","27. Mē deutera tōn proeirēmenōn sophistōn mēde Hippodromon tis hēgeisthō ton Thettalon, tōn men gar beltiōn phainetai, tōn de ouk oida ho ti leipetai",,
+greek_classical,"ιγʹ. Πῶλον δὲ τὸν Ἀκραγαντῖνον Γοργίας σοφιστὴν ἐξεμελέτησε πολλῶν, ὥς φασι, χρημάτων, καὶ γὰρ δὴ καὶ τῶν πλουτούντων ὁ Πῶλος.","13. Pōlon de ton Akragantinon Gorgias sophistēn exemeletēse pollōn, hōs phasi, chrēmatōn, kai gar dē kai tōn ploutountōn ho Pōlos.",,
 greek_modern,"Ἐτήσια ἔκθεσις / Κυπριακὴ Δημοκρατία, Ὑπουργεῖον Ἐργασίας καὶ Κοινωνικῶν Ἀσφαλίσεων","Etēsia ekthesis / Kypriakē Dēmokratia, Hypourgeion Ergasias kai Koinōnikōn Asphaliseōn",,
 greek_modern,"Ετήσια έκθεση / Κυπριακή Δημοκρατία, Υπουργείο Εργασίας και Κοινωνικών Ασφαλίσεων","Etēsia ekthesē / Kypriakē Dēmokratia, Hypourgeio Ergasias kai Koinōnikōn Asphaliseōn",,
 greek_modern,Ελληνικό Ίδρυμα Ευρωπαϊκής και Εξωτερικής Πολιτικής,Hellēniko Hidryma Eurōpaikēs kai Exōterikēs Politikēs,,