Pārlūkot izejas kodu

Experimenting with ordering.

scossu 1 gadu atpakaļ
vecāks
revīzija
ccda2fa306

+ 81 - 14
scriptshifter/tables/__init__.py

@@ -46,7 +46,7 @@ WORD_BOUNDARY = " \n\t:;.,\"'-()[]{}"
 logger = logging.getLogger(__name__)
 
 
-class Token(str):
+class Token:
     """
     Token class: minimal unit of text parsing.
 
@@ -71,20 +71,88 @@ class Token(str):
         - BEFGH
         - B
         """
-        logger.debug(f"a: {self.content}, b: {other.content}")
-        self_len = len(self.content)
-        other_len = len(other.content)
-        min_len = min(self_len, other_len)
-
-        # If one of the strings is entirely contained in the other string...
-        if self.content[:min_len] == other.content[:min_len]:
-            logger.debug("Roots match.")
-            # ...then the longer one takes precedence (is "less")
-            return self_len > other_len
-
-        # If the root strings are different, perform a normal comparison.
+        logger.debug(f"lt called on {self.content}, {other.content}")
+
+        if self.content == other.content:
+            return False
+
+        # If one of the strings is a prefix of the other string, then the
+        # longer (containing) string has precedence (sorts first, is "less").
+        if other.content.startswith(self.content):
+            logger.debug(f"{other.content} comes before {self.content}")
+            return False
+
+        # Other way around.
+        if self.content.startswith(other.content):
+            logger.debug(f"{self.content} comes before {other.content}")
+            return True
+
+        # If neither of the strings contains the other, perform a normal
+        # string comparison.
+        logger.debug(f"neither {other.content} nor {self.content} are subs.")
+        return self.content < other.content
+
+    def __le__(self, other):
+        logger.debug(f"le called on {self.content}, {other.content}")
+
+        if self.content == other.content:
+            return True
+
+        if self.content in other.content:
+            logger.debug(f"{other.content} comes before {self.content}")
+            return False
+
+        if other.content in self.content:
+            logger.debug(f"{self.content} comes before {other.content}")
+            return True
+
+        logger.debug(f"neither {other.content} nor {self.content} are subs.")
         return self.content < other.content
 
+    def __gt__(self, other):
+        logger.debug(f"gt called on {self.content}, {other.content}")
+
+        if self.content == other.content:
+            return False
+
+        if self.content in other.content:
+            logger.debug(f"{other.content} comes after {self.content}")
+            return True
+
+        if other.content in self.content:
+            logger.debug(f"{self.content} comes after {other.content}")
+            return False
+
+        logger.debug(f"neither {other.content} nor {self.content} are subs.")
+        return self.content > other.content
+
+    def __ge__(self, other):
+        logger.debug(f"ge called on {self.content}, {other.content}")
+
+        if self.content == other.content:
+            return True
+
+        if self.content in other.content:
+            logger.debug(f"{other.content} comes after {self.content}")
+            return True
+
+        if other.content in self.content:
+            logger.debug(f"{self.content} comes after {other.content}")
+            return False
+
+        logger.debug(f"neither {other.content} nor {self.content} are subs.")
+        return self.content > other.content
+
+    def __eq__(self, other):
+        logger.debug(f"eq called on {self.content}, {other.content}")
+
+        return self.content == other.content
+
+    def __ne__(self, other):
+        logger.debug(f"ne called on {self.content}, {other.content}")
+
+        return self.content != other.content
+
     def __hash__(self):
         return hash(self.content)
 
@@ -116,7 +184,6 @@ def load_table(tname):
     with open(fname) as fh:
         tdata = load(fh, Loader=Loader)
 
-    # NOTE Only one level of inheritance. No need for recursion for now.
     parents = tdata.get("general", {}).get("parents", [])
 
     if "script_to_roman" in tdata:

+ 2 - 0
tests/data/ordering.yml

@@ -11,4 +11,6 @@ roman_to_script:
     "A": ""
     "AB": ""
     "ABCD": ""
+    "ZABCD": ""
+    "ZAB": ""
 

+ 3 - 1
tests/test01_cfg.py

@@ -14,8 +14,10 @@ class TestConfig(TestCase):
         self.tables = reload_tables()
 
     def test_ordering(self):
+        breakpoint()
         tbl = self.tables.load_table("ordering")
-        exp_order = ["ABCD", "AB", "A", "BCDE", "BCD", "BEFGH", "B"]
+        exp_order = [
+                "ZABCD", "ABCD", "ZAB", "AB", "A", "BCDE", "BCD", "BEFGH", "B"]
 
         self.assertEqual(
                 [s[0] for s in tbl["roman_to_script"]["map"]], exp_order)

+ 6 - 4
tests/test02_transliteration.py

@@ -34,10 +34,11 @@ class TestTrans(TestCase):
         """
         config = scriptshifter.tables.load_table(self.tbl)
         if "script_to_roman" in config:
-            txl = transliterate(self.script, self.tbl)
+            txl = transliterate(self.script, self.tbl)[0]
             self.assertEqual(
                     txl, self.roman,
-                    f"S2R transliteration error for {self.tbl}!")
+                    f"S2R transliteration error for {self.tbl}!\n"
+                    f"Original: {self.script}")
 
     def sample_r2s(self):
         """
@@ -48,10 +49,11 @@ class TestTrans(TestCase):
         """
         config = scriptshifter.tables.load_table(self.tbl)
         if "roman_to_script" in config:
-            txl = transliterate(self.roman, self.tbl, r2s=True)
+            txl = transliterate(self.roman, self.tbl, r2s=True)[0]
             self.assertEqual(
                     txl, self.script,
-                    f"R2S transliteration error for {self.tbl}!")
+                    f"R2S transliteration error for {self.tbl}!\n"
+                    f"Original: {self.roman}")
 
 
 def make_suite():

+ 6 - 6
tests/test03_capitalization.py

@@ -17,9 +17,9 @@ class TestCapitalization(TestCase):
     def test_cap(self):
         tbl = "cap_inherited"
         in_str = "зг іо"
-        tx = transliterate(in_str, tbl)
-        tx_cap = transliterate(in_str, tbl, capitalize="first")
-        tx_all = transliterate(in_str, tbl, capitalize="all")
+        tx = transliterate(in_str, tbl)[0]
+        tx_cap = transliterate(in_str, tbl, capitalize="first")[0]
+        tx_all = transliterate(in_str, tbl, capitalize="all")[0]
 
         self.assertEqual(tx, "zh io")
         self.assertEqual(tx_cap, "Zh io")
@@ -28,9 +28,9 @@ class TestCapitalization(TestCase):
     def test_cap_ligatures(self):
         tbl = "cap_inherited"
         in_str = "жзг ёіо зг іо"
-        tx = transliterate(in_str, tbl)
-        tx_cap = transliterate(in_str, tbl, capitalize="first")
-        tx_all = transliterate(in_str, tbl, capitalize="all")
+        tx = transliterate(in_str, tbl)[0]
+        tx_cap = transliterate(in_str, tbl, capitalize="first")[0]
+        tx_all = transliterate(in_str, tbl, capitalize="all")[0]
 
         self.assertEqual(tx, "z︠h︡zh i︠o︡io zh io")
         self.assertEqual(tx_cap, "Z︠H︡zh i︠o︡io zh io")