|
@@ -46,7 +46,7 @@ WORD_BOUNDARY = " \n\t:;.,\"'-()[]{}"
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
-class Token(str):
|
|
|
+class Token:
|
|
|
"""
|
|
|
Token class: minimal unit of text parsing.
|
|
|
|
|
@@ -71,20 +71,88 @@ class Token(str):
|
|
|
- BEFGH
|
|
|
- B
|
|
|
"""
|
|
|
- logger.debug(f"a: {self.content}, b: {other.content}")
|
|
|
- self_len = len(self.content)
|
|
|
- other_len = len(other.content)
|
|
|
- min_len = min(self_len, other_len)
|
|
|
-
|
|
|
- # If one of the strings is entirely contained in the other string...
|
|
|
- if self.content[:min_len] == other.content[:min_len]:
|
|
|
- logger.debug("Roots match.")
|
|
|
- # ...then the longer one takes precedence (is "less")
|
|
|
- return self_len > other_len
|
|
|
-
|
|
|
- # If the root strings are different, perform a normal comparison.
|
|
|
+ logger.debug(f"lt called on {self.content}, {other.content}")
|
|
|
+
|
|
|
+ if self.content == other.content:
|
|
|
+ return False
|
|
|
+
|
|
|
+ # If one of the strings is a prefix of the other string, then the
|
|
|
+ # longer string has precedence (is "less", i.e. it sorts first).
|
|
|
+ if other.content.startswith(self.content):
|
|
|
+ logger.debug(f"{other.content} comes before {self.content}")
|
|
|
+ return False
|
|
|
+
|
|
|
+ # Other way around.
|
|
|
+ if self.content.startswith(other.content):
|
|
|
+ logger.debug(f"{self.content} comes before {other.content}")
|
|
|
+ return True
|
|
|
+
|
|
|
+ # If neither of the strings contains the other, perform a normal
|
|
|
+ # string comparison.
|
|
|
+ logger.debug(f"neither {other.content} nor {self.content} are subs.")
|
|
|
+ return self.content < other.content
|
|
|
+
|
|
|
def __le__(self, other):
    """
    Return whether this token sorts before, or is equal to, `other`.

    Tokens with identical content are equal. When one content string is a
    prefix of the other, the longer string sorts first. Otherwise the
    contents are compared as plain strings.
    """
    logger.debug(f"le called on {self.content}, {other.content}")

    if self.content == other.content:
        return True

    # Use prefix tests rather than substring tests so that the result
    # always agrees with __lt__; a substring test makes e.g. "a" <= "ba"
    # False while "a" < "ba" is True.
    if other.content.startswith(self.content):
        logger.debug(f"{other.content} comes before {self.content}")
        return False

    if self.content.startswith(other.content):
        logger.debug(f"{self.content} comes before {other.content}")
        return True

    logger.debug(f"neither {other.content} nor {self.content} are subs.")
    return self.content < other.content
|
|
|
|
|
|
def __gt__(self, other):
    """
    Return whether this token sorts strictly after `other`.

    Tokens with identical content are equal, so neither is greater. When
    one content string is a prefix of the other, the shorter string sorts
    after the longer one. Otherwise the contents are compared as plain
    strings.
    """
    logger.debug(f"gt called on {self.content}, {other.content}")

    if self.content == other.content:
        return False

    # Use prefix tests rather than substring tests so that the result is
    # the exact mirror of __lt__; a substring test makes both "a" < "ba"
    # and "a" > "ba" True.
    if other.content.startswith(self.content):
        # self is the shorter prefix, so it sorts after the longer string.
        logger.debug(f"{self.content} comes after {other.content}")
        return True

    if self.content.startswith(other.content):
        # self is the longer string, so it sorts before its prefix.
        logger.debug(f"{other.content} comes after {self.content}")
        return False

    logger.debug(f"neither {other.content} nor {self.content} are subs.")
    return self.content > other.content
|
|
|
+
|
|
|
def __ge__(self, other):
    """
    Return whether this token sorts after, or is equal to, `other`.

    Tokens with identical content are equal. When one content string is a
    prefix of the other, the shorter string sorts after the longer one.
    Otherwise the contents are compared as plain strings.
    """
    logger.debug(f"ge called on {self.content}, {other.content}")

    if self.content == other.content:
        return True

    # Use prefix tests rather than substring tests so that the result
    # always agrees with __lt__ and __gt__ for every pair of contents.
    if other.content.startswith(self.content):
        # self is the shorter prefix, so it sorts after the longer string.
        logger.debug(f"{self.content} comes after {other.content}")
        return True

    if self.content.startswith(other.content):
        # self is the longer string, so it sorts before its prefix.
        logger.debug(f"{other.content} comes after {self.content}")
        return False

    logger.debug(f"neither {other.content} nor {self.content} are subs.")
    return self.content > other.content
|
|
|
+
|
|
|
def __eq__(self, other):
    """
    Return whether `other` is a token with the same content.

    Returns NotImplemented when `other` is not a Token, so that comparing
    against None, a plain string, or any unrelated object evaluates to
    False instead of raising AttributeError on the missing `content`.
    """
    # The guard must come before the debug call, which reads other.content.
    if not isinstance(other, Token):
        return NotImplemented

    logger.debug(f"eq called on {self.content}, {other.content}")

    return self.content == other.content
|
|
|
+
|
|
|
def __ne__(self, other):
    """
    Return whether `other` is a token with different content.

    Returns NotImplemented when `other` is not a Token, so that comparing
    against None, a plain string, or any unrelated object falls back to
    Python's default inequality instead of raising AttributeError on the
    missing `content`.
    """
    # The guard must come before the debug call, which reads other.content.
    if not isinstance(other, Token):
        return NotImplemented

    logger.debug(f"ne called on {self.content}, {other.content}")

    return self.content != other.content
|
|
|
+
|
|
|
def __hash__(self):
|
|
|
return hash(self.content)
|
|
|
|
|
@@ -116,7 +184,6 @@ def load_table(tname):
|
|
|
with open(fname) as fh:
|
|
|
tdata = load(fh, Loader=Loader)
|
|
|
|
|
|
- # NOTE Only one level of inheritance. No need for recursion for now.
|
|
|
parents = tdata.get("general", {}).get("parents", [])
|
|
|
|
|
|
if "script_to_roman" in tdata:
|