1 年之前 · 42dc6f1a58
--- a/scriptshifter/tables/__init__.py
+++ b/scriptshifter/tables/__init__.py
@@ -44,6 +44,15 @@ HOOK_PKG_PATH = "scriptshifter.hooks"
 
				 # Default characters defining a word boundary. This is configurable per-table.
			
 
				 WORD_BOUNDARY = " \n\t:;.,\"'-()[]{}"
			
 
				 
			
 
				+# Token word boundary marker. Used in maps to distinguish special
			
 
				+# transliterations for initial, final, and standalone tokens.
			
 
				+TOKEN_WB_MARKER = "%"
			
 
				+
			
 
				+# Word boundary bitwise flags.
			
 
				+BOW = 1 << 1
			
 
				+EOW = 1 << 0
			
 
				+
			
 
				+
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
@@ -55,9 +64,22 @@ class Token(str):
 
				     in a way that prioritizes a longer string over a shorter one with identical
			
 
				     root.
			
 
				     """
			
 
				+    flags = 0
			
 
				+
			
 
				     def __init__(self, content):
			
 
				         self.content = content
			
 
				 
			
 
				+        # Assign special precedence based on token position.
			
 
				+        # Standalone has precedence, then initial, then final, then medial.
			
 
				+        # This is somewhat arbitrary and may change if special cases arise.
			
 
				+        # WB markers are moved to flags to allow default comparison.
			
 
				+        if self.content.endswith(TOKEN_WB_MARKER):
			
 
				+            self.flags |= BOW
			
 
				+            self.content = self.content.rstrip(TOKEN_WB_MARKER)
			
 
				+        if self.content.startswith(TOKEN_WB_MARKER):
			
 
				+            self.flags |= EOW
			
 
				+            self.content = self.content.lstrip(TOKEN_WB_MARKER)
			
 
				+
			
 
				     def __lt__(self, other):
			
 
				         """
			
 
				         Operator to sort tokens.
			
@@ -77,6 +99,17 @@ class Token(str):
 
				         other_len = len(other.content)
			
 
				         min_len = min(self_len, other_len)
			
 
				 
			
 
				+        # Check word boundary flags only if tokens are identical.
			
 
				+        # Higher flag value has precedence.
			
 
				+        if (
			
 
				+                (self.flags > 0 or other.flags > 0)
			
 
				+                and self.content == other.content):
			
 
				+            logger.debug(f"{self.content} flags: {self.flags}")
			
 
				+            logger.debug(f"{other.content} flags: {other.flags}")
			
 
				+            logger.debug("Performing flags comparison.")
			
 
				+
			
 
				+            return self.flags > other.flags
			
 
				+
			
 
				         # If one of the strings is entirely contained in the other string...
			
 
				         if self.content[:min_len] == other.content[:min_len]:
			
 
				             # logger.debug("Roots match.")
			
@@ -148,7 +181,7 @@ def load_table(tname):
 
				                 Token(k): v
			
 
				                 for k, v in tdata["script_to_roman"].get("map", {}).items()}
			
 
				         tdata["script_to_roman"]["map"] = tuple(
			
 
				-                (k.content, tokens[k]) for k in sorted(tokens))
			
 
				+                (k, tokens[k]) for k in sorted(tokens))
			
 
				 
			
 
				         # Normalization.
			
 
				         normalize = {}
			
@@ -184,7 +217,7 @@ def load_table(tname):
 
				             for k, v in tdata["roman_to_script"].get("map", {}).items()
			
 
				         }
			
 
				         tdata["roman_to_script"]["map"] = tuple(
			
 
				-                (k.content, tokens[k]) for k in sorted(tokens))
			
 
				+                (k, tokens[k]) for k in sorted(tokens))
			
 
				 
			
 
				         # Ignore regular expression patterns.
			
 
				         # Patterns are evaluated in the order they are listed in the config.
			
--- a/scriptshifter/tables/data/greek_classical.yml
+++ b/scriptshifter/tables/data/greek_classical.yml
@@ -14,7 +14,8 @@ script_to_roman:
 
				   normalize:
			
 
				     # Assimilate all vowels that can be in a diphthong with upsilon to a
			
 
				     # non-tonal and a tonal form, so that a hiatus can be established.
			
 
				-    # The accent used for the assimilated form is Varia.
			
 
				+    # The accent used for the assimilated form is Varia, which is used for the
			
 
				+    # transliteration rules of hiatuses further down.
			
 
				 
			
 
				     # Alpha
			
 
				     "\u03B1":  # α 	Greek Small Letter Alpha
			
@@ -42,8 +43,9 @@ script_to_roman:
 
				       - "\u1FB8"  # Ᾰ 	Greek Capital Letter Alpha With Vrachy
			
 
				       - "\u1FB9"  # Ᾱ 	Greek Capital Letter Alpha With Macron
			
 
				       - "\u1FBC"  # ᾼ 	Greek Capital Letter Alpha With Prosgegrammeni
			
 
				-    "\u1F0A":  # Ἂ 	Greek Capital Letter Alpha With Psili And Varia
			
 
				+    "\u1FBA":  # Ὰ 	Greek Capital Letter Alpha With Varia
			
 
				       - "\u0386"  # Ά 	Greek Capital Letter Alpha With Tonos
			
 
				+      - "\u1F0A"  # Ἂ 	Greek Capital Letter Alpha With Psili And Varia
			
 
				       - "\u1F0C"  # Ἄ 	Greek Capital Letter Alpha With Psili And Oxia
			
 
				       - "\u1F8A"  # ᾊ 	Greek Capital Letter Alpha With Psili And Varia And Prosgegrammeni
			
 
				       - "\u1F8C"  # ᾌ 	Greek Capital Letter Alpha With Psili And Oxia And Prosgegrammeni
			
@@ -79,6 +81,7 @@ script_to_roman:
 
				       - "\u1F18"  # Ἐ 	Greek Capital Letter Epsilon With Psili
			
 
				     "\u1F1A":  # Ἒ 	Greek Capital Letter Epsilon With Psili And Varia
			
 
				       - "\u1F1C"  # Ἔ 	Greek Capital Letter Epsilon With Psili And Oxia
			
 
				+      - "\u0388"  # Έ 	Greek Capital Letter Epsilon With Tonos
			
 
				     # Rough epsilon
			
 
				     "\u1F11":  #  ἑ 	Greek Small Letter Epsilon With Dasia
			
 
				       - "\u03B5\u0314"  # Small epsilon + combination Dasia
			
@@ -90,11 +93,11 @@ script_to_roman:
 
				       - "\u1F1D"  # Ἕ 	Greek Capital Letter Epsilon With Dasia And Oxia
			
 
				     # Eta
			
 
				     "\u03B7":  # η 	Greek Small Letter Eta
			
 
				-      - "\u03AE"  # ή 	Greek Small Letter Eta With Tonos
			
 
				       - "\u1F20"  # ἠ 	Greek Small Letter Eta With Psili
			
 
				       - "\u1FC3"  # ῃ 	Greek Small Letter Eta With Ypogegrammeni
			
 
				       - "\u1F90"  # ᾐ 	Greek Small Letter Eta With Psili And Ypogegrammeni
			
 
				     "\u1F74":  # ὴ 	Greek Small Letter Eta With Varia
			
 
				+      - "\u03AE"  # ή 	Greek Small Letter Eta With Tonos
			
 
				       - "\u1F22"  # ἢ 	Greek Small Letter Eta With Psili And Varia
			
 
				       - "\u1F24"  # ἤ 	Greek Small Letter Eta With Psili And Oxia
			
 
				       - "\u1F26"  # ἦ 	Greek Small Letter Eta With Psili And Perispomeni
			
@@ -111,6 +114,7 @@ script_to_roman:
 
				       - "\u1F98"  # ᾘ 	Greek Capital Letter Eta With Psili And Prosgegrammeni
			
 
				       - "\u1FCC"  # ῌ 	Greek Capital Letter Eta With Prosgegrammeni
			
 
				     "\u1F2A":  # Ἢ 	Greek Capital Letter Eta With Psili And Varia
			
 
				+      - "\u0389"  # Ή 	Greek Capital Letter Eta With Tonos
			
 
				       - "\u1F2C"  # Ἤ 	Greek Capital Letter Eta With Psili And Oxia
			
 
				       - "\u1F2E"  # Ἦ 	Greek Capital Letter Eta With Psili And Perispomeni
			
 
				       - "\u1F9A"  # ᾚ 	Greek Capital Letter Eta With Psili And Varia And Prosgegrammeni
			
@@ -154,6 +158,7 @@ script_to_roman:
 
				       - "\u1FD6"
			
 
				       - "\u1FD7"
			
 
				     "\u0399":
			
 
				+      - "\u038A"
			
 
				       - "\u03AA"
			
 
				       - "\u1F38"
			
 
				       - "\u1F3A"
			
@@ -172,18 +177,18 @@ script_to_roman:
 
				       - "\u1F3B"
			
 
				       - "\u1F3D"
			
 
				       - "\u1F3F"
			
 
				-        # ὶ
			
 
				     # Omicron
			
 
				     "\u03BF":  # ο 	Greek Small Letter Omicron
			
 
				-      - "\u03cc"  # ό 	Greek Small Letter Omicron With Tonos
			
 
				       - "\u1F40"  # ὀ 	Greek Small Letter Omicron With Psili
			
 
				     "\u1F78":  # ὸ 	Greek Small Letter Omicron With Varia
			
 
				+      - "\u03CC"  # ό 	Greek Small Letter Omicron With Tonos
			
 
				       - "\u1F42"  # ὂ 	Greek Small Letter Omicron With Psili And Varia
			
 
				       - "\u1F44"  # ὄ 	Greek Small Letter Omicron With Psili And Oxia
			
 
				       - "\u1F79"  # ό 	Greek Small Letter Omicron With Oxia
			
 
				     "\u039F":  # Ο 	Greek Capital Letter Omicron
			
 
				-      - "\u1F48"  # Ὀ 	Greek Capital Letter Omicron With Psili
			
 
				+      - "\u1F48"  # Ὀ 	Greek Capital Letter Omicron With Psili
			
 
				     "\u1F4A":  # Ὂ 	Greek Capital Letter Omicron With Psili And Varia
			
 
				+      - "\u038C"  # Ό 	Greek Capital Letter Omicron With Tonos
			
 
				       - "\u1F4C"  # Ὄ 	Greek Capital Letter Omicron With Psili And Oxia
			
 
				     # Rough Omicron
			
 
				     "\u1F41":  # ὁ 	Greek Small Letter Omicron With Dasia
			
@@ -205,26 +210,31 @@ script_to_roman:
 
				 
			
 
				     # Upsilon
			
 
				     "\u03C5":
			
 
				-      - "\u03CD"
			
 
				-      - "\u1F50"
			
 
				-      - "\u1F52"
			
 
				-      - "\u1F54"
			
 
				-      - "\u1F56"
			
 
				-      - "\u1FE6"
			
 
				-      - "\u1F7A"
			
 
				-      - "\u1F7B"
			
 
				-      - "\u1FE0"
			
 
				-      - "\u1FE1"
			
 
				+      - "\u03CD"  # ύ 	Greek Small Letter Upsilon With Tonos
			
 
				+      - "\u1F50"  # ὐ 	Greek Small Letter Upsilon With Psili
			
 
				+      - "\u1F52"  # ὒ 	Greek Small Letter Upsilon With Psili And Varia
			
 
				+      - "\u1F54"  # ὔ 	Greek Small Letter Upsilon With Psili And Oxia
			
 
				+      - "\u1F56"  # ὖ 	Greek Small Letter Upsilon With Psili And Perispomeni
			
 
				+      - "\u1F7A"  # ὺ 	Greek Small Letter Upsilon With Varia
			
 
				+      - "\u1F7B"  # ύ 	Greek Small Letter Upsilon With Oxia
			
 
				+      - "\u1FE0"  # ῠ 	Greek Small Letter Upsilon With Vrachy
			
 
				+      - "\u1FE1"  # ῡ 	Greek Small Letter Upsilon With Macron
			
 
				+      - "\u1FE6"  # ῦ 	Greek Small Letter Upsilon With Perispomeni
			
 
				     "\u03CB":  # ϋ 	Greek Small Letter Upsilon With Dialytika
			
 
				-      - "\u03B0"
			
 
				-      - "\u1FE2"
			
 
				-      - "\u1FE3"
			
 
				-      - "\u1FE7"
			
 
				+      - "\u03B0"  # ΰ 	Greek Small Letter Upsilon With Dialytika And Tonos
			
 
				+      - "\u1FE2"  # ῢ 	Greek Small Letter Upsilon With Dialytika And Varia
			
 
				+      - "\u1FE3"  # ΰ 	Greek Small Letter Upsilon With Dialytika And Oxia
			
 
				+      - "\u1FE7"  # ῧ 	Greek Small Letter Upsilon With Dialytika And Perispomeni
			
 
				     "\u03A5":
			
 
				         # NOTE: Capital upsilon + psili seems to be absent from Unicode table.
			
 
				-      - "\u03AB"
			
 
				-      - "\u1FE8"
			
 
				-      - "\u1FE9"
			
 
				+      - "\u03AB"  # Ϋ 	Greek Capital Letter Upsilon With Dialytika
			
 
				+      - "\u1F59"  # Ὑ 	Greek Capital Letter Upsilon With Dasia
			
 
				+      - "\u1FE8"  # Ῠ 	Greek Capital Letter Upsilon With Vrachy
			
 
				+      - "\u1FE9"  # Ῡ 	Greek Capital Letter Upsilon With Macron
			
 
				+    "\u1FEA":  # Ὺ 	Greek Capital Letter Upsilon With Varia
			
 
				+      - "\u1F5B"  # Ὓ 	Greek Capital Letter Upsilon With Dasia And Varia
			
 
				+      - "\u1F5D"  # Ὕ 	Greek Capital Letter Upsilon With Dasia And Oxia
			
 
				+      - "\u1F5F"  # Ὗ 	Greek Capital Letter Upsilon With Dasia And Perispomeni
			
 
				     # Rough Upsilon
			
 
				     "\u1F51":
			
 
				       - "\u03C5\u0314"
			
@@ -239,11 +249,11 @@ script_to_roman:
 
				 
			
 
				     # Omega
			
 
				     "\u03C9":  # ω 	Greek Small Letter Omega
			
 
				-      - "\u03CE"  # ώ 	Greek Small Letter Omega With Tonos
			
 
				       - "\u1F60"  # ὠ 	Greek Small Letter Omega With Psili
			
 
				       - "\u1FA0"  # ᾠ 	Greek Small Letter Omega With Psili And Ypogegrammeni
			
 
				       - "\u1FF3"  # ῳ 	Greek Small Letter Omega With Ypogegrammeni
			
 
				     "\u1F7C":  # ὼ 	Greek Small Letter Omega With Varia
			
 
				+      - "\u03CE"  # ώ 	Greek Small Letter Omega With Tonos
			
 
				       - "\u1F62"  # ὢ 	Greek Small Letter Omega With Psili And Varia
			
 
				       - "\u1F64"  # ὤ 	Greek Small Letter Omega With Psili And Oxia
			
 
				       - "\u1F66"  # ὦ 	Greek Small Letter Omega With Psili And Perispomeni
			
@@ -260,11 +270,12 @@ script_to_roman:
 
				       - "\u1FA8"  # ᾨ 	Greek Capital Letter Omega With Psili And Prosgegrammeni
			
 
				       - "\u1FFC"  # ῼ 	Greek Capital Letter Omega With Prosgegrammeni
			
 
				     "\u1FFA":  # Ὼ 	Greek Capital Letter Omega With Varia
			
 
				+      - "\u038F"  # Ώ 	Greek Capital Letter Omega With Tonos
			
 
				       - "\u1F6A"  # Ὢ 	Greek Capital Letter Omega With Psili And Varia
			
 
				       - "\u1F6C"  # Ὤ 	Greek Capital Letter Omega With Psili And Oxia
			
 
				       - "\u1F6E"  # Ὦ 	Greek Capital Letter Omega With Psili And Perispomeni
			
 
				       - "\u1FAA"  # ᾪ 	Greek Capital Letter Omega With Psili And Varia And Prosgegrammeni
			
 
				-      - "\u1FAC"  # ᾬ 	Greek Capital Letter Omega With Psili And Oxia And Prosgegrammeni
			
 
				+      - "\u1FAC"  # ᾬ 	Greek Capital Letter Omega With Psili And Oxia And Prosgegrammeni
			
 
				       - "\u1FAE"  # ᾮ 	Greek Capital Letter Omega With Psili And Perispomeni And Prosgegrammeni
			
 
				     # Rough omega
			
 
				     "\u1F61":  # ὡ 	Greek Small Letter Omega With Dasia
			
@@ -294,6 +305,8 @@ script_to_roman:
 
				       - "\u0344"
			
 
				       - "\u0345"
			
 
				       - "\u037A"
			
 
				+      - "\u0384"
			
 
				+      - "\u0385"
			
 
				       - "\u1FBD"
			
 
				       - "\u1FBE"
			
 
				       - "\u1FBF"
			
@@ -306,18 +319,6 @@ script_to_roman:
 
				       - "\u1FEE"
			
 
				       - "\u1FFD"
			
 
				 
			
 
				-  map_initial:
			
 
				-    "\u0393\u03BA": "Gk"
			
 
				-    "\u03B3\u03BA": "gk"
			
 
				-    "\u039C\u03C0": "B"
			
 
				-    "\u03BC\u03C0": "b"
			
 
				-    "\u039D\u03C4": "\u1E0E"
			
 
				-    "\u03BD\u03C4": "\u1E0F"
			
 
				-
			
 
				-  map_final:
			
 
				-    "\u0393\u03BA": "Gk"
			
 
				-    "\u03B3\u03BA": "gk"
			
 
				-
			
 
				   map:
			
 
				     "\u201C": "\"\u0332"
			
 
				     "\u201D": "\"\u0333"
			
@@ -368,22 +369,34 @@ script_to_roman:
 
				     "\u038F": "\u014C\u0301"
			
 
				     "\u0390": "i\u0308\u0301"
			
 
				     "\u1F09": "Ha"
			
 
				+    "\u1F0B": "Ha"
			
 
				     "\u0391\u1F31": "Hai"
			
 
				     "\u0391\u1F51": "Hau"
			
 
				     "\u0391\u1F61": "Ha\u014D"
			
 
				     "\u0391\u03C5": "Au"
			
 
				+    "\u1FBA\u03C5": "Ay"  # Tonos on preceding vowel
			
 
				     "\u0391": "A"
			
 
				+    "\u1FBA": "A"
			
 
				     "\u0392": "B"
			
 
				     "\u0393": "G"
			
 
				     "\u0394": "D"
			
 
				-    "\u1F19": "He"
			
 
				     "\u0395\u03C5": "Eu"
			
 
				+    "\u1F19": "He"
			
 
				+    "\u1F1B": "He"
			
 
				+    "\u1F19\u03C5": "Heu"
			
 
				+    "\u1F1A\u03C5": "Ey"  # Tonos on preceding vowel
			
 
				+    "\u1F1B\u03C5": "Hey"  # Tonos on preceding vowel
			
 
				     "\u0395": "E"
			
 
				+    "\u1F1A": "E"
			
 
				     "\u0396": "Z"
			
 
				     "\u1F29": "H\u0113"
			
 
				+    "\u1F2B": "H\u0113"
			
 
				     "\u0397": "\u0112"
			
 
				+    "\u1F2A": "\u0112"
			
 
				     "\u0397\u03C5": "\u0112u"
			
 
				-    "\u0397\u1F51": "H\u0113u"
			
 
				+    "\u1F2A\u03C5": "\u0112y"  # Tonos on preceding vowel
			
 
				+    "\u1F29\u1F51": "H\u0113u"
			
 
				+    "\u1F2B\u1F51": "H\u0113y"  # Tonos on preceding vowel
			
 
				     "\u0398": "Th"
			
 
				     "\u1F39": "Hi"
			
 
				     "\u0399\u03C5": "Iu"
			
@@ -392,12 +405,15 @@ script_to_roman:
 
				     "\u039B": "L"
			
 
				     "\u039C\u03C0%": "B"
			
 
				     "\u039C": "M"
			
 
				-    # "\u039D\u03C4%": "D\u0332"
			
 
				+    "\u039D\u03C4%": "\u1E0E"
			
 
				     "\u039D": "N"
			
 
				     "\u039E": "X"
			
 
				     "\u1F49": "Ho"
			
 
				+    "\u1F4B": "Ho"
			
 
				     "\u039F\u03C5": "Ou"
			
 
				+    "\u1F4A\u03C5": "Oy"  # Tonos on preceding vowel
			
 
				     "\u039F": "O"
			
 
				+    "\u1F4A": "O"
			
 
				     "\u03A0": "P"
			
 
				     "\u1FEC": "Rh"
			
 
				     "\u03A1": "R"
			
@@ -412,8 +428,13 @@ script_to_roman:
 
				     "\u03A7": "Ch"
			
 
				     "\u03A8": "Ps"
			
 
				     "\u1F69": "H\u014D"
			
 
				+    "\u1F6B": "H\u014D"
			
 
				+    "\u1F69\u03C5": "H\u014Du"
			
 
				+    "\u1F6B\u03C5": "H\u014Dy"  # Tonos on preceding vowel
			
 
				     "\u03A9": "\u014C"
			
 
				+    "\u1FFA": "\u014C"
			
 
				     "\u03A9\u03C5": "\u014Cu"
			
 
				+    "\u1FFA\u03C5": "\u014Cy"  # Tonos on preceding vowel
			
 
				     "\u03AA": "I\u0308"
			
 
				     "\u03AB": "Y\u0308"
			
 
				     "\u03AC\u0314": "ha\u0301"
			
@@ -425,44 +446,68 @@ script_to_roman:
 
				     "\u03AF\u0314": "hi\u0301"
			
 
				     "\u03AF": "i\u0301"
			
 
				     "\u03B0": "y\u0308\u0301"
			
 
				-    "\u1F01": "ha"
			
 
				-    "\u1F01\u1F31": "hai"
			
 
				-    "\u1F01\u03C5": "hau"
			
 
				+    "\u03B1": "a"
			
 
				+    "\u1F70": "a"
			
 
				+    "\u03B1\u03C5": "au"
			
 
				     "\u03B1\u1F31": "hai"
			
 
				     "\u03B1\u1F51": "hau"
			
 
				     "\u03B1\u1F61": "ha\u014D"
			
 
				-    "\u03B1\u03C5": "au"
			
 
				-    "\u03B1": "a"
			
 
				+    "\u1F01": "ha"
			
 
				+    "\u1F03": "ha"
			
 
				+    "\u1F01\u03C5": "hau"
			
 
				+    "\u1F01\u1F31": "hai"
			
 
				+    "\u1F03\u03C5": "hay"  # Tonos on preceding vowel
			
 
				+    "\u1f70\u03C5": "ay"  # Tonos on preceding vowel
			
 
				     "\u03B2": "b"
			
 
				     "\u03B3\u03B3": "ng"
			
 
				-    "%\u03B3\u03BA%": "nk"
			
 
				+    "\u03B3\u03BA": "nk"
			
 
				+    "\u0393\u03BA%": "Gk"
			
 
				+    "\u03B3\u03BA%": "gk"
			
 
				+    "%\u0393\u03BA": "Gk"
			
 
				+    "%\u03B3\u03BA": "gk"
			
 
				     "\u03B3\u03BE": "nx"
			
 
				     "\u03B3\u03C7": "nch"
			
 
				     "\u03B3": "g"
			
 
				     "\u03B4": "d"
			
 
				     "\u1F11": "he"
			
 
				+    "\u1F13": "he"
			
 
				     "\u03B5\u03C5": "eu"
			
 
				+    "\u1F72\u03C5": "ey"  # Tonos on preceding vowel
			
 
				     "\u03B5\u1F51": "heu"
			
 
				+    "\u1F13\u1F51": "hey"  # Tonos on preceding vowel
			
 
				     "\u03B5": "e"
			
 
				+    "\u1F72": "e"
			
 
				     "\u03B6": "z"
			
 
				-    "\u1F21": "h\u0113"
			
 
				     "\u03B7": "\u0113"
			
 
				+    "\u1F74": "\u0113"
			
 
				     "\u03B7\u03C5": "\u0113u"
			
 
				+    "\u1F74\u03C5": "\u0113y"  # Tonos on preceding vowel
			
 
				+    "\u1F21": "h\u0113"
			
 
				+    "\u1F23": "h\u0113"
			
 
				+    "\u1F21\u03C5": "h\u0113u"
			
 
				     "\u03B7\u1F51": "h\u0113u"
			
 
				+    "\u1F23\u03C5": "h\u0113y"  # Tonos on preceding vowel
			
 
				     "\u03B8": "th"
			
 
				     "\u1F31": "hi"
			
 
				+    "\u1F31\u03C5": "hiu"
			
 
				     "\u03B9\u03C5": "iu"
			
 
				     "\u03B9": "i"
			
 
				     "\u03BA": "k"
			
 
				     "\u03BB": "l"
			
 
				     "\u03BC\u03C0%": "b"
			
 
				     "\u03BC": "m"
			
 
				-    # "\u03BD\u03C4%": "d\u0332"
			
 
				+    "\u03BD\u03C4%": "\u1E0F"
			
 
				     "\u03BD": "n"
			
 
				     "\u03BE": "x"
			
 
				     "\u1F41": "ho"
			
 
				-    "\u03BF\u03C5": "ou"
			
 
				+    "\u1F43": "ho"
			
 
				+    "\u1F41\u03C5": "hou"
			
 
				+    "\u03BF\u1F51": "hou"
			
 
				+    "\u1F43\u03C5": "hoy"  # Tonos on preceding vowel
			
 
				     "\u03BF": "o"
			
 
				+    "\u1F78": "o"
			
 
				+    "\u03BF\u03C5": "ou"
			
 
				+    "\u1F78\u03C5": "oy"  # Tonos on preceding vowel
			
 
				     "\u03C0": "p"
			
 
				     "\u1FE5": "rh"
			
 
				     "\u03C1": "r"
			
@@ -470,17 +515,25 @@ script_to_roman:
 
				     "\u03C3": "s"
			
 
				     "\u03C4": "t"
			
 
				     "\u1F51": "hy"
			
 
				+    "\u1F59": "Hy"
			
 
				     "\u03C5": "y"
			
 
				+    "\u03CB": "y"
			
 
				     "\u03C5\u03B9": "ui"
			
 
				     "\u03C5\u1F31": "hui"
			
 
				     "\u03C6": "ph"
			
 
				     "\u03C7": "ch"
			
 
				     "\u03C8": "ps"
			
 
				-    "\u1F61": "h\u014D"
			
 
				     "\u03C9": "\u014D"
			
 
				+    "\u1F7C": "\u014D"
			
 
				     "\u03C9\u03C5": "\u014Du"
			
 
				+    "\u1F7C\u03C5": "\u014Dy"  # Tonos on preceding vowel
			
 
				+    "\u1F61": "h\u014D"
			
 
				+    "\u1F63": "h\u014D"
			
 
				+    "\u1F61\u03C5": "h\u014Du"
			
 
				+    "\u03C9\u1F51": "h\u014Du"
			
 
				+    "\u1F63\u03C5": "h\u014Dy"  # Tonos on preceding vowel
			
 
				     "\u03CA": "i\u0308"
			
 
				-    "\u03CB": "y\u0308"
			
 
				+    "\u03CB": "y"
			
 
				     "\u03CC": "o\u0301"
			
 
				     "\u03CD": "y\u0301"
			
 
				     "\u03CE": "\u014D\u0301"
			
@@ -564,8 +617,8 @@ script_to_roman:
 
				       "Ch": "\u03A7"
			
 
				       "ch": "\u03C7"
			
 
				       "c\u030C": "\u03EB"
			
 
				-      "D\u0332": "\u039D\u03C4"
			
 
				-      "d\u0332": "\u03BD\u03C4"
			
 
				+      "\u1E0E": "\u039D\u03C4"
			
 
				+      "\u1E0F": "\u03BD\u03C4"
			
 
				       "D": "\u0394"
			
 
				       "d": "\u03B4"
			
 
				       "Eu": "\u0395\u03C5"
			
--- a/scriptshifter/tables/data/greek_modern.yml
+++ b/scriptshifter/tables/data/greek_modern.yml
@@ -4,40 +4,6 @@ general:
 
				     - greek_classical
			
 
				 
			
 
				 script_to_roman:
			
 
				-  normalize:
			
 
				-    "\u03B1":
			
 
				-      - "\u03AC"
			
 
				-    "\u0391":
			
 
				-      - "\u0386"
			
 
				-    "\u03B5":
			
 
				-      - "\u03AD"
			
 
				-    "\u0395":
			
 
				-      - "\u0388"
			
 
				-    "\u03B7":
			
 
				-      - "\u03AE"
			
 
				-    "\u0397":
			
 
				-      - "\u0389"
			
 
				-    "\u03B9":
			
 
				-      - "\u03AF"
			
 
				-    "\u0399":
			
 
				-      - "\u038A"
			
 
				-    "\u03BF":
			
 
				-      - "\u03CC"
			
 
				-    "\u039F":
			
 
				-      - "\u038C"
			
 
				-    "\u03C5":
			
 
				-      - "\u03CD"
			
 
				-    "\u03A5":
			
 
				-      - "\u038E"
			
 
				-    "\u03C9":
			
 
				-      - "\u03CE"
			
 
				-    "\u03A9":
			
 
				-      - "\u038F"
			
 
				-    "":
			
 
				-      - "\u0344"
			
 
				-      - "\u0384"
			
 
				-      - "\u0385"
			
 
				-
			
 
				   map:
			
 
				     "\u0392": "V"
			
 
				     "\u03B2": "v"
			
--- a/scriptshifter/trans.py
+++ b/scriptshifter/trans.py
@@ -2,16 +2,12 @@ import logging
 
				 import re
			
 
				 
			
 
				 from scriptshifter.exceptions import BREAK, CONT
			
 
				-from scriptshifter.tables import WORD_BOUNDARY, load_table
			
 
				+from scriptshifter.tables import BOW, EOW, WORD_BOUNDARY, load_table
			
 
				 
			
 
				 
			
 
				 # Match multiple spaces.
			
 
				 MULTI_WS_RE = re.compile(r"\s{2,}")
			
 
				 
			
 
				-# Cursor bitwise flags.
			
 
				-CUR_BOW = 1 << 0
			
 
				-CUR_EOW = 1 << 1
			
 
				-
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
@@ -122,17 +118,6 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
 
				     ctx.cur = 0
			
 
				     word_boundary = langsec.get("word_boundary", WORD_BOUNDARY)
			
 
				 
			
 
				-    map_default = langsec["map"]
			
 
				-    map_initial = (
			
 
				-            langsec["map_initial"] + map_default
			
 
				-            if "map_initial" in langsec else None)
			
 
				-    map_final = (
			
 
				-            langsec["map_final"] + map_default
			
 
				-            if "map_final" in langsec else None)
			
 
				-    # TODO unused
			
 
				-    map_standalone = (
			
 
				-            langsec["map_standalone"] + map_default
			
 
				-            if "map_standalone" in langsec else None)
			
 
				     while ctx.cur < len(ctx.src):
			
 
				         # Reset cursor position flags.
			
 
				         # Carry over extended "beginning of word" flag.
			
@@ -140,19 +125,14 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
 
				         cur_char = ctx.src[ctx.cur]
			
 
				 
			
 
				         # Look for a word boundary and flag the word beginning/end if found.
			
 
				-        if (ctx.cur == 0 or ctx.src[ctx.cur - 1] in word_boundary) and (
			
 
				-                cur_char not in word_boundary):
			
 
				+        if _is_bow(ctx.cur, ctx, word_boundary):
			
 
				             # Beginning of word.
			
 
				             logger.debug(f"Beginning of word at position {ctx.cur}.")
			
 
				-            ctx.cur_flags |= CUR_BOW
			
 
				-        if (
			
 
				-            ctx.cur == len(ctx.src) - 1
			
 
				-            or ctx.src[ctx.cur + 1] in word_boundary
			
 
				-        ) and (cur_char not in word_boundary):
			
 
				-            # Beginning of word.
			
 
				+            ctx.cur_flags |= BOW
			
 
				+        if _is_eow(ctx.cur, ctx, word_boundary):
			
 
				             # End of word.
			
 
				             logger.debug(f"End of word at position {ctx.cur}.")
			
 
				-            ctx.cur_flags |= CUR_EOW
			
 
				+            ctx.cur_flags |= EOW
			
 
				 
			
 
				         # This hook may skip the parsing of the current
			
 
				         # token or exit the scanning loop altogether.
			
@@ -202,42 +182,44 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
 
				         # Begin transliteration token lookup.
			
 
				         ctx.match = False
			
 
				 
			
 
				-        # Assign special maps based on token position.
			
 
				-        # Standalone has precedence, then initial, then final, then medial.
			
 
				-        # This is totally arbitrary and amy change if special cases arise.
			
 
				-        if (
			
 
				-                ctx.cur_flags & CUR_BOW and ctx.cur_flags & CUR_EOW
			
 
				-                and map_standalone):
			
 
				-            map_ = map_standalone
			
 
				-        elif ctx.cur_flags & CUR_BOW and map_initial:
			
 
				-            map_ = map_initial
			
 
				-        elif ctx.cur_flags & CUR_EOW and map_final:
			
 
				-            map_ = map_final
			
 
				-        else:
			
 
				-            map_ = map_default
			
 
				-
			
 
				-        for ctx.src_tk, ctx.dest_tk in map_:
			
 
				+        for ctx.src_tk, ctx.dest_str in langsec["map"]:
			
 
				             hret = _run_hook("pre_tx_token", ctx, langsec_hooks)
			
 
				             if hret == BREAK:
			
 
				                 break
			
 
				             if hret == CONT:
			
 
				                 continue
			
 
				 
			
 
				-            step = len(ctx.src_tk)
			
 
				+            step = len(ctx.src_tk.content)
			
 
				+            # If the token is longer than the remainder of the string,
			
 
				+            # it surely won't match.
			
 
				+            if ctx.cur + step > len(ctx.src):
			
 
				+                continue
			
 
				 
			
 
				             # If the first character of the token is greater (= higher code
			
 
				             # point value) than the current character, then break the loop
			
 
				             # without a match, because we know there won't be any more match
			
 
				             # due to the alphabetical ordering.
			
 
				-            if ctx.src_tk[0] > cur_char:
			
 
				+            if ctx.src_tk.content[0] > cur_char:
			
 
				                 logger.debug(
			
 
				-                        f"{ctx.src_tk} is after "
			
 
				+                        f"{ctx.src_tk.content} is after "
			
 
				                         f"{ctx.src[ctx.cur:ctx.cur + step]}. Breaking loop.")
			
 
				                 break
			
 
				 
			
 
				+            # If src_tk has a WB flag but the token is not at WB, skip.
			
 
				+            if (
			
 
				+                (ctx.src_tk.flags & BOW and not ctx.cur_flags & BOW)
			
 
				+                or
			
 
				+                # Can't rely on EOW flag, we must check on the last character
			
 
				+                # of the potential match.
			
 
				+                (ctx.src_tk.flags & EOW and not _is_eow(
			
 
				+                        ctx.cur + step - 1, ctx, word_boundary))
			
 
				+            ):
			
 
				+                continue
			
 
				+
			
 
				             # Longer tokens should be guaranteed to be scanned before their
			
 
				             # substrings at this point.
			
 
				-            if ctx.src_tk == ctx.src[ctx.cur:ctx.cur + step]:
			
 
				+            # Similarly, flagged tokens are evaluated first.
			
 
				+            if ctx.src_tk.content == ctx.src[ctx.cur:ctx.cur + step]:
			
 
				                 ctx.match = True
			
 
				                 # This hook may skip this token or break out of the token
			
 
				                 # lookup for the current position.
			
@@ -256,20 +238,21 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
 
				                     or
			
 
				                     (
			
 
				                         ctx.options["capitalize"] == "all"
			
 
				-                        and ctx.cur_flags & CUR_BOW
			
 
				+                        and ctx.cur_flags & BOW
			
 
				                     )
			
 
				                 ):
			
 
				                     logger.info("Capitalizing token.")
			
 
				                     double_cap = False
			
 
				                     for dcap_rule in ctx.langsec.get("double_cap", []):
			
 
				-                        if ctx.dest_tk == dcap_rule:
			
 
				-                            ctx.dest_tk = ctx.dest_tk.upper()
			
 
				+                        if ctx.dest_str == dcap_rule:
			
 
				+                            ctx.dest_str = ctx.dest_str.upper()
			
 
				                             double_cap = True
			
 
				                             break
			
 
				                     if not double_cap:
			
 
				-                        ctx.dest_tk = ctx.dest_tk[0].upper() + ctx.dest_tk[1:]
			
 
				+                        ctx.dest_str = (
			
 
				+                                ctx.dest_str[0].upper() + ctx.dest_str[1:])
			
 
				 
			
 
				-                ctx.dest_ls.append(ctx.dest_tk)
			
 
				+                ctx.dest_ls.append(ctx.dest_str)
			
 
				                 ctx.cur += step
			
 
				                 break
			
 
				 
			
@@ -320,6 +303,18 @@ def _normalize_src(ctx):
 
				     logger.debug(f"Normalized source: {ctx.src}")
			
 
				 
			
 
				 
			
 
				+def _is_bow(cur, ctx, word_boundary):
			
 
				+    return (cur == 0 or ctx.src[cur - 1] in word_boundary) and (
			
 
				+            ctx.src[cur] not in word_boundary)
			
 
				+
			
 
				+
			
 
				+def _is_eow(cur, ctx, word_boundary):
			
 
				+    return (
			
 
				+        cur == len(ctx.src) - 1
			
 
				+        or ctx.src[cur + 1] in word_boundary
			
 
				+    ) and (ctx.src[cur] not in word_boundary)
			
 
				+
			
 
				+
			
 
				 def _run_hook(hname, ctx, hooks):
			
 
				     ret = None
			
 
				     for hook_def in hooks.get(hname, []):
			
--- a/tests/data/script_samples/greek.csv
+++ b/tests/data/script_samples/greek.csv
@@ -29,14 +29,14 @@ greek_modern,Ορθόδοξος Αυτοκέφαλος Εκκλησία της
 
				 greek_modern,Βίος και πολιτεία του Αλέξη Ζορμπά,Vios kai politeia tou Alexē Zormpa,,
			
 
				 greek_modern,Βίος καὶ πολιτεία τοῦ Ἀλέξη Ζορμπᾶ,Vios kai politeia tou Alexē Zormpa,,
			
 
				 greek_modern,Λασκαρίνα Μπουμπουλίνα,Laskarina Boumpoulina,,
			
 
				-greek_modern,Νταίηβιντ Μίτσελ,D̲aiēvint Mitsel,,
			
 
				+greek_modern,Νταίηβιντ Μίτσελ,Ḏaiēvint Mitsel,,
			
 
				 greek_modern,Τζαίημς Τζόυς,Tzaiēms Tzoys,,
			
 
				 greek_modern,Ἡ κοινωνιολογία τοῦ ρεμπέτικου,Hē koinōniologia tou rempetikou,,
			
 
				 greek_modern,Βίλλυ Μπραντ,Villy Brant,,
			
 
				 greek_modern,Μπραντ Πιτ,Brant Pit,,
			
 
				 greek_modern,Γιάκομπ Φίλιπ Φαλμεράυερ,Giakomp Philip Phalmerayer,,
			
 
				 greek_modern,Σαρλ Ογκουστίν ντε Κουλόμπ,Sarl Onkoustin de Koulomp,,
			
 
				-greek_modern,Λαμπέρτο Ντίνι,Lamperto D̲ini,,
			
 
				+greek_modern,Λαμπέρτο Ντίνι,Lamperto Ḏini,,
			
 
				 greek_modern,Τζωρτζ Χέρμπερτ Ουώκερ Μπους,Tzōrtz Chermpert Ouōker Bous,,
			
 
				 greek_modern,Ουίνστων Τσώρτσιλ,Ouinstōn Tsōrtsil,,
			
 
				 greek_modern,Παγκόσμιο Κέντρο Εμπορίου,Pankosmio Kentro Emporiou,,
			
@@ -45,7 +45,7 @@ greek_modern,Γκέτεμποργκ,Gketemporgk,,
 
				 greek_modern,Ουάσιγκτον,Ouasinkton,,
			
 
				 greek_modern,Ουάσινγκτον,Ouasinnkton,,
			
 
				 greek_modern,Αεροδρόμιο Ρόναλντ Ρέιγκαν της Ουάσινγκτον,Aerodromio Ronalnt Reinkan tēs Ouasinnkton,,
			
 
				-greek_modern,Ντμίτρι Ιβάνοβιτς Μεντελέγιεφ,D̲mitri Ivanovits Mentelegieph,,
			
 
				+greek_modern,Ντμίτρι Ιβάνοβιτς Μεντελέγιεφ,Ḏmitri Ivanovits Mentelegieph,,
			
 
				 greek_modern,Άγγελος Σταύρου Βλάχος,Angelos Staurou Vlachos,,
			
 
				 greek_modern,ΟΔΗΓΟΣ ΜΑΡΚΕΤΙΝΓΚ ΕΛΛΑΔΟΣ / Ἑλληνικό Ἰνστιτοῦτο Μάρκετινγκ τῆς Ἑλληνικῆς Ἑταιρίας Διοικήσεως Ἐπιχειρήσεων,Hodēgos marketingk Hellados / Hellēniko Institouto Marketingk tēs Hellēnikēs Hetairias Dioikēseōs Epicheirēseōn,,
			
 
				 greek_modern,Σάλπιγξ Ἑλληνική,Salpinx Hellēnikē,,