3 years ago · 19063a2efa
--- a/TODO.md
+++ b/TODO.md
@@ -14,16 +14,20 @@ discussion, etc.); *X* = not implementing.
 
				 - *D* Basic REST API
			
 
				 - *D* Basic UI
			
 
				 - *D* Life cycle hooks for plugins
			
 
				-- *W* Regular expressions in ignore lists
			
 
				+- *B* Regular expressions in ignore lists
			
 
				 - *W* Word boundaries (design)
			
 
				   - *B* Define word boundary characters
			
 
				   - *D* Mark end-of-word and beginning-of-word characters
			
 
				-- *P* Optimize token lookup
			
 
				-  - *P* Break loop early based on alphabetical order
			
 
				-  - *P* Ignore word break characters
			
 
				+- *B* Optimize token lookup
			
 
				+  - *D* Break loop early based on alphabetical order
			
 
				+  - *B* Ignore word break characters
			
 
				 - *D* API documentation
			
 
				-- *P* Config file documentation
			
 
				+- *D* Config file documentation
			
 
				 - *D* Hooks documentation
			
 
				+- *P* Tests
			
 
				+  - *P* Config parsing
			
 
				+  - *P* Transliteration
			
 
				+  - *P* REST API
			
 
				 - *W* Complete conversion of existing tables to YAML
			
 
				   - *P* Arabic
			
 
				   - *P* Armenian
			
--- a/transliterator/trans.py
+++ b/transliterator/trans.py
@@ -11,7 +11,7 @@ MULTI_WS_RE = re.compile(r"\s{2,}")
 
				 # per-table.
			
 
				 WORD_BOUNDARY = " \n\t:;.,\"'"
			
 
				 
			
 
				-# Cursor flags.
			
 
				+# Cursor bitwise flags.
			
 
				 CUR_BOW = 1
			
 
				 CUR_EOW = 2
			
 
				 
			
@@ -157,9 +157,20 @@ def transliterate(src, lang, r2s=False):
 
				             if hret == CONT:
			
 
				                 continue
			
 
				 
			
 
				+            step = len(ctx.src_tk)
			
 
				+
			
 
				+            # If the first character of the token is greater (= higher code
			
 
				+            # point value) than the current character, then break the loop
			
 
				+            # without a match, because we know there won't be any more matches
			
 
				+            # due to the alphabetical ordering.
			
 
				+            if ctx.src_tk[0] > src[ctx.cur]:
			
 
				+                logger.debug(
			
 
				+                        f"{ctx.src_tk} is after {src[ctx.cur:ctx.cur + step]}."
			
 
				+                        " Breaking loop.")
			
 
				+                break
			
 
				+
			
 
				             # Longer tokens should be guaranteed to be scanned before their
			
 
				             # substrings at this point.
			
 
				-            step = len(ctx.src_tk)
			
 
				             if ctx.src_tk == src[ctx.cur:ctx.cur + step]:
			
 
				                 ctx.match = True
			
 
				                 # This hook may skip this token or break out of the token
			
@@ -186,7 +197,7 @@ def transliterate(src, lang, r2s=False):
 
				 
			
 
				             # No match found. Copy non-mapped character (one at a time).
			
 
				             logger.info(
			
 
				-                    f"Token {src[ctx.cur]} (\\u{hex(ord(src[ctx.cur]))[2:]})"
			
 
				+                    f"Token {src[ctx.cur]} (\\u{hex(ord(src[ctx.cur]))[2:]}) "
			
 
				                     f"at position {ctx.cur} is not mapped.")
			
 
				             ctx.dest_ls.append(src[ctx.cur])
			
 
				             ctx.cur += 1