1 year ago · 5aeb7a8705
--- a/TODO.md
+++ b/TODO.md
@@ -10,22 +10,22 @@ discussion, etc.); *X* = not implementing.
 
				 - *D* Basic transliteration in both directions
			
 
				 - *D* Basic REST API
			
 
				 - *D* Basic UI
			
 
				-- *W* Life cycle hooks for plugins
			
 
				+- *D* Life cycle hooks for plugins
			
 
				 - *P* Regular expressions in ignore lists
			
 
				 - *P* Word boundaries (design)
			
 
				 - *D* API documentation
			
 
				 - *P* Config file documentation
			
 
				-- *W* Hooks documentation
			
 
				+- *D* Hooks documentation
			
 
				 - *W* Complete conversion of existing tables to YAML
			
 
				   - *P* Arabic
			
 
				   - *P* Armenian
			
 
				   - *P* Azerbaijani
			
 
				   - *D* Belarusian
			
 
				-  - *P* Bulgarian
			
 
				+  - *D* Bulgarian
			
 
				   - *D* Chinese
			
 
				   - *P* Ethiopic
			
 
				   - *P* Georgian
			
 
				-  - *P* Greek
			
 
				+  - *W* Greek
			
 
				   - *P* Hebrew and Yiddish
			
 
				   - *X* Japanese
			
 
				   - *P* Kazakh
			
@@ -35,8 +35,8 @@ discussion, etc.); *X* = not implementing.
 
				   - *P* Persian
			
 
				   - *P* Pushto
			
 
				   - *D* Russian
			
 
				-  - *P* Serbian
			
 
				-  - *P* Slavonic
			
 
				+  - *D* Serbian + Macedonian
			
 
				+  - *D* Slavonic
			
 
				   - *P* Tajik
			
 
				   - *P* Tatar
			
 
				   - *P* Thaana
			
--- a/transliterator/exceptions.py
+++ b/transliterator/exceptions.py
@@ -0,0 +1,9 @@
 
				+__doc__ = """ Exceptions and special return codes. """
			
 
				+
			
 
				+BREAK = "__break"
			
 
				+CONT = "__continue"
			
 
				+
			
 
				+
			
 
				+class ConfigError(Exception):
			
 
				+    """ Raised when a malformed configuration is detected. """
			
 
				+    pass
			
--- a/transliterator/tables/__init__.py
+++ b/transliterator/tables/__init__.py
@@ -10,6 +10,8 @@ try:
 
				 except ImportError:
			
 
				     from yaml import Loader
			
 
				 
			
 
				+from transliterator.exceptions import ConfigError
			
 
				+
			
 
				 
			
 
				 __doc__ = """
			
 
				 Transliteration tables.
			
@@ -39,10 +41,6 @@ HOOK_PKG_PATH = "transliterator.hooks"
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
 
				-class ConfigError(Exception):
			
 
				-    """ Raised when a malformed configuration is detected. """
			
 
				-
			
 
				-
			
 
				 class Token(str):
			
 
				     """
			
 
				     Token class: minimal unit of text parsing.
			
--- a/transliterator/trans.py
+++ b/transliterator/trans.py
@@ -1,6 +1,7 @@
 
				 import logging
			
 
				 import re
			
 
				 
			
 
				+from transliterator.exceptions import BREAK, CONT
			
 
				 from transliterator.tables import load_table
			
 
				 
			
 
				 
			
@@ -73,7 +74,10 @@ def transliterate(src, lang, r2s=False):
 
				 
			
 
				     ctx = Context(src, general, langsec)
			
 
				 
			
 
				-    _run_hook("post_config", ctx, langsec_hooks)
			
 
				+    # This hook may take over the whole transliteration process or delegate it
			
 
				+    # to some external process, and return the output string directly.
			
 
				+    if _run_hook("post_config", ctx, langsec_hooks) == BREAK:
			
 
				+        return getattr(ctx, "dest", "")
			
 
				 
			
 
				     # Loop through source characters. The increment of each loop depends on
			
 
				     # the length of the token that eventually matches.
			
@@ -83,10 +87,10 @@ def transliterate(src, lang, r2s=False):
 
				         # This hook may skip the parsing of the current
			
 
				         # token or exit the scanning loop altogether.
			
 
				         hret = _run_hook("begin_input_token", ctx, langsec_hooks)
			
 
				-        if hret == "break":
			
 
				+        if hret == BREAK:
			
 
				             logger.debug("Breaking text scanning from hook signal.")
			
 
				             break
			
 
				-        if hret == "continue":
			
 
				+        if hret == CONT:
			
 
				             logger.debug("Skipping scanning iteration from hook signal.")
			
 
				             continue
			
 
				 
			
@@ -97,18 +101,18 @@ def transliterate(src, lang, r2s=False):
 
				             ctx.ignoring = False
			
 
				             for ctx.tk in ignore_list:
			
 
				                 hret = _run_hook("pre_ignore_token", ctx, langsec_hooks)
			
 
				-                if hret == "break":
			
 
				+                if hret == BREAK:
			
 
				                     break
			
 
				-                if hret == "continue":
			
 
				+                if hret == CONT:
			
 
				                     continue
			
 
				 
			
 
				                 step = len(ctx.tk)
			
 
				                 if ctx.tk == src[ctx.cur:ctx.cur + step]:
			
 
				                     # The position matches an ignore token.
			
 
				                     hret = _run_hook("on_ignore_match", ctx, langsec_hooks)
			
 
				-                    if hret == "break":
			
 
				+                    if hret == BREAK:
			
 
				                         break
			
 
				-                    if hret == "continue":
			
 
				+                    if hret == CONT:
			
 
				                         continue
			
 
				 
			
 
				                     logger.info(f"Ignored token: {ctx.tk}")
			
@@ -129,9 +133,9 @@ def transliterate(src, lang, r2s=False):
 
				         ctx.match = False
			
 
				         for ctx.src_tk, ctx.dest_tk in langsec["map"]:
			
 
				             hret = _run_hook("pre_tx_token", ctx, langsec_hooks)
			
 
				-            if hret == "break":
			
 
				+            if hret == BREAK:
			
 
				                 break
			
 
				-            if hret == "continue":
			
 
				+            if hret == CONT:
			
 
				                 continue
			
 
				 
			
 
				             # Longer tokens should be guaranteed to be scanned before their
			
@@ -142,9 +146,9 @@ def transliterate(src, lang, r2s=False):
 
				                 # This hook may skip this token or break out of the token
			
 
				                 # lookup for the current position.
			
 
				                 hret = _run_hook("on_tx_token_match", ctx, langsec_hooks)
			
 
				-                if hret == "break":
			
 
				+                if hret == BREAK:
			
 
				                     break
			
 
				-                if hret == "continue":
			
 
				+                if hret == CONT:
			
 
				                     continue
			
 
				 
			
 
				                 # A match is found. Stop scanning tokens, append result, and
			
@@ -156,9 +160,9 @@ def transliterate(src, lang, r2s=False):
 
				         if ctx.match is False:
			
 
				             delattr(ctx, "match")
			
 
				             hret = _run_hook("on_no_tx_token_match", ctx, langsec_hooks)
			
 
				-            if hret == "break":
			
 
				+            if hret == BREAK:
			
 
				                 break
			
 
				-            if hret == "continue":
			
 
				+            if hret == CONT:
			
 
				                 continue
			
 
				 
			
 
				             # No match found. Copy non-mapped character (one at a time).
			
@@ -201,7 +205,7 @@ def _run_hook(hname, ctx, hooks):
 
				     for hook_def in hooks.get(hname, []):
			
 
				         kwargs = hook_def[1] if len(hook_def) > 1 else {}
			
 
				         ret = hook_def[0](ctx, **kwargs)
			
 
				-        if ret in ("break", "cont"):
			
 
				+        if ret in (BREAK, CONT):
			
 
				             # This will stop running further hook functions and tell the caller to
			
 
				             # break out of the outer loop or skip iteration.
			
 
				             return ret