Browse Source

Use constants for hook fn return values.

Stefano Cossu 1 year ago
parent
commit
5aeb7a8705
4 changed files with 35 additions and 24 deletions
  1. 6 6
      TODO.md
  2. 9 0
      transliterator/exceptions.py
  3. 2 4
      transliterator/tables/__init__.py
  4. 18 14
      transliterator/trans.py

+ 6 - 6
TODO.md

@@ -10,22 +10,22 @@ discussion, etc.); *X* = not implementing.
 - *D* Basic transliteration in both directions
 - *D* Basic REST API
 - *D* Basic UI
-- *W* Life cycle hooks for plugins
+- *D* Life cycle hooks for plugins
 - *P* Regular expressions in ignore lists
 - *P* Word boundaries (design)
 - *D* API documentation
 - *P* Config file documentation
-- *W* Hooks documentation
+- *D* Hooks documentation
 - *W* Complete conversion of existing tables to YAML
   - *P* Arabic
   - *P* Armenian
   - *P* Azerbaijani
   - *D* Belarusian
-  - *P* Bulgarian
+  - *D* Bulgarian
   - *D* Chinese
   - *P* Ethiopic
   - *P* Georgian
-  - *P* Greek
+  - *W* Greek
   - *P* Hebrew and Yiddish
   - *X* Japanese
   - *P* Kazakh
@@ -35,8 +35,8 @@ discussion, etc.); *X* = not implementing.
   - *P* Persian
   - *P* Pushto
   - *D* Russian
-  - *P* Serbian
-  - *P* Slavonic
+  - *D* Serbian + Macedonian
+  - *D* Slavonic
   - *P* Tajik
   - *P* Tatar
   - *P* Thaana

+ 9 - 0
transliterator/exceptions.py

@@ -0,0 +1,9 @@
+__doc__ = """ Exceptions and special return codes. """
+
+BREAK = "__break"
+CONT = "__continue"
+
+
+class ConfigError(Exception):
+    """ Raised when a malformed configuration is detected. """
+    pass

+ 2 - 4
transliterator/tables/__init__.py

@@ -10,6 +10,8 @@ try:
 except ImportError:
     from yaml import Loader
 
+from transliterator.exceptions import ConfigError
+
 
 __doc__ = """
 Transliteration tables.
@@ -39,10 +41,6 @@ HOOK_PKG_PATH = "transliterator.hooks"
 logger = logging.getLogger(__name__)
 
 
-class ConfigError(Exception):
-    """ Raised when a malformed configuration is detected. """
-
-
 class Token(str):
     """
     Token class: minimal unit of text parsing.

+ 18 - 14
transliterator/trans.py

@@ -1,6 +1,7 @@
 import logging
 import re
 
+from transliterator.exceptions import BREAK, CONT
 from transliterator.tables import load_table
 
 
@@ -73,7 +74,10 @@ def transliterate(src, lang, r2s=False):
 
     ctx = Context(src, general, langsec)
 
-    _run_hook("post_config", ctx, langsec_hooks)
+    # This hook may take over the whole transliteration process or delegate it
+    # to some external process, and return the output string directly.
+    if _run_hook("post_config", ctx, langsec_hooks) == BREAK:
+        return getattr(ctx, "dest", "")
 
     # Loop through source characters. The increment of each loop depends on
     # the length of the token that eventually matches.
@@ -83,10 +87,10 @@ def transliterate(src, lang, r2s=False):
         # This hook may skip the parsing of the current
         # token or exit the scanning loop altogether.
         hret = _run_hook("begin_input_token", ctx, langsec_hooks)
-        if hret == "break":
+        if hret == BREAK:
             logger.debug("Breaking text scanning from hook signal.")
             break
-        if hret == "continue":
+        if hret == CONT:
             logger.debug("Skipping scanning iteration from hook signal.")
             continue
 
@@ -97,18 +101,18 @@ def transliterate(src, lang, r2s=False):
             ctx.ignoring = False
             for ctx.tk in ignore_list:
                 hret = _run_hook("pre_ignore_token", ctx, langsec_hooks)
-                if hret == "break":
+                if hret == BREAK:
                     break
-                if hret == "continue":
+                if hret == CONT:
                     continue
 
                 step = len(ctx.tk)
                 if ctx.tk == src[ctx.cur:ctx.cur + step]:
                     # The position matches an ignore token.
                     hret = _run_hook("on_ignore_match", ctx, langsec_hooks)
-                    if hret == "break":
+                    if hret == BREAK:
                         break
-                    if hret == "continue":
+                    if hret == CONT:
                         continue
 
                     logger.info(f"Ignored token: {ctx.tk}")
@@ -129,9 +133,9 @@ def transliterate(src, lang, r2s=False):
         ctx.match = False
         for ctx.src_tk, ctx.dest_tk in langsec["map"]:
             hret = _run_hook("pre_tx_token", ctx, langsec_hooks)
-            if hret == "break":
+            if hret == BREAK:
                 break
-            if hret == "continue":
+            if hret == CONT:
                 continue
 
             # Longer tokens should be guaranteed to be scanned before their
@@ -142,9 +146,9 @@ def transliterate(src, lang, r2s=False):
                 # This hook may skip this token or break out of the token
                 # lookup for the current position.
                 hret = _run_hook("on_tx_token_match", ctx, langsec_hooks)
-                if hret == "break":
+                if hret == BREAK:
                     break
-                if hret == "continue":
+                if hret == CONT:
                     continue
 
                 # A match is found. Stop scanning tokens, append result, and
@@ -156,9 +160,9 @@ def transliterate(src, lang, r2s=False):
         if ctx.match is False:
             delattr(ctx, "match")
             hret = _run_hook("on_no_tx_token_match", ctx, langsec_hooks)
-            if hret == "break":
+            if hret == BREAK:
                 break
-            if hret == "continue":
+            if hret == CONT:
                 continue
 
             # No match found. Copy non-mapped character (one at a time).
@@ -201,7 +205,7 @@ def _run_hook(hname, ctx, hooks):
     for hook_def in hooks.get(hname, []):
         kwargs = hook_def[1] if len(hook_def) > 1 else {}
         ret = hook_def[0](ctx, **kwargs)
-        if ret in ("break", "cont"):
+        if ret in (BREAK, CONT):
             # This stops running further hook functions and tells the caller
             # to break out of the outer loop or skip the current iteration.
             return ret