Преглед изворни кода

Implement working ROT3 test.

Stefano Cossu пре 2 година
родитељ
комит
0a60c96b12

+ 12 - 12
doc/hooks.md

@@ -148,12 +148,12 @@ This hook is run at the beginning of each iteration of the input parsing loop.
 
 #### Return
 
-Possible values are `"cont"`, `"break"`, or `None`. If `None` is
-returned, the parsing proceeds as normal. `"cont"` causes the application to
-skip the parsing of the current token. `"break"` interrupts the text scanning
-and proceeds directly to handling the result list for output. **CAUTION**: when
-returning "cont", it is the responsibility of the function to advance `ctx.cur`
-so that the loop doesn't become an infinite one. 
+Possible values are `"continue"`, `"break"`, or `None`. If `None` is returned,
+the parsing proceeds as normal. `"continue"` causes the application to skip the
+parsing of the current token. `"break"` interrupts the text scanning and
+proceeds directly to handling the result list for output. **CAUTION**: when
+returning "continue", it is the responsibility of the function to advance
+`ctx.cur` so that the loop doesn't become an infinite one. 
 
 ### `pre_ignore_token`
 
@@ -169,9 +169,9 @@ Run before each ignore token is compared with the input.
 
 #### Output
 
-`"cont"`, `"break"`, or `None`. `"cont"` skips the checks on the
+`"continue"`, `"break"`, or `None`. `"continue"` skips the checks on the
 current ignore token. `"break"` stops looking up ignore tokens for the current
-position. This function can return `"cont"` without advancing the cursor and
+position. This function can return `"continue"` without advancing the cursor and
 without causing an infinite loop.
 
 ### `on_ignore_match`
@@ -191,7 +191,7 @@ Run when an ignore token matches.
 
 #### Output
 
-`"cont"`, `"break"`, or `None`. `"cont"` voids the match and keeps
+`"continue"`, `"break"`, or `None`. `"continue"` voids the match and keeps
 on looking up the ignore list. `"break"` stops looking up ignore tokens for the
 current position. See cautionary note on `begin_input_token`.
 
@@ -211,7 +211,7 @@ Run before comparing each transliteration token with the current text.
 
 #### Output
 
-`"cont"`, `"break"`, or `None`. `"cont"` skips the checks on the
+`"continue"`, `"break"`, or `None`. `"continue"` skips the checks on the
 current token. `"break"` stops looking up all tokens for the current
 position. See cautionary note on `begin_input_token`.
 
@@ -234,7 +234,7 @@ Run when a transliteration token matches the input.
 
 #### Output
 
-`"cont"`, `"break"`, or `None`. `"cont"` voids the match and keeps
+`"continue"`, `"break"`, or `None`. `"continue"` voids the match and keeps
 on looking up the token list. `"break"` stops looking up tokens for the
 current position and effectively reports a non-match.
 
@@ -253,7 +253,7 @@ been found.
 
 #### Output
 
-`"cont"`, `"break"`, or `None`. `"cont"` skips to the next
+`"continue"`, `"break"`, or `None`. `"continue"` skips to the next
 position in the input text. In this case, the function **must** advance the
 cursor. `"break"` stops all text parsing and proceeds to the assembly of the
 output.

+ 38 - 0
transliterator/hooks/test.py

@@ -0,0 +1,38 @@
+import logging
+
+
+__doc__ = """ Test hook functions. """
+
+
+logger = logging.getLogger(__name__)
+
+
+def rotate(ctx, n):
+    """
+    Simple character rotation.
+
+    Implements the Caesar's Cypher algorithm by shifting a single
+    [A-Za-z] character by `n` places, and wrapping around
+    the edges.
+
+    Characters not in range are not shifted.
+    """
+    uc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    lc = uc.lower()
+    logger.debug(f"cursor: {ctx.cur}")
+
+    ch = ctx.src[ctx.cur]
+    if ch in uc:
+        idx = uc.index(ch)
+        dest_ch = uc[(idx + n) % len(uc)]
+    elif ch in lc:
+        idx = lc.index(ch)
+        dest_ch = lc[(idx + n) % len(lc)]
+    else:
+        dest_ch = ch
+    logger.debug(f"ROT {n}: {ch} -> {dest_ch}")
+
+    ctx.dest_ls.append(dest_ch)
+    ctx.cur += 1
+
+    return "continue"

+ 9 - 1
transliterator/rest_api.py

@@ -1,3 +1,4 @@
+from copy import deepcopy
 from os import environ
 
 from flask import Flask, Response, jsonify, render_template, request
@@ -45,7 +46,14 @@ def dump_table(lang):
     """
     Dump parsed transliteration table for a language.
     """
-    return jsonify(load_table(lang))
+    tbl = deepcopy(load_table(lang))
+    for sec_name in ("roman_to_script", "script_to_roman"):
+        if sec_name in tbl:
+            for hname, fn_defs in tbl[sec_name].get("hooks", {}).items():
+                tbl[sec_name]["hooks"][hname] = [
+                        (fn.__name__, kw) for (fn, kw) in fn_defs]
+
+    return jsonify(tbl)
 
 
 @app.route("/transliterate", methods=["POST"])

+ 9 - 7
transliterator/tables/__init__.py

@@ -1,6 +1,7 @@
 import logging
 
 from functools import cache
+from importlib import import_module
 from os import path, access, R_OK
 
 from yaml import load
@@ -132,7 +133,7 @@ def load_table(tname):
 
         if "hooks" in tdata["script_to_roman"]:
             tdata["script_to_roman"]["hooks"] = load_hook_fn(
-                    tdata["script_to_roman"]["hooks"])
+                    tname, tdata["script_to_roman"])
 
     if "roman_to_script" in tdata:
         tokens = {
@@ -164,7 +165,7 @@ def load_table(tname):
 
         if "hooks" in tdata["roman_to_script"]:
             tdata["roman_to_script"]["hooks"] = load_hook_fn(
-                    tdata["roman_to_script"]["hooks"])
+                    tname, tdata["script_to_roman"])
 
     return tdata
 
@@ -183,7 +184,7 @@ def load_hook_fn(cname, sec):
         dict: Dictionary of hook name and list of hook functions pairs.
     """
     hook_fn = {}
-    for cfg_hook, cfg_hook_fns in sec.get("hooks", {}):
+    for cfg_hook, cfg_hook_fns in sec.get("hooks", {}).items():
         if cfg_hook not in HOOKS:
             raise ConfigError(f"{cfg_hook} is not a valid hook name!")
 
@@ -191,16 +192,17 @@ def load_hook_fn(cname, sec):
         # There may be more than one function in each hook. They are
         # executed in the order they are found.
         for cfg_hook_fn in cfg_hook_fns:
-            modname, fnname = path.splitext(cfg_hook_fn)
+            modname, fnname = path.splitext(cfg_hook_fn[0])
             fnname = fnname.lstrip(".")
+            fn_kwargs = cfg_hook_fn[1]
             try:
-                fn = import_module(
-                        "." + modname, HOOK_PKG_PATH).getattr(fnname)
+                fn = getattr(import_module(
+                        "." + modname, HOOK_PKG_PATH), fnname)
             except NameError:
                 raise ConfigError(
                     f"Hook function {fnname} defined in {cname} configuration "
                     f"not found in module {HOOK_PKG_PATH}.{modname}!"
                 )
-            hook_fn[cfg_hook].append(fn)
+            hook_fn[cfg_hook].append((fn, fn_kwargs))
 
     return hook_fn

+ 73 - 0
transliterator/tables/data/rot3.yml

@@ -0,0 +1,73 @@
+# Caesar Cypher: shift a letter by 3 places to the right.
+# This configuration is only used to test features.
+#
+# Ironically, the "script" side is actually Latin (Roman) in the
+# original context, as this was a cryptography method
+# used by Julius Caesar. 
+
+general:
+  name: ROT3 (Caesar Cypher)
+
+roman_to_script:
+  map:
+    "A": "D"
+    "B": "E"
+    "C": "F"
+    "D": "G"
+    "E": "H"
+    "F": "I"
+    "G": "J"
+    "H": "K"
+    "I": "L"
+    "J": "M"
+    "K": "N"
+    "L": "O"
+    "M": "P"
+    "N": "Q"
+    "O": "R"
+    "P": "S"
+    "Q": "T"
+    "R": "U"
+    "S": "V"
+    "T": "W"
+    "U": "X"
+    "V": "Y"
+    "W": "Z"
+    "X": "A"
+    "Y": "B"
+    "Z": "C"
+    "a": "d"
+    "b": "e"
+    "c": "f"
+    "d": "g"
+    "e": "h"
+    "f": "i"
+    "g": "j"
+    "h": "k"
+    "i": "l"
+    "j": "m"
+    "k": "n"
+    "l": "o"
+    "m": "p"
+    "n": "q"
+    "o": "r"
+    "p": "s"
+    "q": "t"
+    "r": "u"
+    "s": "v"
+    "t": "w"
+    "u": "x"
+    "v": "y"
+    "w": "z"
+    "x": "a"
+    "y": "b"
+    "z": "c"
+
+script_to_roman:
+  # This does the opposite of roman to script, but by using hook functions.
+  # Note the absence of a "map" section.
+  hooks:
+    begin_input_token:
+      -
+        - test.rotate
+        - n: -3

+ 6 - 6
transliterator/trans.py

@@ -15,9 +15,6 @@ class Context:
     """
     Context used within the transliteration and passed to hook functions.
     """
-    cur = 0  # Input text cursor.
-    dest_ls = []  # Token list making up the output string.
-
     def __init__(self, src, general, langsec):
         """
         Initialize a context.
@@ -30,6 +27,7 @@ class Context:
         self.src = src
         self.general = general
         self.langsec = langsec
+        self.dest_ls = []
 
 
 def transliterate(src, lang, r2s=False):
@@ -80,14 +78,18 @@ def transliterate(src, lang, r2s=False):
     # Loop through source characters. The increment of each loop depends on
     # the length of the token that eventually matches.
     ignore_list = langsec.get("ignore", [])  # Only present in R2S
+    ctx.cur = 0
     while ctx.cur < len(src):
         # This hook may skip the parsing of the current
         # token or exit the scanning loop altogether.
         hret = _run_hook("begin_input_token", ctx, langsec_hooks)
         if hret == "break":
+            logger.debug("Breaking text scanning from hook signal.")
             break
         if hret == "continue":
+            logger.debug("Skipping scanning iteration from hook signal.")
             continue
+
         # Check ignore list first. Find as many subsequent ignore tokens
         # as possible before moving on to looking for match tokens.
         ctx.tk = None
@@ -151,8 +153,6 @@ def transliterate(src, lang, r2s=False):
                 ctx.cur += step
                 break
 
-        delattr(ctx, "src_tk")
-        delattr(ctx, "dest_tk")
         if ctx.match is False:
             delattr(ctx, "match")
             hret = _run_hook("on_no_tx_token_match", ctx, langsec_hooks)
@@ -199,7 +199,7 @@ def transliterate(src, lang, r2s=False):
 def _run_hook(hname, ctx, hooks):
     ret = None
     for hook_def in hooks.get(hname, []):
-        kwargs = hook_def[1] if len(hook_def > 1) else {}
+        kwargs = hook_def[1] if len(hook_def) > 1 else {}
         ret = hook_def[0](ctx, **kwargs)
         if ret in ("break", "cont"):
             # This will stop parsing hooks functions and tell the caller to