Browse Source

Add warnings to context object.

scossu 1 year ago
parent
commit
a73f904729
2 changed files with 19 additions and 13 deletions
  1. 5 0
      doc/hooks.md
  2. 14 13
      scriptshifter/trans.py

+ 5 - 0
doc/hooks.md

@@ -123,6 +123,11 @@ The following members of the context object are available in all the hooks:
 - `ctx.langsec`: language section (S2R or R2S) of configuration.
 - `ctx.options`: language-specific options defined in configuration and set
     at the beginning of the request.
+- `ctx.warnings`: list of warnings issued during the process. They are
+  returned by the `transliterate()` function together with the output
+  string. Normally this function does not return an error if a malformed
+  string was provided; rather, it may return an empty string and some
+  warnings about the issues found with the input.
 
 Other members are available in different hooks. See the individual hooks
 reference below.

+ 14 - 13
scriptshifter/trans.py

@@ -46,6 +46,7 @@ class Context:
         self.options = options
         self.langsec = langsec
         self.dest_ls = []
+        self.warnings = []
 
 
 def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
@@ -107,28 +108,28 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
     # This hook may take over the whole transliteration process or delegate it
     # to some external process, and return the output string directly.
     if _run_hook("post_config", ctx, langsec_hooks) == BREAK:
-        return getattr(ctx, "dest", ""), getattr(ctx, "warnings", [])
+        return getattr(ctx, "dest", ""), ctx.warnings
 
     # Loop through source characters. The increment of each loop depends on
     # the length of the token that eventually matches.
     ignore_list = langsec.get("ignore", [])  # Only present in R2S
     ctx.cur = 0
     word_boundary = langsec.get("word_boundary", WORD_BOUNDARY)
-    while ctx.cur < len(src):
+    while ctx.cur < len(ctx.src):
         # Reset cursor position flags.
         # Carry over extended "beginning of word" flag.
         ctx.cur_flags = 0
-        cur_char = src[ctx.cur]
+        cur_char = ctx.src[ctx.cur]
 
         # Look for a word boundary and flag the word beginning/end if found.
-        if (ctx.cur == 0 or src[ctx.cur - 1] in word_boundary) and (
+        if (ctx.cur == 0 or ctx.src[ctx.cur - 1] in word_boundary) and (
                 cur_char not in word_boundary):
             # Beginning of word.
             logger.debug(f"Beginning of word at position {ctx.cur}.")
             ctx.cur_flags |= CUR_BOW
         if (
-            ctx.cur == len(src) - 1
-            or src[ctx.cur + 1] in word_boundary
+            ctx.cur == len(ctx.src) - 1
+            or ctx.src[ctx.cur + 1] in word_boundary
         ) and (cur_char not in word_boundary):
             # End of word.
@@ -158,7 +159,7 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
                     continue
 
                 step = len(ctx.tk)
-                if ctx.tk == src[ctx.cur:ctx.cur + step]:
+                if ctx.tk == ctx.src[ctx.cur:ctx.cur + step]:
                     # The position matches an ignore token.
                     hret = _run_hook("on_ignore_match", ctx, langsec_hooks)
                     if hret == BREAK:
@@ -197,13 +198,13 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
             # due to the alphabetical ordering.
             if ctx.src_tk[0] > cur_char:
                 logger.debug(
-                        f"{ctx.src_tk} is after {src[ctx.cur:ctx.cur + step]}."
-                        " Breaking loop.")
+                        f"{ctx.src_tk} is after "
+                        f"{ctx.src[ctx.cur:ctx.cur + step]}. Breaking loop.")
                 break
 
             # Longer tokens should be guaranteed to be scanned before their
             # substrings at this point.
-            if ctx.src_tk == src[ctx.cur:ctx.cur + step]:
+            if ctx.src_tk == ctx.src[ctx.cur:ctx.cur + step]:
                 ctx.match = True
                 # This hook may skip this token or break out of the token
                 # lookup for the current position.
@@ -263,7 +264,7 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
     # its own return value.
     hret = _run_hook("pre_assembly", ctx, langsec_hooks)
     if hret is not None:
-        return hret, getattr(ctx, "warnings", [])
+        return hret, ctx.warnings
 
     logger.debug(f"Output list: {ctx.dest_ls}")
     ctx.dest = "".join(ctx.dest_ls)
@@ -272,12 +273,12 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
     # return it immediately.
     hret = _run_hook("post_assembly", ctx, langsec_hooks)
     if hret == "ret":
-        return ctx.dest, getattr(ctx, "warnings", [])
+        return ctx.dest, ctx.warnings
 
     # Strip multiple spaces and leading/trailing whitespace.
     ctx.dest = re.sub(MULTI_WS_RE, ' ', ctx.dest.strip())
 
-    return ctx.dest, getattr(ctx, "warnings", [])
+    return ctx.dest, ctx.warnings
 
 
 def _run_hook(hname, ctx, hooks):