Browse Source

Complete basic tests for regex ignore.

scossu 6 months ago
parent
commit
91f3ad98ab
3 changed files with 37 additions and 7 deletions
  1. 26 4
      scriptshifter/trans.py
  2. 2 2
      tests/data/script_samples/unittest.csv
  3. 9 1
      tests/tables/data/regex.yml

+ 26 - 4
scriptshifter/trans.py

@@ -1,7 +1,7 @@
 import logging
 
 from importlib import import_module
-from re import compile
+from re import Pattern, compile
 
 from scriptshifter.exceptions import BREAK, CONT
 from scriptshifter.tables import (
@@ -152,7 +152,7 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
             # token or exit the scanning loop altogether.
             hret = _run_hook("begin_input_token", ctx)
             if hret == BREAK:
-                Logger.debug("Breaking text scanning from hook signal.")
+                logger.debug("Breaking text scanning from hook signal.")
                 break
             if hret == CONT:
                 logger.debug("Skipping scanning iteration from hook signal.")
@@ -170,8 +170,21 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
                     if hret == CONT:
                         continue
 
-                    step = len(ctx.tk)
-                    if ctx.tk == ctx.src[ctx.cur:ctx.cur + step]:
+                    _matching = False
+                    if type(ctx.tk) is Pattern:
+                        # Search RE pattern beginning at cursor.
+                        if _ptn_match := ctx.tk.match(ctx.src[ctx.cur:]):
+                            ctx.tk = _ptn_match[0]
+                            logger.debug(f"Matched regex: {ctx.tk}")
+                            step = len(ctx.tk)
+                            _matching = True
+                    else:
+                        # Search exact match.
+                        step = len(ctx.tk)
+                        if ctx.tk == ctx.src[ctx.cur:ctx.cur + step]:
+                            _matching = True
+
+                    if _matching:
                         # The position matches an ignore token.
                         hret = _run_hook("on_ignore_match", ctx)
                         if hret == BREAK:
@@ -182,6 +195,12 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
                         logger.info(f"Ignored token: {ctx.tk}")
                         ctx.dest_ls.append(ctx.tk)
                         ctx.cur += step
+                        if ctx.cur >= len(ctx.src):
+                            # reached end of string. Stop ignoring.
+                            # The outer loop will exit immediately after.
+                            ctx.ignoring = False
+                            break
+
                         cur_char = ctx.src[ctx.cur]
                         ctx.ignoring = True
                         break
@@ -194,6 +213,9 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
             delattr(ctx, "tk")
             delattr(ctx, "ignoring")
 
+            if ctx.cur >= len(ctx.src):
+                break
+
             # Begin transliteration token lookup.
             ctx.match = False
 

+ 2 - 2
tests/data/script_samples/unittest.csv

@@ -4,5 +4,5 @@ rot3,defg,abcd,,
 rot3,HIJK,KLMN,"{""t_dir"": ""r2s""}",
 rot3,st uv,Vw xy,"{""t_dir"": ""r2s"", ""capitalize"": ""first""}",
 rot3,st uv,Vw Xy,"{""t_dir"": ""r2s"", ""capitalize"": ""all""}",
-regex,Hello abc,Hello 907,"{""t_dir"": ""r2s""}",
-regex,Hollo abc,Hollo 907,"{""t_dir"": ""r2s""}",
+regex,Hello abc,Hello 678,"{""t_dir"": ""r2s""}",
+regex,Hullo abc,5u22o 678,"{""t_dir"": ""r2s""}",

+ 9 - 1
tests/tables/data/regex.yml

@@ -8,4 +8,12 @@ general:
 
 roman_to_script:
   ignore_ptn:
-    - "[hH][aeu]llo"
+    - "[hH][ae]llo"
+
+  map:
+    "h": "1"
+    "H": "5"
+    "l": "2"
+    "a": "6"
+    "b": "7"
+    "c": "8"