3 vuotta sitten · c5de4bbd6c
--- a/TODO.md
+++ b/TODO.md
@@ -30,7 +30,7 @@ discussion, etc.); *X* = not implementing.
 
				 - *D* Hooks documentation
			
 
				 - *W* Tests
			
 
				   - *W* Config parsing
			
 
				-  - *P* Transliteration
			
 
				+  - *W* Transliteration
			
 
				   - *P* REST API
			
 
				 - *W* Complete conversion of existing tables to YAML
			
 
				   - *P* Arabic
			
--- a/transliterator/trans.py
+++ b/transliterator/trans.py
@@ -92,13 +92,17 @@ def transliterate(src, lang, r2s=False, capitalize=False):
 
				     while ctx.cur < len(src):
			
 
				         # Reset cursor position flags.
			
 
				         ctx.cur_flags = 0
			
 
				+        cur_char = src[ctx.cur]
			
 
				 
			
 
				         # Look for a word boundary and flag word beginning/end it if found.
			
 
				-        if ctx.cur == 0 or src[ctx.cur - 1] in WORD_BOUNDARY:
			
 
				+        if (ctx.cur == 0 or src[ctx.cur - 1] in WORD_BOUNDARY) and (
			
 
				+                cur_char not in WORD_BOUNDARY):
			
 
				             # Beginning of word.
			
 
				             logger.debug(f"Beginning of word at position {ctx.cur}.")
			
 
				             ctx.cur_flags |= CUR_BOW
			
 
				-        if ctx.cur == len(src) - 1 or src[ctx.cur + 1] in WORD_BOUNDARY:
			
 
				+        if (ctx.cur == len(src) - 1 or src[ctx.cur + 1] in WORD_BOUNDARY) and (
			
 
				+                cur_char not in WORD_BOUNDARY):
			
 
				+            # Only flagged when the current character is not itself a boundary.
			
 
				             # End of word.
			
 
				             logger.debug(f"End of word at position {ctx.cur}.")
			
 
				             ctx.cur_flags |= CUR_EOW
			
@@ -163,7 +167,7 @@ def transliterate(src, lang, r2s=False, capitalize=False):
 
				             # point value) than the current character, then break the loop
			
 
				             # without a match, because we know there won't be any more match
			
 
				             # due to the alphabetical ordering.
			
 
				-            if ctx.src_tk[0] > src[ctx.cur]:
			
 
				+            if ctx.src_tk[0] > cur_char:
			
 
				                 logger.debug(
			
 
				                         f"{ctx.src_tk} is after {src[ctx.cur:ctx.cur + step]}."
			
 
				                         " Breaking loop.")
			
@@ -205,9 +209,9 @@ def transliterate(src, lang, r2s=False, capitalize=False):
 
				 
			
 
				             # No match found. Copy non-mapped character (one at a time).
			
 
				             logger.info(
			
 
				-                    f"Token {src[ctx.cur]} (\\u{hex(ord(src[ctx.cur]))[2:]}) "
			
 
				+                    f"Token {cur_char} (\\u{hex(ord(cur_char))[2:]}) "
			
 
				                     f"at position {ctx.cur} is not mapped.")
			
 
				-            ctx.dest_ls.append(src[ctx.cur])
			
 
				+            ctx.dest_ls.append(cur_char)
			
 
				             ctx.cur += 1
			
 
				         else:
			
 
				             delattr(ctx, "match")