|
@@ -92,13 +92,17 @@ def transliterate(src, lang, r2s=False, capitalize=False):
|
|
|
while ctx.cur < len(src):
|
|
|
|
|
|
ctx.cur_flags = 0
|
|
|
+ cur_char = src[ctx.cur]
|
|
|
|
|
|
|
|
|
- if ctx.cur == 0 or src[ctx.cur - 1] in WORD_BOUNDARY:
|
|
|
+ if (ctx.cur == 0 or src[ctx.cur - 1] in WORD_BOUNDARY) and (
|
|
|
+ cur_char not in WORD_BOUNDARY):
|
|
|
|
|
|
logger.debug(f"Beginning of word at position {ctx.cur}.")
|
|
|
ctx.cur_flags |= CUR_BOW
|
|
|
- if ctx.cur == len(src) - 1 or src[ctx.cur + 1] in WORD_BOUNDARY:
|
|
|
+ if (ctx.cur == len(src) - 1 or src[ctx.cur + 1] in WORD_BOUNDARY) and (
|
|
|
+ cur_char not in WORD_BOUNDARY):
|
|
|
+
|
|
|
|
|
|
logger.debug(f"End of word at position {ctx.cur}.")
|
|
|
ctx.cur_flags |= CUR_EOW
|
|
@@ -163,7 +167,7 @@ def transliterate(src, lang, r2s=False, capitalize=False):
|
|
|
|
|
|
|
|
|
|
|
|
- if ctx.src_tk[0] > src[ctx.cur]:
|
|
|
+ if ctx.src_tk[0] > cur_char:
|
|
|
logger.debug(
|
|
|
f"{ctx.src_tk} is after {src[ctx.cur:ctx.cur + step]}."
|
|
|
" Breaking loop.")
|
|
@@ -205,9 +209,9 @@ def transliterate(src, lang, r2s=False, capitalize=False):
|
|
|
|
|
|
|
|
|
logger.info(
|
|
|
- f"Token {src[ctx.cur]} (\\u{hex(ord(src[ctx.cur]))[2:]}) "
|
|
|
+ f"Token {cur_char} (\\u{hex(ord(cur_char))[2:]}) "
|
|
|
f"at position {ctx.cur} is not mapped.")
|
|
|
- ctx.dest_ls.append(src[ctx.cur])
|
|
|
+ ctx.dest_ls.append(cur_char)
|
|
|
ctx.cur += 1
|
|
|
else:
|
|
|
delattr(ctx, "match")
|