|
@@ -9,7 +9,8 @@ __doc__ = """ Process contextual substitutions for prenasalization. """
|
|
logger = logging.getLogger(__name__)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
-def scrub_nasal(ctx):
|
|
|
|
|
|
+def s2r_post(ctx):
|
|
|
|
+ # clean up word-initial prenasalization
|
|
regex1 = r"\b([Nn])([Nn])([dgjDGJ])"
|
|
regex1 = r"\b([Nn])([Nn])([dgjDGJ])"
|
|
subst1 = r"\g<1>\g<3>"
|
|
subst1 = r"\g<1>\g<3>"
|
|
ctx.dest = re.sub(regex1, subst1, ctx.dest, 0)
|
|
ctx.dest = re.sub(regex1, subst1, ctx.dest, 0)
|
|
@@ -18,13 +19,16 @@ def scrub_nasal(ctx):
|
|
ctx.dest = re.sub(regex2, subst2, ctx.dest, 0)
|
|
ctx.dest = re.sub(regex2, subst2, ctx.dest, 0)
|
|
regex3 = r"\b(N)(b)"
|
|
regex3 = r"\b(N)(b)"
|
|
subst3 = r"M\g<2>"
|
|
subst3 = r"M\g<2>"
|
|
|
|
+ # clean up lower-case letters nested between capitals when the source text is all caps
|
|
ctx.dest = re.sub(regex3, subst3, ctx.dest, 0)
|
|
ctx.dest = re.sub(regex3, subst3, ctx.dest, 0)
|
|
regex4 = r"([ABƁCDƊEFGHIJKLMNŊÑOPQRSTUVWYƳZ])([abɓcdɗefghijklmnŋñopqrstuvwyƴz][bhp]?)([ABƁCDƊEFGHIJKLMNŊÑOPQRSTUVWYƳZ])"
|
|
regex4 = r"([ABƁCDƊEFGHIJKLMNŊÑOPQRSTUVWYƳZ])([abɓcdɗefghijklmnŋñopqrstuvwyƴz][bhp]?)([ABƁCDƊEFGHIJKLMNŊÑOPQRSTUVWYƳZ])"
|
|
nested_lower = re.search(regex4, ctx.dest)
|
|
nested_lower = re.search(regex4, ctx.dest)
|
|
- ctx.dest = re.sub(regex4, nested_lower.string.upper(), ctx.dest, 0)
|
|
|
|
|
|
+ if nested_lower:
|
|
|
|
+ ctx.dest = re.sub(regex4, nested_lower.string.upper(), ctx.dest, 0)
|
|
return(None)
|
|
return(None)
|
|
|
|
|
|
-def strip_nyondal(ctx):
|
|
|
|
|
|
+def r2s_post(ctx):
|
|
|
|
+ # clean up word-initial prenasalization
|
|
regex1 = r"\b([𞤲𞤐])𞥋([𞤄𞤁𞤘𞤔𞤦𞤣𞤺𞤶])"
|
|
regex1 = r"\b([𞤲𞤐])𞥋([𞤄𞤁𞤘𞤔𞤦𞤣𞤺𞤶])"
|
|
subst1 = r"\g<1>\g<2>"
|
|
subst1 = r"\g<1>\g<2>"
|
|
ctx.dest = re.sub(regex1, subst1, ctx.dest, 0)
|
|
ctx.dest = re.sub(regex1, subst1, ctx.dest, 0)
|