# adlam_hooks.py
import logging
import re

from scriptshifter.exceptions import CONT
  4. __doc__ = """ Process contextual substitutions for prenasalization. """
  5. logger = logging.getLogger(__name__)
  6. def s2r_post(ctx):
  7. # clean up word initial pre-nasalization
  8. regex1 = r"\b([Nn])([Nn])([dgjDGJ])"
  9. subst1 = r"\g<1>\g<3>"
  10. ctx.dest = re.sub(regex1, subst1, ctx.dest, 0)
  11. regex2 = r"\b([Mm])([Mm])([bB])"
  12. subst2 = r"\g<1>\g<3>"
  13. ctx.dest = re.sub(regex2, subst2, ctx.dest, 0)
  14. regex3 = r"\b(N)(b)"
  15. subst3 = r"M\g<2>"
  16. # clean up nested lower case when source text is all caps
  17. ctx.dest = re.sub(regex3, subst3, ctx.dest, 0)
  18. regex4 = r"([ABƁCDƊEFGHIJKLMNŊÑOPQRSTUVWYƳZ])([abɓcdɗefghijklmnŋñopqrstuvwyƴz][bhp]?)([ABƁCDƊEFGHIJKLMNŊÑOPQRSTUVWYƳZ])"
  19. nested_lower = re.search(regex4, ctx.dest)
  20. if nested_lower:
  21. ctx.dest = re.sub(regex4, nested_lower.string.upper(), ctx.dest, 0)
  22. return(None)
  23. def r2s_post(ctx):
  24. # clean up word initial pre-nasalization
  25. regex1 = r"\b([𞤲𞤐])𞥋([𞤄𞤁𞤘𞤔𞤦𞤣𞤺𞤶])"
  26. subst1 = r"\g<1>\g<2>"
  27. ctx.dest = re.sub(regex1, subst1, ctx.dest, 0)
  28. regex2 = r"\b([𞤃])([𞤦])"
  29. subst2 = r"𞤐\g<2>"
  30. ctx.dest = re.sub(regex2, subst2, ctx.dest, 0)
  31. return(None)