# __init__.py
  1. __doc__ = """
  2. General-purpose hooks.
  3. """
from logging import getLogger
from re import compile
  6. # Match multiple spaces.
  7. MULTI_WS_RE = compile(r"(\s){2,}")
  8. # Punctuation and brackets.
  9. # TODO add angled brackets, opening and closing quotes, etc.
  10. NORM1_RE = compile(r"\s([.,;:\)\]}])")
  11. NORM2_RE = compile(r"([,;\)\]}])(\S)")
  12. NORM3_RE = compile(r"([\(\[\{])\s")
  13. NORM4_RE = compile(r"(\S)([\(\[\{])")
  14. # "Straight" quotes.
  15. # TODO Add single quotes.
  16. NORM5_RE = compile(r"\"\s*([^\"]?)\s*\"")
  17. NORM6_RE = compile(r"(\S)(\"[^\"]?\")")
  18. NORM7_RE = compile(r"(\"[^\"]?\")(\S)")
  19. # Space between symbols.
  20. NORM8_RE = compile(r"([.,;:\(\[\{\)\]}])\s+([.,;:\(\[\{\)\]}])")
  21. logger = getLogger(__name__)
  22. def capitalize_pre_assembly(ctx):
  23. """
  24. Capitalize a not-yet-assembled result list according to user options.
  25. """
  26. ctx.dest_ls = _capitalize(ctx.dest_ls, ctx.options.get("capitalize"))
  27. def capitalize_post_assembly(ctx):
  28. """
  29. Capitalize an already assembled result string according to user options.
  30. """
  31. dest_ls = ctx.dest.split(" ")
  32. dest_ls = _capitalize(dest_ls, ctx.options.get("capitalize"))
  33. ctx.dest = " ".join(dest_ls)
  34. def normalize_spacing_post_assembly(ctx):
  35. """
  36. Remove duplicate and unwanted whitespace around punctuation.
  37. NOTE: This is called by default by transliterate() immediately after the
  38. `post_assembly` hook.
  39. """
  40. # De-duplicate whitespace.
  41. logger.debug(f"Dest pre manipulation: {ctx.dest}")
  42. # Remove white space between punctuation signs.
  43. norm = MULTI_WS_RE.sub(r"\1", ctx.dest.strip())
  44. # Remove space before punctuation and closing brackets.
  45. norm = NORM1_RE.sub(r"\1", norm)
  46. # Ensure space after punctuation and closing brackets.
  47. norm = NORM2_RE.sub(r"\1 \2", norm)
  48. # Remove space after opening brackets.
  49. norm = NORM3_RE.sub(r"\1", norm)
  50. # Ensure space before opening brackets.
  51. norm = NORM4_RE.sub(r"\1 \2", norm)
  52. # Remove space inside matched quotes.
  53. norm = NORM5_RE.sub(r"\"\1\"", norm)
  54. # Add space before opening double quote.
  55. norm = NORM6_RE.sub(r"\1 \2", norm)
  56. # Add space after closing double quote.
  57. norm = NORM7_RE.sub(r"\1 \2", norm)
  58. # Remove multiple white space characters.
  59. # norm = NORM8_RE.sub(r"\1\2", norm)
  60. ctx.dest = norm
  61. def _capitalize(src, which):
  62. """
  63. capitalize first word only or all words.
  64. NOTE: this function is only used for capitalizing hook-generated
  65. transliterations, which are not normally processed. Double cap rules are
  66. not applicable here.
  67. """
  68. if which == "first":
  69. src[0] = src[0].capitalize()
  70. return src
  71. if which == "all":
  72. return [tk[0].upper() + tk[1:] for tk in src]
  73. return src