#!/usr/bin/env python
"""Normalize the Roman side of a script-mapping YAML table to Unicode NFD.

Usage: decompose_tables.py CONFIG_FILE_PATH

Use this script to normalize Roman map keys to use combining characters
(decomposed glyphs) vs. pre-composed glyphs.

The script will create a new YAML file named according to the source.
E.g. `myscript.yml` → `myscript_norm.yml`.

NOTE: Check the YAML syntax as issues with indentation have been detected.
Also, the original key order may be displaced, and whitespace and comments may
disappear.
"""

import sys
from argparse import ArgumentParser
from os import path
from unicodedata import normalize

# Section whose "map" has Roman text as its KEYS.
_ROMAN_KEYS_SECTION = "roman_to_script"
# Section whose "map" has Roman text as its VALUES.
_ROMAN_VALUES_SECTION = "script_to_roman"


def _nfd(text):
    """Return *text* in NFD form; non-string objects are returned unchanged.

    YAML may parse a bare key or value as an int, bool or None; those cannot
    be normalized and are passed through untouched instead of raising.
    """
    return normalize("NFD", text) if isinstance(text, str) else text


def _section_map(data, section):
    """Return ``data[section]["map"]`` when it is a dict, otherwise None."""
    block = data.get(section)
    if not isinstance(block, dict):
        return None
    mapping = block.get("map")
    return mapping if isinstance(mapping, dict) else None


def decompose_table(data):
    """Decompose the Roman strings of a loaded table, in place.

    The keys of ``data["roman_to_script"]["map"]`` and the values of
    ``data["script_to_roman"]["map"]`` are replaced by their NFD forms.
    A section that is absent, or whose ``map`` is not a dict, is skipped.
    Everything else in *data* is left untouched.

    When two source keys collapse to the same decomposed key, the later one
    wins (as with a plain dict comprehension); if their values differ, a
    warning is written to stderr because one mapping is being dropped.

    Args:
        data: dict as produced by loading the YAML table.

    Returns:
        The same *data* object, for convenience.
    """
    roman_keys = _section_map(data, _ROMAN_KEYS_SECTION)
    if roman_keys is not None:
        decomposed = {}
        for key, value in roman_keys.items():
            norm_key = _nfd(key)
            if norm_key in decomposed and decomposed[norm_key] != value:
                print(
                    "WARNING: key {!r} collides after normalization; "
                    "replacing {!r} with {!r}.".format(
                        norm_key, decomposed[norm_key], value
                    ),
                    file=sys.stderr,
                )
            decomposed[norm_key] = value
        data[_ROMAN_KEYS_SECTION]["map"] = decomposed

    roman_values = _section_map(data, _ROMAN_VALUES_SECTION)
    if roman_values is not None:
        data[_ROMAN_VALUES_SECTION]["map"] = {
            key: _nfd(value) for key, value in roman_values.items()
        }

    return data


def main(argv=None):
    """Read the source table, decompose it and write ``<name>_norm.yml``.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.
    """
    # PyYAML is imported here rather than at module level so that the pure
    # normalization helpers above stay importable without it.
    from yaml import dump, load
    try:
        from yaml import CLoader as Loader
    except ImportError:
        from yaml import Loader

    parser = ArgumentParser()
    parser.add_argument("src_fname", help="path of the source YAML table")
    args = parser.parse_args(argv)

    dest_fname = path.splitext(args.src_fname)[0] + "_norm.yml"

    # Explicit UTF-8: the tables are Unicode-heavy and must not depend on the
    # platform's default locale encoding.
    with open(args.src_fname, encoding="utf-8") as fh:
        # NOTE(review): Loader/CLoader is PyYAML's full loader and can build
        # arbitrary Python objects from tagged input. If tables may come from
        # an untrusted source, switch to SafeLoader/CSafeLoader.
        data = load(fh, Loader=Loader)

    if not isinstance(data, dict):
        # An empty file loads as None; a top-level list has no sections.
        parser.error(
            "{}: top-level YAML mapping expected.".format(args.src_fname)
        )

    decompose_table(data)

    with open(dest_fname, "w", encoding="utf-8") as fh:
        # allow_unicode keeps glyphs readable instead of \uXXXX escapes.
        dump(data, fh, indent=2, allow_unicode=True)

    print("Done.")


if __name__ == "__main__":
    main()