decompose_samples.py 845 B

12345678910111213141516171819202122232425262728293031
  1. #!/usr/bin/env python
  2. __doc__ = """
  3. Usage: decompose_samples.py
  4. Use this script to normalize Roman map keys to use combining characters
  5. (decomposed glyphs) vs. pre-composed glyphs.
  6. The script will create a new CSV file named according to the source.
  7. E.g. `myscript.csv` → `myscript_norm.csv`.
  8. NOTE: the script does not parse the CSV, it scans it as a plain text file. It
  9. is unlikely but possible that some normalization may lead to an invalid CSV.
  10. """
  11. from os import path
  12. from unicodedata import normalize
  13. from glob import glob
  14. for fname in glob("*.csv"):
  15. dest_fname = path.splitext(fname)[0] + "_norm.csv"
  16. with open(fname) as fh:
  17. data = fh.read()
  18. norm_data = normalize("NFD", data)
  19. with open(dest_fname, "w") as fh:
  20. fh.write(norm_data)
  21. print(f"Normalized {fname} to {dest_fname}.")
  22. print("Done.")