12345678910111213141516171819202122232425262728293031 |
#!/usr/bin/env python
"""
Usage: decompose_samples.py

Normalize Roman map keys so that they use combining characters (decomposed
glyphs, Unicode NFD) instead of pre-composed glyphs.

Every `*.csv` file in the current working directory is read as UTF-8 and a
normalized copy is written next to it, named after the source.
E.g. `myscript.csv` → `myscript_norm.csv`.

Files whose name already ends in `_norm.csv` are treated as output of a
previous run and are skipped, so the script can be re-run safely.

NOTE: the script does not parse the CSV, it scans it as a plain text file. It
is unlikely but possible that some normalization may lead to an invalid CSV.
"""

from glob import glob
from os import path
from unicodedata import normalize

# Suffix appended to the source file stem to build the destination file name.
NORM_SUFFIX = "_norm"


def normalize_file(fname):
    """
    Write an NFD-normalized copy of a text file and return the copy's path.

    Args:
        fname: Path of the source file. It is decoded as UTF-8.

    Returns:
        Path of the file that was written: the source path with its extension
        replaced by `_norm.csv`.

    Raises:
        OSError: If the source cannot be read or the destination written.
        UnicodeDecodeError: If the source is not valid UTF-8.
    """
    stem, _ext = path.splitext(fname)
    dest_fname = stem + NORM_SUFFIX + ".csv"

    # An explicit encoding keeps the result independent of the platform
    # locale; newline="" disables newline translation so that line endings
    # (including those embedded in quoted CSV fields) are copied unchanged.
    with open(fname, encoding="utf-8", newline="") as fh:
        data = fh.read()

    with open(dest_fname, "w", encoding="utf-8", newline="") as fh:
        fh.write(normalize("NFD", data))

    return dest_fname


def main():
    """Normalize every source CSV file found in the current directory."""
    # Sorted so that the processing order and the log output are stable.
    for fname in sorted(glob("*.csv")):
        # Skip the output of an earlier run; otherwise each run would add
        # another `_norm` layer (`x_norm_norm.csv`, ...).
        if path.splitext(fname)[0].endswith(NORM_SUFFIX):
            continue

        dest_fname = normalize_file(fname)
        print(f"Normalized {fname} to {dest_fname}.")

    print("Done.")


if __name__ == "__main__":
    main()
|