__init__.py 1.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. from functools import cache
  2. from glob import glob
  3. from os import path, access, R_OK
  4. from yaml import load
  5. try:
  6. from yaml import CLoader as Loader
  7. except ImportError:
  8. from yaml import Loader
  9. __doc__ = """
  10. Transliteration tables.
  11. These tables contain all transliteration information, grouped by script and
  12. language (or language and script? TBD)
  13. """
  14. TABLE_DIR = path.join(path.dirname(path.realpath(__file__)), "data")
  15. @cache
  16. def load_table(tname):
  17. """
  18. Load one transliteration table.
  19. The table file is parsed into an in-memory configuration that contains
  20. the language & script metadata and parsing rules.
  21. """
  22. fname = path.join(TABLE_DIR, tname + ".yml")
  23. if not access(fname, R_OK):
  24. raise ValueError(f"No transliteration table for {tname}!")
  25. with open(fname) as fh:
  26. tdata = load(fh, Loader=Loader)
  27. # TODO Rearrange parsing tokens alphabetically, but so that the longest
  28. # ones come first. E.g.
  29. # - ABCD
  30. # - AB
  31. # - A
  32. # - BCDE
  33. # - BCD
  34. # - BEFGH
  35. # - B
  36. return tdata