test02_transliteration.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. import logging
  2. from unittest import TestCase
  3. from csv import reader
  4. from importlib import reload
  5. from os import environ, path
  6. from tests import TEST_DATA_DIR
  7. from transliterator.trans import transliterate
  8. import transliterator.tables
  9. logger = logging.getLogger(__name__)
  10. class TestScriptToRoman(TestCase):
  11. """
  12. Test S2R transliteration.
  13. TODO use a comprehensive sample table and report errors for unsupported
  14. languages.
  15. """
  16. def setUp(self):
  17. if "TXL_CONFIG_TABLE_DIR" in environ:
  18. del environ["TXL_CONFIG_TABLE_DIR"]
  19. reload(transliterator.tables)
  20. # import transliterator.tables
  21. def test_basic_chinese(self):
  22. src = "撞倒須彌 : 漢傳佛教青年學者論壇論文集"
  23. dest = (
  24. "Zhuang dao Xumi : han zhuan Fo jiao qing nian xue zhe lun "
  25. "tan lun wen ji")
  26. trans = transliterate(src, "chinese")
  27. assert trans == dest
  28. def test_available_samples(self):
  29. """
  30. Test all available samples for the implemented tables.
  31. """
  32. for k, script, roman in _test_cases():
  33. txl = transliterate(script, k)
  34. if txl != roman:
  35. warn_str = f"Mismatching transliteration in {k}!"
  36. logger.warning("*" * len(warn_str))
  37. logger.warning(warn_str)
  38. logger.warning("*" * len(warn_str))
  39. logger.info(f"Transliterated string: {txl}")
  40. logger.info(f" Target string: {roman}")
  41. # assert txl == roman
  42. def _test_cases():
  43. test_cases = []
  44. with open(
  45. path.join(TEST_DATA_DIR, "sample_strings.csv"),
  46. newline="") as fh:
  47. csv = reader(fh)
  48. csv.__next__() # Discard header row.
  49. for row in csv:
  50. if len(row[2]):
  51. # Table key, script, Roman
  52. test_cases.append((row[2], row[3], row[4]))
  53. return test_cases