3 years ago · 180541aa67
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,4 +1,4 @@
 
				 from os import path
			
 
				 
			
 
				 TEST_DIR = path.dirname(path.realpath(__file__))
			
 
				-TEST_CONFIG_DIR = path.join(TEST_DIR, "data")
			
 
				+TEST_DATA_DIR = path.join(TEST_DIR, "data")
			
--- a/tests/data/transliterator_sample_strings.csv
+++ b/tests/data/transliterator_sample_strings.csv
--- a/tests/test01_cfg.py
+++ b/tests/test01_cfg.py
@@ -3,7 +3,7 @@ from unittest import TestCase
 
				 from importlib import reload
			
 
				 from os import environ
			
 
				 
			
 
				-from tests import TEST_CONFIG_DIR
			
 
				+from tests import TEST_DATA_DIR
			
 
				 import transliterator.tables
			
 
				 
			
 
				 
			
@@ -11,7 +11,7 @@ class TestConfig(TestCase):
 
				     """ Test configuration parsing. """
			
 
				 
			
 
				     def test_ordering(self):
			
 
				-        environ["TXL_CONFIG_TABLE_DIR"] = TEST_CONFIG_DIR
			
 
				+        environ["TXL_CONFIG_TABLE_DIR"] = TEST_DATA_DIR
			
 
				         reload(transliterator.tables)  # Reload new config dir.
			
 
				         from transliterator import tables
			
 
				         tables.list_tables.cache_clear()
			
--- a/tests/test02_transliteration.py
+++ b/tests/test02_transliteration.py
@@ -1,12 +1,19 @@
 
				+import logging
			
 
				+
			
 
				 from unittest import TestCase
			
 
				+from csv import reader
			
 
				 
			
 
				 from importlib import reload
			
 
				-from os import environ
			
 
				+from os import environ, path
			
 
				 
			
 
				+from tests import TEST_DATA_DIR
			
 
				 from transliterator.trans import transliterate
			
 
				 import transliterator.tables
			
 
				 
			
 
				 
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				 class TestScriptToRoman(TestCase):
			
 
				     """
			
 
				     Test S2R transliteration.
			
@@ -21,7 +28,6 @@ class TestScriptToRoman(TestCase):
 
				             # import transliterator.tables
			
 
				 
			
 
				     def test_basic_chinese(self):
			
 
				-        breakpoint()
			
 
				         src = "撞倒須彌 : 漢傳佛教青年學者論壇論文集"
			
 
				         dest = (
			
 
				                 "Zhuang dao Xumi : han zhuan Fo jiao qing nian xue zhe lun "
			
@@ -29,3 +35,34 @@ class TestScriptToRoman(TestCase):
 
				 
			
 
				         trans = transliterate(src, "chinese")
			
 
				         assert trans == dest
			
 
				+
			
 
				+    def test_available_samples(self):
				+        """
				+        Run every available sample string through its table.
				+
				+        A mismatch between the transliterated output and the expected
				+        Roman string is only logged; the hard assertion at the end of
				+        the loop body is currently commented out.
				+        """
				+        for table_key, source, expected in _test_cases():
				+            actual = transliterate(source, table_key)
				+            if actual != expected:
				+                warn_str = f"Mismatching transliteration in {table_key}!"
				+                # Frame the warning with a banner as wide as the message.
				+                banner = "*" * len(warn_str)
				+                logger.warning(banner)
				+                logger.warning(warn_str)
				+                logger.warning(banner)
				+                logger.info(f"Transliterated string: {actual}")
				+                logger.info(f"        Target string: {expected}")
				+
				+            # assert actual == expected
			
 
				+
			
 
				+
			
 
				+def _test_cases():
				+    """
				+    Load the transliteration samples from the CSV in the test data dir.
				+
				+    The first row is treated as a header and discarded. Rows that are
				+    blank or whose third column (the table key) is empty are skipped.
				+
				+    Return a list of (table key, script string, Roman string) tuples
				+    taken from the third, fourth and fifth columns of each row.
				+    """
				+    # NOTE(review): this change set adds
				+    # tests/data/transliterator_sample_strings.csv, while the name
				+    # opened here is sample_strings.csv -- confirm which is intended.
				+    sample_path = path.join(TEST_DATA_DIR, "sample_strings.csv")
				+
				+    # Decode explicitly instead of relying on the locale default; the
				+    # samples are presumably stored as UTF-8 -- TODO confirm.
				+    with open(sample_path, newline="", encoding="utf-8") as fh:
				+        rows = reader(fh)
				+        next(rows, None)  # Discard header row; tolerate an empty file.
				+
				+        return [
				+            # Table key, script, Roman
				+            (row[2], row[3], row[4])
				+            for row in rows
				+            # Blank lines parse as short rows; skip them with no-key rows.
				+            if len(row) > 2 and row[2]
				+        ]
			
--- a/transliterator/tables/data/uzbek.yml
+++ b/transliterator/tables/data/uzbek.yml
@@ -2,7 +2,6 @@ general:
 
				   name: uzbek (Cyrillic)

			
 
				   parents:

			
 
				     - _cyrillic_base

			
 
				-    - _ignore_base

			
 
				 

			
 
				 roman_to_script:

			
 
				   map:

			
--- a/transliterator/trans.py
+++ b/transliterator/trans.py
@@ -9,7 +9,7 @@ from transliterator.tables import load_table
 
				 MULTI_WS_RE = re.compile(r"\s{2,}")
			
 
				 # Default characters defining a word boundary. TODO Make this configurable
			
 
				 # per-table.
			
 
				-WORD_BOUNDARY = " \n\t:;.,\"'"
			
 
				+WORD_BOUNDARY = " \n\t:;.,\"'-()[]{}"
			
 
				 
			
 
				 # Cursor bitwise flags.
			
 
				 CUR_BOW = 1