Browse Source

Do not apply Korean rules to strings with no CJK

Tom Ventimiglia 4 months ago
parent
commit
c11c3f5818
2 changed files with 7 additions and 0 deletions
  1. 5 0
      scriptshifter/hooks/korean/romanizer.py
  2. 2 0
      test/data/script_samples/korean.csv

+ 5 - 0
scriptshifter/hooks/korean/romanizer.py

@@ -386,6 +386,11 @@ def _romanize_oclc_auto(kor):
 
 # FKR068: Exceptions, Exceptions to initial sound law, Proper names
 def _kor_rom(kor):
+    # Only convert string if it contains CJK (i.e. do not change if already romanized)
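+    # \u3000 (ideographic space) is the first codepoint of the CJK Symbols and Punctuation block;
+    # Hangul syllables and CJK ideographs all sit above it (conjoining Hangul Jamo, U+1100-U+11FF, sit below)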
+    if max(kor) < '\u3000': 
+        return kor
+
     kor = re.sub(r"\s{2,}", " ", kor.strip())
     orig = kor
 

+ 2 - 0
test/data/script_samples/korean.csv

@@ -1758,3 +1758,5 @@
 "korean_nonames","황홀 하고 격정적인 한국 축구 를 위하여","Hwanghol hago kyŏkchŏngjŏgin Han'guk ch'ukku rŭl wihayŏ","s2r","{""capitalize"": ""first""}","From K-Romanizer",
 "korean_nonames","희망 열매 를 가꾼 지역들","Hŭimang yŏlmae rŭl kakkun chiyŏktŭl","s2r","{""capitalize"": ""first""}","From K-Romanizer",
 "korean_nonames","蒙骨 의 高麗· 日本 侵攻 과 韓日 關係","Monggol ŭi Koryŏ, Ilbon ch'imgong kwa Han-Il kwan'gye","s2r","{""capitalize"": ""first""}","From K-Romanizer",
+"korean_nonames","황국 flavor","hwangguk flavor","s2r","","From Tom Ventimiglia"
+"korean_nonames","roasted 황국 ","roasted hwangguk","s2r","","From Tom Ventimiglia"