瀏覽代碼

Fix typo bug; reorganize FKR073-109; fix tests.

scossu 9 月之前
父節點
當前提交
610df9be63

+ 2 - 2
scriptshifter/hooks/korean/FKR_index.csv

@@ -106,7 +106,7 @@ FKR105,"유성음화3 (ㄹ+ㄷ, ㄹ+ㅈ)","Vocalization 3 (ㄹ+ㄷ, ㄹ+ㅈ)"
 FKR106,말음법칙,Final sound law
 FKR107,예외 자음표기 '쉬' = shi,Exception for '쉬' = shi
 FKR108,예외 자음군 표기 'ㄴㄱ'= n'g,Exception for 'ㄴㄱ'= n'g
-FKR109,나머지 변환,Conver everything else
+FKR109,나머지 변환,Convert everything else
 FKR110,기호 변환,Convert symbols
 FKR111,"ㄹ + 모음/ㅎ/ㄹ, [""lr"",""ll""] 는 반드시 맨 끝에 둘 것","ㄹ + 모음/ㅎ/ㄹ, [""lr"",""ll""] must be in the last of the array"
 FKR112,두음법칙 예외,Exceptions to initial sound law
@@ -139,7 +139,7 @@ FKR138,ISO/TR 11941 END ISO/TR 11941 로마자 규칙,ISO/TR 11941  => No need f
 FKR139,한자음이 여러개일 경우,Chinese characters with multiple readings
 FKR140,樂은 그대로 둘것,Leave 樂 as it is
 FKR141,엑셀에서 복사,Chinese character list
-FKR142,한자 MARC8한자로 통일,Conver to MARC8 characters
+FKR142,한자 MARC8한자로 통일,Convert to MARC8 characters
 FKR143,한자 예외 한자어 변환 먼저,Process exceptions first
 FKR144,"한자 두음법칙 처리 (제외: 列, 烈, 裂, 劣)","Apply initial sound law (Except: 列, 烈, 裂, 劣)"
 FKR145,"한자 약자, 이체자","Simplified characters, variants"

+ 155 - 156
scriptshifter/hooks/korean/data.yml

@@ -3631,167 +3631,166 @@ fkr069:
   "윗옷": "위돗"
   "첫애": "처대"
 
-fkr073-100:
-  "f7~":  # FKR073
+fkr073:
     "f7~i11#m20": "f0~i12#m20"
     "f7~i11#m6": "f0~i12#m6"
     "f7~i18#m20": "f0~i14#m20"
     "f7~i18#m6": "f0~i14#m6"
 
-  "f25~":  #FKR074
-    "f25~i11#m20": "f0~i14#m20"
-    "f25~i11#m6": "f0~i14#m6"
-    "f25~i18#m20": "f0~i14#m20"
-    "f25~i18#m6": "f0~i14#m6"
-
-  "f1~":  # FKR075
-    "f1~i2#": "f21~i2#"
-    "f1~i5#": "f21~i2#"
-    "f1~i6#": "f21~i6#"
-
-  "f2~":  # FKR076
-    "f2~i2#": "f21~i2#"
-    "f2~i5#": "f21~i2#"
-    "f2~i6#": "f21~i6#"
-    "f2~i11#": "f0~i1#"
-
-  "f3~":  # FKR077
-    "f3~i0#": "k~k"
-    "f3~i2#": "ng~n"
-    "f3~i5#": "ng~n"
-    "f3~i6#": "ng~m"
-    "f3~i11#": "k~s"
-
-  "f4~":  # FKR078
-    "f4~i5#": "f8~i5#"
-
-  "f5~":  # FKR079
-    "f5~i0#": "n~k"
-    "f5~i2#": "n~n"
-    "f5~i3#": "n~t"
-    "f5~i12#": "n~ch"
-
-  "f6~":  # FKR080
-    "f6~i0#": "f4~i15#"
-    "f6~i2#": "f4~i2#"
-    "f6~i3#": "f4~i16#"
-    "f6~i12#": "f4~i14#"
-
-  "f7~":  # FKR081
-    "f7~i2#": "f4~i2#"
-    "f7~i5#": "f4~i2#"
-    "f7~i6#": "f4~i6#"
-
-  "f8~":  # FKR082
-    "f8~i2#": "f8~i5#"
-
-  "f9~":  # FKR083
-    "f9~i0#": "l~k"
-    "f9~i2#": "ng~n"
-    "f9~i3#": "k~t"
-    "f9~i12#": "k~ch"
-
-  "f10~":  # FKR084
-    "f10~i0#": "m~k"
-    "f10~i2#": "m~n"
-    "f10~i3#": "m~t"
-    "f10~i12#": "m~ch"
-
-  "f11~":  # FKR085
-    "f11~i0#": "l~k"
-    "f11~i2#": "m~n"
-    "f11~i3#": "l~t"
-    "f11~i12#": "l~ch"
-
-  "f13~":  # FKR086
-    "f13~i0#": "l~k"
-    "f13~i2#": "l~l"
-    "f13~i3#": "l~t"
-    "f13~i12#": "l~ch"
-
-  "f14~":  # FKR087
-    "f14~i0#": "p~k"
-    "f14~i2#": "m~n"
-    "f14~i3#": "p~t"
-    "f14~i12#": "p~ch"
-
-  "f15~":  # FKR088
-    "f15~i0#": "f8~i15#"
-    "f15~i2#": "f8~i5#"
-    "f15~i3#": "f8~i16#"
-    "f15~i12#": "f8~i14#"
-
-  "f16~":  # FKR089
-     "f16~i5#": "f16~i2#"
-
-  "f17~":  # FKR090
-    "f17~i2#": "f16~i2#"
-    "f17~i5#": "f16~i2#"
-    "f17~i6#": "f16~i6#"
-
-  "f18~":  # FKR091
-    "f18~i0#": "f17~i0#"
-    "f18~i2#": "f16~i2#"
-    "f18~i3#": "f17~i3#"
-    "f18~i5#": "f16~i2#"
-    "f18~i6#": "f16~i6#"
-    "f18~i9#": "f17~i9#"
-    "f18~i11#": "f17~i9#"
-    "f18~i12#": "f17~i12#"
-
-  "f19~":  # FKR092
-    "f19~i2#": "f4~i2#"
-    "f19~i5#": "f4~i2#"
-    "f19~i6#": "f4~i6#"
-    "f19~i11#": "f0~i9#"
-
-  "f20~":  # FKR093
-    "f20~i2#": "f4~i2#"
-    "f20~i5#": "f4~i2#"
-    "f20~i6#": "f4~i6#"
-    "f20~i11#": "f0~i10#"
-
-  "f21~":  # FKR094
-    "f21~i5#": "f21~i2#"
-
-  "f22~":  # FKR095
-    "f22~i2#": "f4~i2#"
-    "f22~i5#": "f4~i2#"
-    "f22~i6#": "f4~i6#"
-    "f22~i11#": "f0~i12#"
-    "f22~i18#": "f0~i14#"
-
-  "f23~":  # FKR096
-    "f23~i2#": "f4~i2#"
-    "f23~i5#": "f4~i2#"
-    "f23~i6#": "f4~i6#"
-    "f23~i11#": "f0~i14#"
-    "f23~i18#": "f0~i14#"
-
-  "f24~":  # FKR097
-    "f24~i2#": "f21~i2#"
-    "f24~i6#": "f21~i6#"
-    "f24~i11#": "f0~i15#"
-
-  "f25~":  # FKR098
-    "f25~i2#": "f4~i2#"
-    "f25~i6#": "f4~i6#"
-    "f25~i11#": "f0~i16#"
-
-  "f26~":  # FKR099
-    "f26~i2#": "f16~i2#"
-    "f26~i6#": "f16~i6#"
-    "f26~i11#": "f0~i17#"
-
-  "f27~":  # FKR100
-    "f27~i0#": "f0~i15#"
-    "f27~i2#": "f4~i2#"
-    "f27~i3#": "f0~i16#"
-    "f27~i5#": "f4~i2#"
-    "f27~i6#": "f4~i6#"
-    "f27~i7#": "f0~i17#"
-    "f27~i11#": "f0~i11#"
-    "f27~i12#": "f0~i14#"
+fkr074:
+  "f25~i11#m20": "f0~i14#m20"
+  "f25~i11#m6": "f0~i14#m6"
+  "f25~i18#m20": "f0~i14#m20"
+  "f25~i18#m6": "f0~i14#m6"
+
+fkr075:
+  "f1~i2#": "f21~i2#"
+  "f1~i5#": "f21~i2#"
+  "f1~i6#": "f21~i6#"
+
+fkr076:
+  "f2~i2#": "f21~i2#"
+  "f2~i5#": "f21~i2#"
+  "f2~i6#": "f21~i6#"
+  "f2~i11#": "f0~i1#"
+
+fkr077:
+  "f3~i0#": "k~k"
+  "f3~i2#": "ng~n"
+  "f3~i5#": "ng~n"
+  "f3~i6#": "ng~m"
+  "f3~i11#": "k~s"
+
+fkr078:
+  "f4~i5#": "f8~i5#"
+
+fkr079:
+  "f5~i0#": "n~k"
+  "f5~i2#": "n~n"
+  "f5~i3#": "n~t"
+  "f5~i12#": "n~ch"
+
+fkr080:
+  "f6~i0#": "f4~i15#"
+  "f6~i2#": "f4~i2#"
+  "f6~i3#": "f4~i16#"
+  "f6~i12#": "f4~i14#"
+
+fkr081:
+  "f7~i2#": "f4~i2#"
+  "f7~i5#": "f4~i2#"
+  "f7~i6#": "f4~i6#"
+
+fkr082:
+  "f8~i2#": "f8~i5#"
+
+fkr083:
+  "f9~i0#": "l~k"
+  "f9~i2#": "ng~n"
+  "f9~i3#": "k~t"
+  "f9~i12#": "k~ch"
+
+fkr084:
+  "f10~i0#": "m~k"
+  "f10~i2#": "m~n"
+  "f10~i3#": "m~t"
+  "f10~i12#": "m~ch"
+
+fkr085:
+  "f11~i0#": "l~k"
+  "f11~i2#": "m~n"
+  "f11~i3#": "l~t"
+  "f11~i12#": "l~ch"
+
+fkr086:
+  "f13~i0#": "l~k"
+  "f13~i2#": "l~l"
+  "f13~i3#": "l~t"
+  "f13~i12#": "l~ch"
+
+fkr087:
+  "f14~i0#": "p~k"
+  "f14~i2#": "m~n"
+  "f14~i3#": "p~t"
+  "f14~i12#": "p~ch"
+
+fkr088:
+  "f15~i0#": "f8~i15#"
+  "f15~i2#": "f8~i5#"
+  "f15~i3#": "f8~i16#"
+  "f15~i12#": "f8~i14#"
+
+fkr089:
+  "f16~i5#": "f16~i2#"
+
+fkr090:
+  "f17~i2#": "f16~i2#"
+  "f17~i5#": "f16~i2#"
+  "f17~i6#": "f16~i6#"
+
+fkr091:
+  "f18~i0#": "f17~i0#"
+  "f18~i2#": "f16~i2#"
+  "f18~i3#": "f17~i3#"
+  "f18~i5#": "f16~i2#"
+  "f18~i6#": "f16~i6#"
+  "f18~i9#": "f17~i9#"
+  "f18~i11#": "f17~i9#"
+  "f18~i12#": "f17~i12#"
+
+fkr092:
+  "f19~i2#": "f4~i2#"
+  "f19~i5#": "f4~i2#"
+  "f19~i6#": "f4~i6#"
+  "f19~i11#": "f0~i9#"
+
+fkr093:
+  "f20~i2#": "f4~i2#"
+  "f20~i5#": "f4~i2#"
+  "f20~i6#": "f4~i6#"
+  "f20~i11#": "f0~i10#"
+
+fkr094:
+  "f21~i5#": "f21~i2#"
+
+fkr095:
+  "f22~i2#": "f4~i2#"
+  "f22~i5#": "f4~i2#"
+  "f22~i6#": "f4~i6#"
+  "f22~i11#": "f0~i12#"
+  "f22~i18#": "f0~i14#"
+
+fkr096:
+  "f23~i2#": "f4~i2#"
+  "f23~i5#": "f4~i2#"
+  "f23~i6#": "f4~i6#"
+  "f23~i11#": "f0~i14#"
+  "f23~i18#": "f0~i14#"
+
+fkr097:
+  "f24~i2#": "f21~i2#"
+  "f24~i6#": "f21~i6#"
+  "f24~i11#": "f0~i15#"
+
+fkr098:
+  "f25~i2#": "f4~i2#"
+  "f25~i6#": "f4~i6#"
+  "f25~i11#": "f0~i16#"
+
+fkr099:
+  "f26~i2#": "f16~i2#"
+  "f26~i6#": "f16~i6#"
+  "f26~i11#": "f0~i17#"
+
+fkr100:
+  "f27~i0#": "f0~i15#"
+  "f27~i2#": "f4~i2#"
+  "f27~i3#": "f0~i16#"
+  "f27~i5#": "f4~i2#"
+  "f27~i6#": "f4~i6#"
+  "f27~i7#": "f0~i17#"
+  "f27~i11#": "f0~i11#"
+  "f27~i12#": "f0~i14#"
 
 fkr101:
   "f5~i11#": "f4~i12#"

+ 4 - 9
scriptshifter/hooks/korean/romanizer.py

@@ -275,6 +275,8 @@ def _romanize_oclc_auto(kor):
 
     kor = kor.replace("^", " GLOTTAL ")
 
+    logger.debug(f"Korean before romanization: {kor}")
+
     rom_ls = []
     for word in kor.split(" "):
         rom_ls.append(_kor_rom(word))
@@ -355,7 +357,7 @@ def _kor_rom(kor):
             niun_loc = rom.find("~", niun_loc + 1)
         rom_niun_a = rom[:niun_loc]
         rom_niun_b = rom[niun_loc + 1:]
-        if re.match("ill#m(?:2|6|12|17|20)", rom_niun_b):
+        if re.match("i11#m(?:2|6|12|17|20)", rom_niun_b):
             _fkr_log(71)
             rom_niun_b = rom_niun_b.replace("i11#m", "i2#m", 1)
 
@@ -394,13 +396,6 @@ def _kor_rom(kor):
     # FKR098: Consonant assimilation ㅌ
     # FKR099: Consonant assimilation ㅍ
     # FKR100: Consonant assimilation ㅎ
-    fkr_i = 73
-    for k, cmap in KCONF["fkr073-100"].items():
-        if k in rom:
-            _fkr_log(fkr_i)
-            rom = _replace_map(rom, cmap)
-        fkr_i += 1
-
     # FKR101: digraphic coda + ㅇ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,ㄽ,ㄾ,ㄿ,ㅀ
     # FKR102: digraphic coda + ㅎ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,(ㄽ),ㄾ,ㄿ,ㅀ
     # FKR103: Vocalization 1 (except ㄹ+ㄷ, ㄹ+ㅈ 제외) voiced + unvoiced
@@ -409,7 +404,7 @@ def _kor_rom(kor):
     # FKR106: Final sound law
     # FKR107: Exception for '쉬' = shi
     # FKR108: Exception for 'ㄴㄱ'= n'g
-    for fkr_i in range(101, 109):
+    for fkr_i in range(73, 109):
         _fkr_log(fkr_i)
         _bk = rom
         rom = _replace_map(rom, KCONF[f"fkr{fkr_i:03}"])

+ 3 - 2
tests/test02_transliteration.py

@@ -34,7 +34,7 @@ class TestTrans(TestCase):
         """
         config = scriptshifter.tables.load_table(self.tbl)
         if "script_to_roman" in config:
-            txl = transliterate(self.script, self.tbl)[0]
+            txl = transliterate(self.script, self.tbl, capitalize="first")[0]
             self.assertEqual(
                     txl, self.roman,
                     f"S2R transliteration error for {self.tbl}!\n"
@@ -49,7 +49,8 @@ class TestTrans(TestCase):
         """
         config = scriptshifter.tables.load_table(self.tbl)
         if "roman_to_script" in config:
-            txl = transliterate(self.roman, self.tbl, r2s=True)[0]
+            txl = transliterate(
+                    self.roman, self.tbl, r2s=True, capitalize="first")[0]
             self.assertEqual(
                     txl, self.script,
                     f"R2S transliteration error for {self.tbl}!\n"