Browse Source

Fix #31; rename variables.

scossu 1 year ago
parent
commit
ef7d9b8346
1 changed file with 23 additions and 20 deletions
  1. 23 20
      scriptshifter/hooks/korean/romanizer.py

+ 23 - 20
scriptshifter/hooks/korean/romanizer.py

@@ -216,39 +216,40 @@ def _kor_corp_name_rom(src):
     return rom
 
 
-def _romanize_oclc_auto(data):
+def _romanize_oclc_auto(kor):
     # FKR050: Starts preprocessing symbol
     for rname, rule in KCONF["fkr050"].items():
         logger.debug(f"Applying fkr050[{rname}]")
-        data = _replace_map(data, rule)
+        kor = _replace_map(kor, rule)
 
     # See https://github.com/lcnetdev/scriptshifter/issues/19
-    data = re.sub("제([0-9])", "제 \\1", data)
+    kor = re.sub("제([0-9])", "제 \\1", kor)
 
     # FKR052: Replace Che+number
     for rname, rule in KCONF["fkr052"].items():
         logger.debug(f"Applying fkr052[{rname}]")
-        data = _replace_map(data, rule)
+        kor = _replace_map(kor, rule)
 
     # Strip end and multiple whitespace.
-    data = re.sub(r"\s{2,}", " ", data.strip())
+    kor = re.sub(r"\s{2,}", " ", kor.strip())
 
-    data = data.replace("^", " GLOTTAL ")
+    kor = kor.replace("^", " GLOTTAL ")
 
-    data_ls = []
-    for word in data.split(" "):
-        data_ls.append(_kor_rom(word))
-    data = " ".join(data_ls)
+    rom_ls = []
+    # breakpoint()
+    for word in kor.split(" "):
+        rom_ls.append(_kor_rom(word))
+    rom = " ".join(rom_ls)
 
     # FKR059: Apply glottalization
-    data = _replace_map(
-            f" {data.strip()} ", {" GLOTTAL ": "", "*": "", "^": ""})
+    rom = _replace_map(
+            f" {rom.strip()} ", {" GLOTTAL ": "", "*": "", "^": ""})
 
     # FKR060: Process number + -년/-년도/-년대
     # TODO Add leading whitespace as per L1221? L1202 already added one.
-    data = _replace_map(data, KCONF["fkr060"])
+    rom = _replace_map(rom, KCONF["fkr060"])
 
-    data = re.sub(r"\s{2,}", " ", f" {data.strip()} ")
+    rom = re.sub(r"\s{2,}", " ", f" {rom.strip()} ")
 
     # FKR061: Jurisdiction (시)
     # FKR063: Jurisdiction (국,도,군,구)
@@ -256,16 +257,16 @@ def _romanize_oclc_auto(data):
     # FKR065: Frequent historical names
     for fkrkey in ("fkr061", "fkr063", "fkr064", "fkr065"):
         logger.debug(f"Applying {fkrkey.upper()}")
-        data = _replace_map(data, KCONF[fkrkey])
+        rom = _replace_map(rom, KCONF[fkrkey])
 
     # FKR066: Starts restore symbols
     for rname, rule in KCONF["fkr066"].items():
         logger.debug(f"Applying FKR066[{rname}]")
-        data = _replace_map(data, rule)
+        rom = _replace_map(rom, rule)
 
-    data = re.sub(r"\s{2,}", " ", data.strip())
+    rom = re.sub(r"\s{2,}", " ", rom.strip())
 
-    return data
+    return rom
 
 
 # FKR068: Exceptions, Exceptions to initial sound law, Proper names
@@ -290,7 +291,9 @@ def _kor_rom(kor):
             kor = kor[1:]
 
     rom_ls = []
-    cpoints = tuple(ord(c) for c in kor)
+    if non_kor > 0:
+        # Rebuild code point list with non_kor removed.
+        cpoints = tuple(ord(c) for c in kor)
     for i in range(len(kor)):
         cp = cpoints[i] - CP_MIN
         ini = "i" + str(cp // 588)
@@ -347,7 +350,7 @@ def _kor_rom(kor):
     for k, cmap in KCONF["fkr073-100"].items():
         if k in rom:
             logger.debug(f"Applying FKR{fkr_i:03}")
-            _replace_map(rom, cmap)
+            rom = _replace_map(rom, cmap)
         fkr_i += 1
 
     # FKR101: digraphic coda + ㅇ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,ㄽ,ㄾ,ㄿ,ㅀ