scossu 1 سال پیش
والد
کامیت
4acae2c2d1
2 فایلهای تغییر یافته به همراه 15 افزوده شده و 25 حذف شده
  1. 1 1
      scriptshifter/hooks/korean/FKR_index.csv
  2. 14 24
      scriptshifter/hooks/korean/romanizer.py

+ 1 - 1
scriptshifter/hooks/korean/FKR_index.csv

@@ -109,7 +109,7 @@
 ,FKR108,예외 자음군 표기 'ㄴㄱ'= n'g,Exception for 'ㄴㄱ'= n'g
 ,FKR109,나머지 변환,Conver everything else
 ,FKR110,기호 변환,Convert symbols
-,FKR111,"ㄹ + 모음/ㅎ/ㄹ, [""lr"",""ll""] 는 반드시 맨 끝에 둘 것","ㄹ + 모음/ㅎ/ㄹ, [""lr"",""ll""] 는 반드시 맨 끝에 둘 것"
+,FKR111,"ㄹ + 모음/ㅎ/ㄹ, [""lr"",""ll""] 는 반드시 맨 끝에 둘 것","ㄹ + 모음/ㅎ/ㄹ, [""lr"",""ll""] must be in the last of the array"
 ,FKR112,두음법칙 예외,Exceptions to initial sound law
 ,FKR113,첫 글자로 외래어 판정,Check loan words by the first 1 letter
 ,FKR114,두 글자로 외래어 판정,Check loan words by the first 2 letters

+ 14 - 24
scriptshifter/hooks/korean/romanizer.py

@@ -51,7 +51,7 @@ def s2r_names_post_config(ctx):
     return BREAK
 
 
-def _romanize_nonames(src, hancha=False):
+def _romanize_nonames(src, capitalize=False, hancha=False):
     # FKR038
     if hancha:
         src = _hancha2hangul(_marc8_hancha(src))
@@ -72,20 +72,17 @@ def _romanize_nonames(src, hancha=False):
     data = _romanize_oclc_auto(data)
 
     # FKR042
-    if capitalize = "all":
+    if capitalize == "all":
         data = data.title()
     # FKR043
-    elif capitalize = "first":
+    elif capitalize == "first":
         data = data.capitalize()
 
     # FKR044
     ambi = re.sub("[,.\";: ]+", " ", data)
 
-    # TODO See https://github.com/lcnetdev/scriptshifter/issues/20
-    no_oclc_breve = False
-
-    if no_oclc_breve:
-        data = _replace_map(data, {"ŏ": "ŏ", "ŭ": "ŭ", "Ŏ": "Ŏ", "Ŭ": "Ŭ"})
+    # @TODO Move this to a generic normalization step (not only for K)
+    data = _replace_map(data, {"ŏ": "ŏ", "ŭ": "ŭ", "Ŏ": "Ŏ", "Ŭ": "Ŭ"})
 
     # TODO Decide what to do with these. There is no facility for outputting
     # warnings or notes to the user yet.
@@ -94,12 +91,11 @@ def _romanize_nonames(src, hancha=False):
         if exp in ambi:
             warnings.append(ambi if warn == "" else warn)
 
-
     return data, warnings
 
 
 def _romanize_names(src):
-    return "Nothing Here Yet."
+    return "Nothing Here Yet.", {}
 
 
 def _romanize_oclc_auto(data):
@@ -108,13 +104,8 @@ def _romanize_oclc_auto(data):
         logger.debug(f"Applying fkr050[{rname}]")
         data = _replace_map(data, rule)
 
-    # NOTE: Is this memant to replace " 제" followed by a digit with " 제 "?
-    # This may not yield the expected results as it could replace all
-    # occurrences of " 제" as long as there is a match somewhere in the text.
-    if re.match(" 제[0-9]", data):
-        data = data.replace(" 제", " 제 ")
-    # NOTE: Maybe this was meant:
-    # data = re.sub(" 제([0-9])", "제 \\1", data):
+    # See https://github.com/lcnetdev/scriptshifter/issues/19
+    data = re.sub("제([0-9])", "제 \\1", data)
 
     # FKR052
     for rname, rule in KCONF["fkr052"].items():
@@ -216,7 +207,7 @@ def _kor_rom(data):
         rom = _replace_map(rom, KCONF[f"fkr{fkr_i:03}"])
 
     # FKR109
-    for pos, data in KCONF["fkr109"]
+    for pos, data in KCONF["fkr109"]:
         logger.debug(f"Applying FKR109[{pos}]")
         rom = _replace_map(rom, data)
 
@@ -251,13 +242,12 @@ def _kor_rom(data):
             orig in KCONF["fkr118"] or
             # FKR119
             orig in KCONF["fkr119"] or
-            orig.endswith(tuple(KCONF["fkr119_suffix"])):
+            orig.endswith(tuple(KCONF["fkr119_suffix"])) or
             # FKR120
-            orig.endswith(tuple(KCONF["fkr120"])):
+            orig.endswith(tuple(KCONF["fkr120"]))):
         rom = rom.capitalize()
 
     # FKR121
-    # TODO Check global $ConvertR2L assigned in L17 and tested in L1849.
     if f" {orig} " in KCONF["fkr121"]:
         if rom.startswith("r"):
             rom = "l" + rom[1:]
@@ -308,9 +298,9 @@ def _hancha2hangul(data):
     return re.sub("\\W{2,}", " ", data.strip())
 
 
-def _replace_map(src, rmap):
+def _replace_map(src, rmap, *args, **kw):
     """ Replace occurrences in a string according to a map. """
-    for k, v in rmap:
-        src = src.replace(k, v)
+    for k, v in rmap.items():
+        src = src.replace(k, v, *args, **kw)
 
     return src