|
@@ -120,7 +120,18 @@ def _romanize_names(src):
|
|
|
|
|
|
src = _hancha2hangul(_marc8_hancha(src))
|
|
|
|
|
|
- src, warnings = _parse_kor_name(re.sub("\\W{2,}", " ", src.strip()))
|
|
|
+ if re.find("[a-zA-Z0-9]", src):
|
|
|
+ warnings.append(f"{src} may not be a personal name.")
|
|
|
+ return None, warnings
|
|
|
+
|
|
|
+ src, _warnings = _parse_kor_name(re.sub("\\W{2,}", " ", src.strip()))
|
|
|
+
|
|
|
+ if len(_warnings):
|
|
|
+ warnings += _warnings
|
|
|
+
|
|
|
+ if "~" in src:
|
|
|
+ lname, fname = src.split("~", 1)
|
|
|
+ fname_rom = _kor_fname_rom(fname)
|
|
|
|
|
|
return rom, warnings
|
|
|
|
|
@@ -401,3 +412,108 @@ def _replace_map(src, rmap, *args, **kw):
|
|
|
src = src.replace(k, v, *args, **kw)
|
|
|
|
|
|
return src
|
|
|
+
|
|
|
+
|
|
|
def _kor_fname_rom(fname):
    """Romanize a Korean given name.

    Every character of ``fname`` is first encoded as an ``i<n>#m<n>#f<n>``
    token (initial, medial and final indices computed from the character's
    code point offset from ``CP_MIN``). Tokens are joined with ``~`` and the
    string is terminated with ``E``. The encoded string is then rewritten
    by the ``KCONF`` replacement tables ``fkr014`` .. ``fkr031`` and finally
    ``fkr034``.

    Args:
        fname: Given name to romanize. The 588 / 28 / 21 arithmetic matches
            the decomposition of precomposed Hangul syllables, so the
            characters are presumably Hangul syllables and ``CP_MIN`` the
            first syllable code point -- TODO confirm against the module
            constants.

    Returns:
        str: The romanized given name. Two-character names keep a hyphen
        between syllables unless one of the origin heuristics classifies
        the name as ``"native"``.
    """
    # Encode each character as initial / medial / final index tokens.
    encoded = []
    for char in fname:
        offset = ord(char) - CP_MIN
        ini = "i" + str(offset // 588)
        med = "m" + str((offset // 28) % 21)
        fin = "f" + str(offset % 28)
        encoded.append("#".join((ini, med, fin)))
    rom = "~".join(encoded) + "E"

    fkr011 = KCONF["fkr011"]

    # Origin guess from final-consonant tokens.
    origin_by_fin = (
        "native" if any(tok in rom for tok in fkr011["nat_fin"]) else "sino"
    )

    # Origin guess from initial-consonant tokens: "native" only when a
    # "nat_ini" token is present and no "dino_ini" token is.
    has_nat_ini = any(tok in rom for tok in fkr011["nat_ini"])
    has_dino_ini = any(tok in rom for tok in fkr011["dino_ini"])
    origin_by_ini = "native" if has_nat_ini and not has_dino_ini else "sino"

    # Origin guess from medial tokens.
    # NOTE(review): this iterates KCONF["fkr011"] itself. That object is
    # subscripted with string keys above, so if it is a mapping these are
    # its key names and will likely never occur in `rom`. Confirm whether a
    # dedicated token list was intended.
    origin_by_med = "native" if any(tok in rom for tok in fkr011) else "sino"

    # Medial index 19 overrides the guess above: "sino" only when the name
    # contains one of these two specific syllables.
    if "m19#" in rom:
        if "의" in fname or "희" in fname:
            origin_by_med = "sino"
        else:
            origin_by_med = "native"

    # Apply the replacement tables FKR014 through FKR029 in order.
    for i in range(14, 30):
        fkrkey = f"fkr{i:03}"
        logger.debug("Applying %s", fkrkey.upper())
        rom = _replace_map(rom, KCONF[fkrkey])

    for key, cmap in KCONF["fkr030"].items():
        logger.debug('Applying FKR030["%s"]', key)
        # The running string must be passed as the source; the map alone
        # is not a valid call to _replace_map(src, rmap, ...).
        rom = _replace_map(rom, cmap)

    # Drop the token separators, then adjust the first "swi"/"Swi" only.
    rom = _replace_map(
        rom.replace("#", ""), {"swi": "shwi", "Swi": "Shwi"}, 1
    )

    if len(fname) == 2:
        rom = rom.replace("~", "-")
    else:
        rom = _replace_map(rom, {"n~g": "n'g", "~": ""})

    for key, cmap in KCONF["fkr031"].items():
        logger.debug('Applying FKR031["%s"]', key)
        rom = _replace_map(rom, cmap)

    rom = rom.capitalize()

    # Names judged native by any heuristic are written without the hyphen.
    if len(fname) == 2 and "native" in (
        origin_by_ini,
        origin_by_fin,
        origin_by_med,
    ):
        rom = _replace_map(rom, {"n-g": "n'g", "-": ""})

    # NOTE(review): replace() here rewrites every occurrence of the key,
    # not only the matched prefix -- confirm that this is intended.
    for prefix, repl in KCONF["fkr034"].items():
        if rom.startswith(prefix):
            rom = rom.replace(prefix, repl)

    return rom
|