瀏覽代碼

WIP convert Korean parsing to structure.

scossu 5 月之前
父節點
當前提交
e570f0ac89
共有 2 個文件被更改,包括 368 次插入和 362 次删除
  1. 239 239
      scriptshifter/hooks/korean/data.yml
  2. 129 123
      scriptshifter/hooks/korean/romanizer.py

+ 239 - 239
scriptshifter/hooks/korean/data.yml

@@ -2875,245 +2875,245 @@ fkr069:
   "첫애": "처대"
 
 fkr073:
-  "f7~i11#m20": "f0~i12#m20"
-  "f7~i11#m6": "f0~i12#m6"
-  "f7~i18#m20": "f0~i14#m20"
-  "f7~i18#m6": "f0~i14#m6"
-
-fkr074:
-  "f25~i11#m20": "f0~i14#m20"
-  "f25~i11#m6": "f0~i14#m6"
-  "f25~i18#m20": "f0~i14#m20"
-  "f25~i18#m6": "f0~i14#m6"
-
-fkr075:
-  "f1~i2#": "f21~i2#"
-  "f1~i5#": "f21~i2#"
-  "f1~i6#": "f21~i6#"
-
-fkr076:
-  "f2~i2#": "f21~i2#"
-  "f2~i5#": "f21~i2#"
-  "f2~i6#": "f21~i6#"
-  "f2~i11#": "f0~i1#"
-
-fkr077:
-  "f3~i0#": "k~k"
-  "f3~i2#": "ng~n"
-  "f3~i5#": "ng~n"
-  "f3~i6#": "ng~m"
-  "f3~i11#": "k~s"
-
-fkr078:
-  "f4~i5#": "f8~i5#"
-
-fkr079:
-  "f5~i0#": "n~k"
-  "f5~i2#": "n~n"
-  "f5~i3#": "n~t"
-  "f5~i12#": "n~ch"
-
-fkr080:
-  "f6~i0#": "f4~i15#"
-  "f6~i2#": "f4~i2#"
-  "f6~i3#": "f4~i16#"
-  "f6~i12#": "f4~i14#"
-
-fkr081:
-  "f7~i2#": "f4~i2#"
-  "f7~i5#": "f4~i2#"
-  "f7~i6#": "f4~i6#"
-
-fkr082:
-  "f8~i2#": "f8~i5#"
-
-fkr083:
-  "f9~i0#": "l~k"
-  "f9~i2#": "ng~n"
-  "f9~i3#": "k~t"
-  "f9~i12#": "k~ch"
-
-fkr084:
-  "f10~i0#": "m~k"
-  "f10~i2#": "m~n"
-  "f10~i3#": "m~t"
-  "f10~i12#": "m~ch"
-
-fkr085:
-  "f11~i0#": "l~k"
-  "f11~i2#": "m~n"
-  "f11~i3#": "l~t"
-  "f11~i12#": "l~ch"
-
-fkr086:
-  "f13~i0#": "l~k"
-  "f13~i2#": "l~l"
-  "f13~i3#": "l~t"
-  "f13~i12#": "l~ch"
-
-fkr087:
-  "f14~i0#": "p~k"
-  "f14~i2#": "m~n"
-  "f14~i3#": "p~t"
-  "f14~i12#": "p~ch"
-
-fkr088:
-  "f15~i0#": "f8~i15#"
-  "f15~i2#": "f8~i5#"
-  "f15~i3#": "f8~i16#"
-  "f15~i12#": "f8~i14#"
-
-fkr089:
-  "f16~i5#": "f16~i2#"
-
-fkr090:
-  "f17~i2#": "f16~i2#"
-  "f17~i5#": "f16~i2#"
-  "f17~i6#": "f16~i6#"
-
-fkr091:
-  "f18~i0#": "f17~i0#"
-  "f18~i2#": "f16~i2#"
-  "f18~i3#": "f17~i3#"
-  "f18~i5#": "f16~i2#"
-  "f18~i6#": "f16~i6#"
-  "f18~i9#": "f17~i9#"
-  "f18~i11#": "f17~i9#"
-  "f18~i12#": "f17~i12#"
-
-fkr092:
-  "f19~i2#": "f4~i2#"
-  "f19~i5#": "f4~i2#"
-  "f19~i6#": "f4~i6#"
-  "f19~i11#": "f0~i9#"
-
-fkr093:
-  "f20~i2#": "f4~i2#"
-  "f20~i5#": "f4~i2#"
-  "f20~i6#": "f4~i6#"
-  "f20~i11#": "f0~i10#"
-
-fkr094:
-  "f21~i5#": "f21~i2#"
-
-fkr095:
-  "f22~i2#": "f4~i2#"
-  "f22~i5#": "f4~i2#"
-  "f22~i6#": "f4~i6#"
-  "f22~i11#": "f0~i12#"
-  "f22~i18#": "f0~i14#"
-
-fkr096:
-  "f23~i2#": "f4~i2#"
-  "f23~i5#": "f4~i2#"
-  "f23~i6#": "f4~i6#"
-  "f23~i11#": "f0~i14#"
-  "f23~i18#": "f0~i14#"
-
-fkr097:
-  "f24~i2#": "f21~i2#"
-  "f24~i6#": "f21~i6#"
-  "f24~i11#": "f0~i15#"
-
-fkr098:
-  "f25~i2#": "f4~i2#"
-  "f25~i6#": "f4~i6#"
-  "f25~i11#": "f0~i16#"
-
-fkr099:
-  "f26~i2#": "f16~i2#"
-  "f26~i6#": "f16~i6#"
-  "f26~i11#": "f0~i17#"
-
-fkr100:
-  "f27~i0#": "f0~i15#"
-  "f27~i2#": "f4~i2#"
-  "f27~i3#": "f0~i16#"
-  "f27~i5#": "f4~i2#"
-  "f27~i6#": "f4~i6#"
-  "f27~i7#": "f0~i17#"
-  "f27~i11#": "f0~i11#"
-  "f27~i12#": "f0~i14#"
-
-fkr101:
-  "f5~i11#": "f4~i12#"
-  "f6~i11#": "f4~i11#"
-  "f9~i11#": "f8~i0#"
-  "f10~i11#": "f8~i6#"
-  "f11~i11#": "f8~i7#"
-  "f12~i11#": "f8~i9#"
-  "f13~i11#": "f8~i16#"
-  "f14~i11#": "f8~i17#"
-  "f15~i11#": "f8~i11#"
-
-fkr102:
-  "f5~i18#": "f4~i14#"
-  "f6~i18#": "f4~i18#"
-  "f9~i18#": "f8~i15#"
-  "f10~i18#": "f16~i18#"
-  "f11~i18#": "f8~i17#"
-  "f13~i18#": "f8~i16#"
-  "f14~i18#": "f8~i17#"
-  "f15~i18#": "f8~i18#"
-
-fkr103:
-  "f0~i0#": "~g"
-  "f0~i3#": "~d"
-  "f0~i7#": "~b"
-  "f0~i12#": "~j"
-  "f4~i0#": "n~g"
-  "f4~i3#": "n~d"
-  "f4~i7#": "n~b"
-  "f4~i12#": "n~j"
-  "f8~i0#": "l~g"
-  "f8~i7#": "l~b"
-  "f16~i0#": "m~g"
-  "f16~i3#": "m~d"
-  "f16~i7#": "m~b"
-  "f16~i12#": "m~j"
-  "f21~i0#": "ng~g"
-  "f21~i3#": "ng~d"
-  "f21~i7#": "ng~b"
-  "f21~i12#": "ng~j"
-
-fkr104:
-  "f1~i11#": "g~"
-  "f7~i11#": "d~"
-  "f17~i11#": "b~"
-  "f22~i11#": "j~"
-
-fkr105:
-  "f8~i3#": "l~d"
-  "f8~i12#": "l~j"
-
-fkr106:
-  "f1E": "f1"
-  "f2E": "f1"
-  "f3E": "f1"
-  "f4E": "f4"
-  "f5E": "f4"
-  "f6E": "f4"
-  "f7E": "f7"
-  "f8E": "f8"
-  "f9E": "f1"
-  "f10E": "f16"
-  "f11E": "f8"
-  "f12E": "f8"
-  "f13E": "f8"
-  "f14E": "f17"
-  "f15E": "f8"
-  "f16E": "f16"
-  "f17E": "f17"
-  "f18E": "f17"
-  "f19E": "f7"
-  "f20E": "f7"
-  "f21E": "f21"
-  "f22E": "f7"
-  "f23E": "f7"
-  "f24E": "f1"
-  "f25E": "f7"
-  "f26E": "f17"
-  "f27E": "f7"
+  [["f", 7]["i", 11]["m", 20], ["f", 0]["i", 12]["m", 20]]
+  [["f", 7]["i", 11]["m", 6], ["f", 0]["i", 12]["m", 6]]
+  [["f", 7]["i", 18]["m", 20], ["f", 0]["i", 14]["m", 20]]
+  [["f", 7]["i", 18]["m", 6], ["f", 0]["i", 14]["m", 6]]
+
+fkr074,
+  [["f", 25]["i", 11]["m", 20], ["f", 0]["i", 14]["m", 20]]
+  [["f", 25]["i", 11]["m", 6], ["f", 0]["i", 14]["m", 6]]
+  [["f", 25]["i", 18]["m", 20], ["f", 0]["i", 14]["m", 20]]
+  [["f", 25]["i", 18]["m", 6], ["f", 0]["i", 14]["m", 6]]
+
+fkr075,
+  [["f", 1]["i", 2], ["f", 21]["i", 2]]
+  [["f", 1]["i", 5], ["f", 21]["i", 2]]
+  [["f", 1]["i", 6], ["f", 21]["i", 6]]
+
+fkr076,
+  [["f", 2]["i", 2], ["f", 21]["i", 2]]
+  [["f", 2]["i", 5], ["f", 21]["i", 2]]
+  [["f", 2]["i", 6], ["f", 21]["i", 6]]
+  [["f", 2]["i", 11], ["f", 0]["i", 1]]
+
+fkr077,
+  [["f", 3]["i", 0], k~k
+  [["f", 3]["i", 2], ng~n
+  [["f", 3]["i", 5], ng~n
+  [["f", 3]["i", 6], ng~m
+  [["f", 3]["i", 11], k~s
+
+fkr078,
+  [["f", 4]["i", 5], ["f", 8]["i", 5]]
+
+fkr079,
+  [["f", 5]["i", 0], n~k
+  [["f", 5]["i", 2], n~n
+  [["f", 5]["i", 3], n~t
+  [["f", 5]["i", 12], n~ch
+
+fkr080,
+  [["f", 6]["i", 0], ["f", 4]["i", 15]]
+  [["f", 6]["i", 2], ["f", 4]["i", 2]]
+  [["f", 6]["i", 3], ["f", 4]["i", 16]]
+  [["f", 6]["i", 12], ["f", 4]["i", 14]]
+
+fkr081,
+  [["f", 7]["i", 2], ["f", 4]["i", 2]]
+  [["f", 7]["i", 5], ["f", 4]["i", 2]]
+  [["f", 7]["i", 6], ["f", 4]["i", 6]]
+
+fkr082,
+  [["f", 8]["i", 2], ["f", 8]["i", 5]]
+
+fkr083,
+  [["f", 9]["i", 0], l~k
+  [["f", 9]["i", 2], ng~n
+  [["f", 9]["i", 3], k~t
+  [["f", 9]["i", 12], k~ch
+
+fkr084,
+  [["f", 10]["i", 0], m~k
+  [["f", 10]["i", 2], m~n
+  [["f", 10]["i", 3], m~t
+  [["f", 10]["i", 12], m~ch
+
+fkr085,
+  [["f", 11]["i", 0], l~k
+  [["f", 11]["i", 2], m~n
+  [["f", 11]["i", 3], l~t
+  [["f", 11]["i", 12], l~ch
+
+fkr086,
+  [["f", 13]["i", 0], l~k
+  [["f", 13]["i", 2], l~l
+  [["f", 13]["i", 3], l~t
+  [["f", 13]["i", 12], l~ch
+
+fkr087,
+  [["f", 14]["i", 0], p~k
+  [["f", 14]["i", 2], m~n
+  [["f", 14]["i", 3], p~t
+  [["f", 14]["i", 12], p~ch
+
+fkr088,
+  [["f", 15]["i", 0], ["f", 8]["i", 15]]
+  [["f", 15]["i", 2], ["f", 8]["i", 5]]
+  [["f", 15]["i", 3], ["f", 8]["i", 16]]
+  [["f", 15]["i", 12], ["f", 8]["i", 14]]
+
+fkr089,
+  [["f", 16]["i", 5], ["f", 16]["i", 2]]
+
+fkr090,
+  [["f", 17]["i", 2], ["f", 16]["i", 2]]
+  [["f", 17]["i", 5], ["f", 16]["i", 2]]
+  [["f", 17]["i", 6], ["f", 16]["i", 6]]
+
+fkr091,
+  [["f", 18]["i", 0], ["f", 17]["i", 0]]
+  [["f", 18]["i", 2], ["f", 16]["i", 2]]
+  [["f", 18]["i", 3], ["f", 17]["i", 3]]
+  [["f", 18]["i", 5], ["f", 16]["i", 2]]
+  [["f", 18]["i", 6], ["f", 16]["i", 6]]
+  [["f", 18]["i", 9], ["f", 17]["i", 9]]
+  [["f", 18]["i", 11], ["f", 17]["i", 9]]
+  [["f", 18]["i", 12], ["f", 17]["i", 12]]
+
+fkr092,
+  [["f", 19]["i", 2], ["f", 4]["i", 2]]
+  [["f", 19]["i", 5], ["f", 4]["i", 2]]
+  [["f", 19]["i", 6], ["f", 4]["i", 6]]
+  [["f", 19]["i", 11], ["f", 0]["i", 9]]
+
+fkr093,
+  [["f", 20]["i", 2], ["f", 4]["i", 2]]
+  [["f", 20]["i", 5], ["f", 4]["i", 2]]
+  [["f", 20]["i", 6], ["f", 4]["i", 6]]
+  [["f", 20]["i", 11], ["f", 0]["i", 10]]
+
+fkr094,
+  [["f", 21]["i", 5], ["f", 21]["i", 2]]
+
+fkr095,
+  [["f", 22]["i", 2], ["f", 4]["i", 2]]
+  [["f", 22]["i", 5], ["f", 4]["i", 2]]
+  [["f", 22]["i", 6], ["f", 4]["i", 6]]
+  [["f", 22]["i", 11], ["f", 0]["i", 12]]
+  [["f", 22]["i", 18], ["f", 0]["i", 14]]
+
+fkr096,
+  [["f", 23]["i", 2], ["f", 4]["i", 2]]
+  [["f", 23]["i", 5], ["f", 4]["i", 2]]
+  [["f", 23]["i", 6], ["f", 4]["i", 6]]
+  [["f", 23]["i", 11], ["f", 0]["i", 14]]
+  [["f", 23]["i", 18], ["f", 0]["i", 14]]
+
+fkr097,
+  [["f", 24]["i", 2], ["f", 21]["i", 2]]
+  [["f", 24]["i", 6], ["f", 21]["i", 6]]
+  [["f", 24]["i", 11], ["f", 0]["i", 15]]
+
+fkr098,
+  [["f", 25]["i", 2], ["f", 4]["i", 2]]
+  [["f", 25]["i", 6], ["f", 4]["i", 6]]
+  [["f", 25]["i", 11], ["f", 0]["i", 16]]
+
+fkr099,
+  [["f", 26]["i", 2], ["f", 16]["i", 2]]
+  [["f", 26]["i", 6], ["f", 16]["i", 6]]
+  [["f", 26]["i", 11], ["f", 0]["i", 17]]
+
+fkr100,
+  [["f", 27]["i", 0], ["f", 0]["i", 15]]
+  [["f", 27]["i", 2], ["f", 4]["i", 2]]
+  [["f", 27]["i", 3], ["f", 0]["i", 16]]
+  [["f", 27]["i", 5], ["f", 4]["i", 2]]
+  [["f", 27]["i", 6], ["f", 4]["i", 6]]
+  [["f", 27]["i", 7], ["f", 0]["i", 17]]
+  [["f", 27]["i", 11], ["f", 0]["i", 11]]
+  [["f", 27]["i", 12], ["f", 0]["i", 14]]
+
+fkr101,
+  [["f", 5]["i", 11], ["f", 4]["i", 12]]
+  [["f", 6]["i", 11], ["f", 4]["i", 11]]
+  [["f", 9]["i", 11], ["f", 8]["i", 0]]
+  [["f", 10]["i", 11], ["f", 8]["i", 6]]
+  [["f", 11]["i", 11], ["f", 8]["i", 7]]
+  [["f", 12]["i", 11], ["f", 8]["i", 9]]
+  [["f", 13]["i", 11], ["f", 8]["i", 16]]
+  [["f", 14]["i", 11], ["f", 8]["i", 17]]
+  [["f", 15]["i", 11], ["f", 8]["i", 11]]
+
+fkr102,
+  [["f", 5]["i", 18], ["f", 4]["i", 14]]
+  [["f", 6]["i", 18], ["f", 4]["i", 18]]
+  [["f", 9]["i", 18], ["f", 8]["i", 15]]
+  [["f", 10]["i", 18], ["f", 16]["i", 18]]
+  [["f", 11]["i", 18], ["f", 8]["i", 17]]
+  [["f", 13]["i", 18], ["f", 8]["i", 16]]
+  [["f", 14]["i", 18], ["f", 8]["i", 17]]
+  [["f", 15]["i", 18], ["f", 8]["i", 18]]
+
+fkr103,
+  [["f", 0]["i", 0], ~g
+  [["f", 0]["i", 3], ~d
+  [["f", 0]["i", 7], ~b
+  [["f", 0]["i", 12], ~j
+  [["f", 4]["i", 0], n~g
+  [["f", 4]["i", 3], n~d
+  [["f", 4]["i", 7], n~b
+  [["f", 4]["i", 12], n~j
+  [["f", 8]["i", 0], l~g
+  [["f", 8]["i", 7], l~b
+  [["f", 16]["i", 0], m~g
+  [["f", 16]["i", 3], m~d
+  [["f", 16]["i", 7], m~b
+  [["f", 16]["i", 12], m~j
+  [["f", 21]["i", 0], ng~g
+  [["f", 21]["i", 3], ng~d
+  [["f", 21]["i", 7], ng~b
+  [["f", 21]["i", 12], ng~j
+
+fkr104,
+  [["f", 1]["i", 11], g~
+  [["f", 7]["i", 11], d~
+  [["f", 17]["i", 11], b~
+  [["f", 22]["i", 11], j~
+
+fkr105,
+  [["f", 8]["i", 3], l~d
+  [["f", 8]["i", 12], l~j
+
+fkr106,
+  [["f", 1], ["f", 1]]
+  [["f", 2], ["f", 1]]
+  [["f", 3], ["f", 1]]
+  [["f", 4], ["f", 4]]
+  [["f", 5], ["f", 4]]
+  [["f", 6], ["f", 4]]
+  [["f", 7], ["f", 7]]
+  [["f", 8], ["f", 8]]
+  [["f", 9], ["f", 1]]
+  [["f", 10], ["f", 16]]
+  [["f", 11], ["f", 8]]
+  [["f", 12], ["f", 8]]
+  [["f", 13], ["f", 8]]
+  [["f", 14], ["f", 17]]
+  [["f", 15], ["f", 8]]
+  [["f", 16], ["f", 16]]
+  [["f", 17], ["f", 17]]
+  [["f", 18], ["f", 17]]
+  [["f", 19], ["f", 7]]
+  [["f", 20], ["f", 7]]
+  [["f", 21], ["f", 21]]
+  [["f", 22], ["f", 7]]
+  [["f", 23], ["f", 7]]
+  [["f", 24], ["f", 1]]
+  [["f", 25], ["f", 7]]
+  [["f", 26], ["f", 17]]
+  [["f", 27], ["f", 7]]
 
 fkr107:
   "i9#m16": "shwi"

+ 129 - 123
scriptshifter/hooks/korean/romanizer.py

@@ -181,31 +181,32 @@ def _romanize_name(src, options):
         warnings += _warnings
 
     if parsed:
-        if "~" in parsed:
-            lname, fname = parsed.split("~", 1)
-            logger.debug(f"First name: {fname}; Last name: {lname}")
-            fname_rom = _kor_fname_rom(fname)
-
-            lname_rom_ls = []
-            for n in lname.split("+"):
+        if isinstance(parsed, dict) and "fam" in parsed:
+            logger.debug(
+                    f"Given name: {parsed['giv']}; "
+                    f"Family name: {parsed['fam']}")
+            giv_name_rom = _kor_fname_rom(parsed["giv"])
+
+            fam_name_rom_ls = []
+            for n in parsed["fam"]:
                 _k = _kor_lname_rom(n)
                 logger.debug(f"Split last name part: {n}")
                 logger.debug(f"Split last name part romanized: {_k}")
                 if _k:
-                    lname_rom_ls.append(_k)
+                    fam_name_rom_ls.append(_k)
 
-            if not any(lname_rom_ls):
+            if not any(fam_name_rom_ls):
                 warnings.append(f"{parsed} is not a recognized Korean name.")
                 return "", warnings
 
-            lname_rom = " ".join(lname_rom_ls)
+            fam_name_rom = " ".join(fam_name_rom_ls)
 
             # Add comma after the last name for certain MARC fields.
             marc_field = options.get("marc_field")
             if marc_field in ("100", "600", "700", "800"):
-                rom = f"{lname_rom}, {fname_rom}"
+                rom = f"{fam_name_rom}, {giv_name_rom}"
             else:
-                rom = f"{lname_rom} {fname_rom}"
+                rom = f"{fam_name_rom} {giv_name_rom}"
 
             if False:
                 # TODO add option for authoritative name.
@@ -223,6 +224,13 @@ def _romanize_name(src, options):
 
 
 def _parse_kor_name(src, options):
+    """
+    Parse a string as a Korean or foreign name.
+
+    Return a `(parsed, warnings)` tuple. `parsed` is None on error, otherwise a
+    dict with `fam` (family name) and `giv` (given name), all spaces removed.
+    """
+
     parsed = None
     warnings = []
 
@@ -244,42 +252,48 @@ def _parse_kor_name(src, options):
             warnings.append("ERROR: not a Korean name.")
             return None, warnings
 
-    ct_spaces = src.count(" ")
+    kor_ls = " ".split(src)
+    tk_ct = len(kor_ls)
+    # ct_spaces = src.count(" ")
     # FKR0006: Error if more than 2 spaces
-    if ct_spaces > 2:
+    if tk_ct > 3:
         warnings.append("ERROR: not a name (too many spaces)")
         return None, warnings
 
     # FKR007: 2 spaces (two family names)
-    if ct_spaces == 2:
-        logger.debug(f"Name {src} has 2 spaces.")
-        parsed = src.replace(" ", "+", 1).replace(" ", "~", 1)
-    elif ct_spaces == 1:
-        # FKR008: 1 space (2nd position)
-        if src[1] == " ":
-            logger.debug(f"Name {src} has 1 space in the 2nd position.")
-            parsed = src.replace(" ", "~")
-
-        # FKR009: 1 space (3nd position)
-        if src[2] == " ":
-            logger.debug(f"Name {src} has 1 space in the 3rd position.")
-            if two_syl_lname:
-                parsed = "+" + src.replace(" ", "~")
+    if tk_ct == 3:
+        logger.debug(f"Name {src} has 3 parts.")
+        parsed = {"fam": "".join(kor_ls[:1]), "giv": kor_ls[2]}
+    elif tk_ct == 2:
+        logger.debug(f"Name {src} has 2 parts.")
+        # FKR008: 1 word + 2 words
+        # FKR009: 2 words + 1 word
+        if len(kor_ls[0]) == 2 and not two_syl_lname:
+            warnings.append(
+                    f"{kor_ls[0]} is not a valid 2-syllable last name.")
+            return None, warnings
+        parsed = {"fam": kor_ls[0], "giv": kor_ls[1]}
 
     # FKR010: When there is no space
     else:
         logger.debug(f"Name {src} has no spaces.")
         if src_len == 2:
             logger.debug("Name has 2 characters.")
-            parsed = src[0] + "~" + src[1:]
-        elif src_len > 2:
+            parsed = {"fam": src[0], "giv": src[1]}
+        else:
             logger.debug("Name has more than 2 characters.")
             if two_syl_lname:
                 logger.debug("Last name has 2 syllables.")
-                parsed = src[:2] + "~" + src[2:]
+                parsed = {"fam": src[:2], "giv": src[2:]}
             else:
                 logger.debug("Last name has 1 syllable.")
-                parsed = src[0] + "~" + src[1:]
+                parsed = {"fam": src[0], "giv": src[1:]}
+
+    parsed = {
+        "fam": parsed["fam"].replace(" ", ""),
+        "giv": parsed["giv"].replace(" ", ""),
+    }
+
     return parsed, warnings
 
 
@@ -340,8 +354,7 @@ def _romanize_oclc_auto(kor):
     logger.debug(f"Korean before romanization: {kor}")
 
     rom_ls = []
-    breakpoint()
-    for word in kor.split(" "):
+    for word in kor:
         rom_ls.append(_kor_rom(word))
     rom = " ".join(rom_ls)
 
@@ -378,18 +391,19 @@ def _romanize_oclc_auto(kor):
 
 # FKR068: Exceptions, Exceptions to initial sound law, Proper names
 def _kor_rom(kor):
+    """
+    Encode a Korean token, perform replacements, and transliterate.
+
+    kor (str): Original Korean token.
+    struct (dict): Structural metadata for the token.
+    """
+
     kor = re.sub(r"\s{2,}", " ", kor.strip())
     orig = kor
 
     # FKR069: Irregular sound change list
     kor = _replace_map(kor, KCONF["fkr069"])
 
-    # FKR070: [n] insertion position mark +
-    niun = kor.find("+")
-    if niun > -1:
-        kor = kor.replace("+", "")
-        orig = kor
-
     non_kor = 0
     cpoints = tuple(ord(c) for c in kor)
     for cp in cpoints:
@@ -403,83 +417,74 @@ def _kor_rom(kor):
         cpoints = tuple(ord(c) for c in kor)
     for i in range(len(kor)):
         cp = cpoints[i] - CP_MIN
-        ini = "i" + str(cp // 588)
-        med = "m" + str((cp // 28) % 21)
-        fin = "f" + str(cp % 28)
-        rom_ls.append("#".join((ini, med, fin)))
-    rom = "~".join(rom_ls)
-    if len(rom):
-        rom = rom + "E"
-    logger.debug(f"Coded romanization before replacements: {rom}")
-
-    # FKR071: [n] insertion
-    if niun > -1:
-        niun_loc = rom.find("~")
-        # Advance until the niun'th occurrence of ~
-        # If niun is 0 or 1 the loop will be skipped.
-        for i in range(niun - 1):
-            niun_loc = rom.find("~", niun_loc + 1)
-        rom_niun_a = rom[:niun_loc]
-        rom_niun_b = rom[niun_loc + 1:]
-        if re.match("i11#m(?:2|6|12|17|20)", rom_niun_b):
-            _fkr_log(71)
-            rom_niun_b = rom_niun_b.replace("i11#m", "i2#m", 1)
-
-        # FKR072: [n]+[l] >[l] + [l]
-        if rom_niun_b.startswith("i5#") and rom_niun_a.endswith("f4"):
-            _fkr_log(72)
-            rom_niun_b = rom_niun_b.replace("i5#", "i2", 1)
-
-        rom = f"{rom_niun_a}~{rom_niun_b}"
-
-    # FKR073: Palatalization: ㄷ+이,ㄷ+여,ㄷ+히,ㄷ+혀
-    # FKR074: Palatalization: ㅌ+이,ㅌ+히,ㅌ+히,ㅌ+혀
-    # FKR075: Consonant assimilation ㄱ
-    # FKR076: Consonant assimilation ㄲ
-    # FKR077: Consonant assimilation ㄳ : ㄱ,ㄴ,ㄹ,ㅁ,ㅇ
-    # FKR078: Consonant assimilation ㄴ
-    # FKR079: Consonant assimilation ㄵ: ㄱ,ㄴ,ㄷ,ㅈ"
-    # FKR080: Consonant assimilation ㄶ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR081: Consonant assimilation ㄷ
-    # FKR082: Consonant assimilation ㄹ
-    # FKR083: Consonant assimilation ㄺ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR084: Consonant assimilation ㄻ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR085: Consonant assimilation ㄼ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR086: Consonant assimilation ㄾ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR087: Consonant assimilation ㄿ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR088: Consonant assimilation ㅀ : ㄱ,ㄴ,ㄷ,ㅈ
-    # FKR089: Consonant assimilation ㅁ
-    # FKR090: Consonant assimilation ㅂ
-    # FKR091: Consonant assimilation ㅄ
-    # FKR092: Consonant assimilation ㅅ
-    # FKR093: Consonant assimilation ㅆ
-    # FKR094: Consonant assimilation ㅇ
-    # FKR095: Consonant assimilation ㅈ
-    # FKR096: Consonant assimilation ㅊ
-    # FKR097: Consonant assimilation ㅋ
-    # FKR098: Consonant assimilation ㅌ
-    # FKR099: Consonant assimilation ㅍ
-    # FKR100: Consonant assimilation ㅎ
-    # FKR101: digraphic coda + ㅇ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,ㄽ,ㄾ,ㄿ,ㅀ
-    # FKR102: digraphic coda + ㅎ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,(ㄽ),ㄾ,ㄿ,ㅀ
-    # FKR103: Vocalization 1 (except ㄹ+ㄷ, ㄹ+ㅈ 제외) voiced + unvoiced
-    # FKR104: Vocalization 2 (except ㄹ+ㄷ, ㄹ+ㅈ 제외) unvoiced + voiced
-    # FKR105: Vocalization 3 (ㄹ+ㄷ, ㄹ+ㅈ)
-    # FKR106: Final sound law
-    # FKR107: Exception for '쉬' = shi
-    # FKR108: Exception for 'ㄴㄱ'= n'g
-    for fkr_i in range(73, 109):
-        _fkr_log(fkr_i)
-        _bk = rom
-        rom = _replace_map(rom, KCONF[f"fkr{fkr_i:03}"])
-        if _bk != rom:
-            logger.debug(f"FKR{fkr_i} substitution: {rom} (was: {_bk})")
-
-    logger.debug(f"Coded romanization after replacements: {rom}")
-    # FKR109: Convert everything else
-    _fkr_log(109)
-    for pos, data in KCONF["fkr109"].items():
-        rom = _replace_map(rom, data)
+        tk = {
+            "i": cp // 588,
+            "m": (cp // 28) % 21,
+            "f": cp % 28,
+        }
+        logger.debug(f"Coded token before replacements: {tk}")
+
+        # FKR071: [n] insertion
+        if "fam" in tk:
+            if tk["i"] == 11 and tk["m"] in (2, 6, 12, 17, 20):
+                _fkr_log(71)
+                tk["i"] = 2
+
+            # FKR072: [n]+[l] >[l] + [l]
+            if rom_niun_b.startswith("i5#") and rom_niun_a.endswith("f4"):
+                _fkr_log(72)
+                rom_niun_b = rom_niun_b.replace("i5#", "i2", 1)
+
+            rom_ls.append(f"{rom_niun_a}~{rom_niun_b}")
+
+        # FKR073: Palatalization: ㄷ+이,ㄷ+여,ㄷ+히,ㄷ+혀
+        # FKR074: Palatalization: ㅌ+이,ㅌ+히,ㅌ+히,ㅌ+혀
+        # FKR075: Consonant assimilation ㄱ
+        # FKR076: Consonant assimilation ㄲ
+        # FKR077: Consonant assimilation ㄳ : ㄱ,ㄴ,ㄹ,ㅁ,ㅇ
+        # FKR078: Consonant assimilation ㄴ
+        # FKR079: Consonant assimilation ㄵ: ㄱ,ㄴ,ㄷ,ㅈ"
+        # FKR080: Consonant assimilation ㄶ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR081: Consonant assimilation ㄷ
+        # FKR082: Consonant assimilation ㄹ
+        # FKR083: Consonant assimilation ㄺ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR084: Consonant assimilation ㄻ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR085: Consonant assimilation ㄼ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR086: Consonant assimilation ㄾ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR087: Consonant assimilation ㄿ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR088: Consonant assimilation ㅀ : ㄱ,ㄴ,ㄷ,ㅈ
+        # FKR089: Consonant assimilation ㅁ
+        # FKR090: Consonant assimilation ㅂ
+        # FKR091: Consonant assimilation ㅄ
+        # FKR092: Consonant assimilation ㅅ
+        # FKR093: Consonant assimilation ㅆ
+        # FKR094: Consonant assimilation ㅇ
+        # FKR095: Consonant assimilation ㅈ
+        # FKR096: Consonant assimilation ㅊ
+        # FKR097: Consonant assimilation ㅋ
+        # FKR098: Consonant assimilation ㅌ
+        # FKR099: Consonant assimilation ㅍ
+        # FKR100: Consonant assimilation ㅎ
+        # FKR101: digraphic coda + ㅇ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,ㄽ,ㄾ,ㄿ,ㅀ
+        # FKR102: digraphic coda + ㅎ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,(ㄽ),ㄾ,ㄿ,ㅀ
+        # FKR103: Vocalization 1 (except ㄹ+ㄷ, ㄹ+ㅈ 제외) voiced + unvoiced
+        # FKR104: Vocalization 2 (except ㄹ+ㄷ, ㄹ+ㅈ 제외) unvoiced + voiced
+        # FKR105: Vocalization 3 (ㄹ+ㄷ, ㄹ+ㅈ)
+        # FKR106: Final sound law
+        # FKR107: Exception for '쉬' = shi
+        # FKR108: Exception for 'ㄴㄱ'= n'g
+        for fkr_i in range(73, 109):
+            _fkr_log(fkr_i)
+            _bk = rom
+            rom = _replace_map(rom, KCONF[f"fkr{fkr_i:03}"])
+            if _bk != rom:
+                logger.debug(f"FKR{fkr_i} substitution: {rom} (was: {_bk})")
+
+        logger.debug(f"Coded romanization after replacements: {rom}")
+        # FKR109: Convert everything else
+        _fkr_log(109)
+        for pos, data in KCONF["fkr109"].items():
+            rom = _replace_map(rom, data)
 
     # FKR110: Convert symbols
     rom = _replace_map(rom, {"#": "", "~": ""})
@@ -631,12 +636,13 @@ def _kor_fname_rom(fname):
     cpoints = tuple(ord(c) for c in fname)
     for i in range(len(fname)):
         cp = cpoints[i] - CP_MIN
-        ini = "i" + str(cp // 588)
-        med = "m" + str((cp // 28) % 21)
-        fin = "f" + str(cp % 28)
-        rom_ls.append("#".join((ini, med, fin)))
-    rom = "~".join(rom_ls) + "E"
-    logger.debug(f"Encoded first name: {rom}")
+        tk = {
+            "i": cp // 588,
+            "m": (cp // 28) % 21,
+            "f": cp % 28,
+        }
+        rom_ls.append(tk)
+    logger.debug(f"Encoded first name: {rom_ls}")
 
     # FKR011: Check native Korean name, by coda
     native_by_fin = False