瀏覽代碼

WIP K-Romanizer conversion.

scossu 1 年之前
父節點
當前提交
f4e4f1c1a2

+ 1 - 1
scriptshifter/hooks/korean/Functions_KoreanHancha.au3

@@ -224,7 +224,7 @@ Func Hancha2Hangul()
 	  StringReplace($Hangul,"不","X")
 	  $PUL_Count = @Extended
 	  For $i=1 to $PUL_Count
-		 $PUL_Str = StringMid($Hangul,StringInStr($Hangul,"不",0,1)+1,1)
+		 $PUL_Str = StringMid($Hangul,StringInStr($Hangul,"不",0,1)+1,1) ; Get character after "不"
 		 Local $aArray = StringToASCIIArray($PUL_Str)
 		 If $aArray[0]>45795 AND $aArray[0]<46384 Then
 			$Hangul=StringReplace($Hangul,"不","부",1)

+ 9 - 9
scriptshifter/hooks/korean/Functions_KoreanRomanizer.au3

@@ -1298,7 +1298,7 @@ Func KorRom() ; FKR068
    EndIf
 
    $NonKor = 0
-   $NonKorCount = 0
+   $NonKorCount = 0 ; UNUSED
    Local $aArray = StringToASCIIArray ($TargetKor)
 
    Sleep (100)
@@ -1476,7 +1476,7 @@ If StringInStr($Rom,"f4~")>0 Then
 EndIf
 
 ;FKR079
-$CountNC = "0"
+$CountNC = "0" ; This never changes
 If StringInStr($Rom,"f5~")>0 Then
    Local $Rule1[4][2] = [["f5~i0#","n~k"],["f5~i2#","n~n"],["f5~i3#","n~t"],["f5~i12#","n~ch"]]
    For $i = 0 To Ubound($Rule1, 1) - 1
@@ -1509,7 +1509,7 @@ If StringInStr($Rom,"f8~")>0 Then
 EndIf
 
 ;FKR083
-$CountLK = "0"
+$CountLK = "0" ; This never changes
 If StringInStr($Rom,"f9~")>0 Then
    Local $Rule1[4][2] = [["f9~i0#","l~k"],["f9~i2#","ng~n"],["f9~i3#","k~t"],["f9~i12#","k~ch"]]
    For $i = 0 To Ubound($Rule1, 1) - 1
@@ -1518,7 +1518,7 @@ If StringInStr($Rom,"f9~")>0 Then
 EndIf
 
 ;FKR084
-$CountLM = "0"
+$CountLM = "0" ; This never changes
 If StringInStr($Rom,"f10~")>0 Then
    Local $Rule1[4][2] = [["f10~i0#","m~k"],["f10~i2#","m~n"],["f10~i3#","m~t"],["f10~i12#","m~ch"]]
    For $i = 0 To Ubound($Rule1, 1) - 1
@@ -1527,7 +1527,7 @@ If StringInStr($Rom,"f10~")>0 Then
 EndIf
 
 ;FKR085
-$CountLP = "0"
+$CountLP = "0" ; This never changes
 If StringInStr($Rom,"f11~")>0 Then
    Local $Rule1[4][2] = [["f11~i0#","l~k"],["f11~i2#","m~n"],["f11~i3#","l~t"],["f11~i12#","l~ch"]]
    For $i = 0 To Ubound($Rule1, 1) - 1
@@ -1536,7 +1536,7 @@ If StringInStr($Rom,"f11~")>0 Then
 EndIf
 
 ;FKR086
-$CountLTH = "0"
+$CountLTH = "0" ; This never changes
 If StringInStr($Rom,"f13~")>0 Then
    Local $Rule1[4][2] = [["f13~i0#","l~k"],["f13~i2#","l~l"],["f13~i3#","l~t"],["f13~i12#","l~ch"]]
    For $i = 0 To Ubound($Rule1, 1) - 1
@@ -1545,7 +1545,7 @@ If StringInStr($Rom,"f13~")>0 Then
 EndIf
 
 ;FKR087
-$CountLPH = "0"
+$CountLPH = "0" ; This never changes
 If StringInStr($Rom,"f14~")>0 Then
    Local $Rule1[4][2] = [["f14~i0#","p~k"],["f14~i2#","m~n"],["f14~i3#","p~t"],["f14~i12#","p~ch"]]
    For $i = 0 To Ubound($Rule1, 1) - 1
@@ -1670,7 +1670,7 @@ EndIf
    Next
 
 ;FKR103
-   If $CountLK+$CountLM+$CountLP+$CountLPH+$CountLTH+$CountNC = "0" Then
+   If $CountLK+$CountLM+$CountLP+$CountLPH+$CountLTH+$CountNC = "0" Then  ; Always true
 	  Local $Rule1[18][2] = [["f0~i0#","~g"],["f0~i3#","~d"],["f0~i7#","~b"],["f0~i12#","~j"],["f4~i0#","n~g"],["f4~i3#","n~d"],["f4~i7#","n~b"],["f4~i12#","n~j"],["f8~i0#","l~g"],["f8~i7#","l~b"],["f16~i0#","m~g"],["f16~i3#","m~d"],["f16~i7#","m~b"],["f16~i12#","m~j"],["f21~i0#","ng~g"],["f21~i3#","ng~d"],["f21~i7#","ng~b"],["f21~i12#","ng~j"]]
 	  For $i = 0 To Ubound($Rule1, 1) - 1
 		 $Rom = StringRegExpReplace($Rom, "\Q" & $Rule1[$i][0] & "\E",$Rule1[$i][1])
@@ -1684,7 +1684,7 @@ EndIf
    Next
 
 ;FKR105
-   If $CountLK+$CountLM+$CountLP+$CountLPH+$CountLTH+$CountNC = "0" Then
+   If $CountLK+$CountLM+$CountLP+$CountLPH+$CountLTH+$CountNC = "0" Then  ; Always true
 	  Local $Rule1[2][2] = [["f8~i3#","l~d"],["f8~i12#","l~j"]]
 	  For $i = 0 To Ubound($Rule1, 1) - 1
 		 $Rom = StringRegExpReplace($Rom, "\Q" & $Rule1[$i][0] & "\E",$Rule1[$i][1])

+ 180 - 269
scriptshifter/hooks/korean/data.yml

@@ -3625,166 +3625,167 @@ fkr069:
   "윗옷": "위돗"
   "첫애": "처대"
 
-fkr073:
-  "f7~i11#m20": "f0~i12#m20"
-  "f7~i11#m6": "f0~i12#m6"
-  "f7~i18#m20": "f0~i14#m20"
-  "f7~i18#m6": "f0~i14#m6"
-
-fkr074:
-  "f25~i11#m20": "f0~i14#m20"
-  "f25~i11#m6": "f0~i14#m6"
-  "f25~i18#m20": "f0~i14#m20"
-  "f25~i18#m6": "f0~i14#m6"
-
-fkr075:
-  "f1~i2#": "f21~i2#"
-  "f1~i5#": "f21~i2#"
-  "f1~i6#": "f21~i6#"
-
-fkr076:
-  "f2~i2#": "f21~i2#"
-  "f2~i5#": "f21~i2#"
-  "f2~i6#": "f21~i6#"
-  "f2~i11#": "f0~i1#"
-
-fkr077:
-  "f3~i0#": "k~k"
-  "f3~i2#": "ng~n"
-  "f3~i5#": "ng~n"
-  "f3~i6#": "ng~m"
-  "f3~i11#": "k~s"
-
-fkr078:
-  "f4~i5#": "f8~i5#"
-
-fkr079:
-  "f5~i0#": "n~k"
-  "f5~i2#": "n~n"
-  "f5~i3#": "n~t"
-  "f5~i12#": "n~ch"
-
-fkr080:
-  "f6~i0#": "f4~i15#"
-  "f6~i2#": "f4~i2#"
-  "f6~i3#": "f4~i16#"
-  "f6~i12#": "f4~i14#"
-
-fkr081:
-  "f7~i2#": "f4~i2#"
-  "f7~i5#": "f4~i2#"
-  "f7~i6#": "f4~i6#"
-
-fkr082:
-  "f8~i2#": "f8~i5#"
-
-fkr083:
-  "f9~i0#": "l~k"
-  "f9~i2#": "ng~n"
-  "f9~i3#": "k~t"
-  "f9~i12#": "k~ch"
-
-fkr084:
-  "f10~i0#": "m~k"
-  "f10~i2#": "m~n"
-  "f10~i3#": "m~t"
-  "f10~i12#": "m~ch"
-
-fkr085:
-  "f11~i0#": "l~k"
-  "f11~i2#": "m~n"
-  "f11~i3#": "l~t"
-  "f11~i12#": "l~ch"
-
-fkr086:
-  "f13~i0#": "l~k"
-  "f13~i2#": "l~l"
-  "f13~i3#": "l~t"
-  "f13~i12#": "l~ch"
-
-fkr087:
-  "f14~i0#": "p~k"
-  "f14~i2#": "m~n"
-  "f14~i3#": "p~t"
-  "f14~i12#": "p~ch"
-
-fkr088:
-  "f15~i0#": "f8~i15#"
-  "f15~i2#": "f8~i5#"
-  "f15~i3#": "f8~i16#"
-  "f15~i12#": "f8~i14#"
-
-fkr089:
-   "f16~i5#": "f16~i2#"
-
-fkr090:
-  "f17~i2#": "f16~i2#"
-  "f17~i5#": "f16~i2#"
-  "f17~i6#": "f16~i6#"
-
-fkr091:
-  "f18~i0#": "f17~i0#"
-  "f18~i2#": "f16~i2#"
-  "f18~i3#": "f17~i3#"
-  "f18~i5#": "f16~i2#"
-  "f18~i6#": "f16~i6#"
-  "f18~i9#": "f17~i9#"
-  "f18~i11#": "f17~i9#"
-  "f18~i12#": "f17~i12#"
-
-fkr092:
-  "f19~i2#": "f4~i2#"
-  "f19~i5#": "f4~i2#"
-  "f19~i6#": "f4~i6#"
-  "f19~i11#": "f0~i9#"
-
-fkr093:
-  "f20~i2#": "f4~i2#"
-  "f20~i5#": "f4~i2#"
-  "f20~i6#": "f4~i6#"
-  "f20~i11#": "f0~i10#"
-
-fkr094:
-  "f21~i5#": "f21~i2#"
-
-fkr095:
-  "f22~i2#": "f4~i2#"
-  "f22~i5#": "f4~i2#"
-  "f22~i6#": "f4~i6#"
-  "f22~i11#": "f0~i12#"
-  "f22~i18#": "f0~i14#"
-
-fkr096:
-  "f23~i2#": "f4~i2#"
-  "f23~i5#": "f4~i2#"
-  "f23~i6#": "f4~i6#"
-  "f23~i11#": "f0~i14#"
-  "f23~i18#": "f0~i14#"
-
-fkr097:
-  "f24~i2#": "f21~i2#"
-  "f24~i6#": "f21~i6#"
-  "f24~i11#": "f0~i15#"
-
-fkr098:
-  "f25~i2#": "f4~i2#"
-  "f25~i6#": "f4~i6#"
-  "f25~i11#": "f0~i16#"
-
-fkr099:
-  "f26~i2#": "f16~i2#"
-  "f26~i6#": "f16~i6#"
-  "f26~i11#": "f0~i17#"
-
-fkr100:
-  "f27~i0#": "f0~i15#"
-  "f27~i2#": "f4~i2#"
-  "f27~i3#": "f0~i16#"
-  "f27~i5#": "f4~i2#"
-  "f27~i6#": "f4~i6#"
-  "f27~i7#": "f0~i17#"
-  "f27~i11#": "f0~i11#"
-  "f27~i12#": "f0~i14#"
+fkr073-100:
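+  "f7~":  # FKR073 -- NOTE(review): key "f7~" is repeated below for FKR081; duplicate keys in one YAML mapping are invalid and most loaders (e.g. PyYAML) silently keep only the last one, which would drop these FKR073 rules -- confirm and merge or rename.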
+    "f7~i11#m20": "f0~i12#m20"
+    "f7~i11#m6": "f0~i12#m6"
+    "f7~i18#m20": "f0~i14#m20"
+    "f7~i18#m6": "f0~i14#m6"
+
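+  "f25~":  # FKR074 -- NOTE(review): key "f25~" is repeated below for FKR098; duplicate keys in one YAML mapping are invalid and most loaders (e.g. PyYAML) silently keep only the last one, which would drop these FKR074 rules -- confirm and merge or rename.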
+    "f25~i11#m20": "f0~i14#m20"
+    "f25~i11#m6": "f0~i14#m6"
+    "f25~i18#m20": "f0~i14#m20"
+    "f25~i18#m6": "f0~i14#m6"
+
+  "f1~":  # FKR075
+    "f1~i2#": "f21~i2#"
+    "f1~i5#": "f21~i2#"
+    "f1~i6#": "f21~i6#"
+
+  "f2~":  # FKR076
+    "f2~i2#": "f21~i2#"
+    "f2~i5#": "f21~i2#"
+    "f2~i6#": "f21~i6#"
+    "f2~i11#": "f0~i1#"
+
+  "f3~":  # FKR077
+    "f3~i0#": "k~k"
+    "f3~i2#": "ng~n"
+    "f3~i5#": "ng~n"
+    "f3~i6#": "ng~m"
+    "f3~i11#": "k~s"
+
+  "f4~":  # FKR078
+    "f4~i5#": "f8~i5#"
+
+  "f5~":  # FKR079
+    "f5~i0#": "n~k"
+    "f5~i2#": "n~n"
+    "f5~i3#": "n~t"
+    "f5~i12#": "n~ch"
+
+  "f6~":  # FKR080
+    "f6~i0#": "f4~i15#"
+    "f6~i2#": "f4~i2#"
+    "f6~i3#": "f4~i16#"
+    "f6~i12#": "f4~i14#"
+
+  "f7~":  # FKR081
+    "f7~i2#": "f4~i2#"
+    "f7~i5#": "f4~i2#"
+    "f7~i6#": "f4~i6#"
+
+  "f8~":  # FKR082
+    "f8~i2#": "f8~i5#"
+
+  "f9~":  # FKR083
+    "f9~i0#": "l~k"
+    "f9~i2#": "ng~n"
+    "f9~i3#": "k~t"
+    "f9~i12#": "k~ch"
+
+  "f10~":  # FKR084
+    "f10~i0#": "m~k"
+    "f10~i2#": "m~n"
+    "f10~i3#": "m~t"
+    "f10~i12#": "m~ch"
+
+  "f11~":  # FKR085
+    "f11~i0#": "l~k"
+    "f11~i2#": "m~n"
+    "f11~i3#": "l~t"
+    "f11~i12#": "l~ch"
+
+  "f13~":  # FKR086
+    "f13~i0#": "l~k"
+    "f13~i2#": "l~l"
+    "f13~i3#": "l~t"
+    "f13~i12#": "l~ch"
+
+  "f14~":  # FKR087
+    "f14~i0#": "p~k"
+    "f14~i2#": "m~n"
+    "f14~i3#": "p~t"
+    "f14~i12#": "p~ch"
+
+  "f15~":  # FKR088
+    "f15~i0#": "f8~i15#"
+    "f15~i2#": "f8~i5#"
+    "f15~i3#": "f8~i16#"
+    "f15~i12#": "f8~i14#"
+
+  "f16~":  # FKR089
+     "f16~i5#": "f16~i2#"
+
+  "f17~":  # FKR090
+    "f17~i2#": "f16~i2#"
+    "f17~i5#": "f16~i2#"
+    "f17~i6#": "f16~i6#"
+
+  "f18~":  # FKR091
+    "f18~i0#": "f17~i0#"
+    "f18~i2#": "f16~i2#"
+    "f18~i3#": "f17~i3#"
+    "f18~i5#": "f16~i2#"
+    "f18~i6#": "f16~i6#"
+    "f18~i9#": "f17~i9#"
+    "f18~i11#": "f17~i9#"
+    "f18~i12#": "f17~i12#"
+
+  "f19~":  # FKR092
+    "f19~i2#": "f4~i2#"
+    "f19~i5#": "f4~i2#"
+    "f19~i6#": "f4~i6#"
+    "f19~i11#": "f0~i9#"
+
+  "f20~":  # FKR093
+    "f20~i2#": "f4~i2#"
+    "f20~i5#": "f4~i2#"
+    "f20~i6#": "f4~i6#"
+    "f20~i11#": "f0~i10#"
+
+  "f21~":  # FKR094
+    "f21~i5#": "f21~i2#"
+
+  "f22~":  # FKR095
+    "f22~i2#": "f4~i2#"
+    "f22~i5#": "f4~i2#"
+    "f22~i6#": "f4~i6#"
+    "f22~i11#": "f0~i12#"
+    "f22~i18#": "f0~i14#"
+
+  "f23~":  # FKR096
+    "f23~i2#": "f4~i2#"
+    "f23~i5#": "f4~i2#"
+    "f23~i6#": "f4~i6#"
+    "f23~i11#": "f0~i14#"
+    "f23~i18#": "f0~i14#"
+
+  "f24~":  # FKR097
+    "f24~i2#": "f21~i2#"
+    "f24~i6#": "f21~i6#"
+    "f24~i11#": "f0~i15#"
+
+  "f25~":  # FKR098
+    "f25~i2#": "f4~i2#"
+    "f25~i6#": "f4~i6#"
+    "f25~i11#": "f0~i16#"
+
+  "f26~":  # FKR099
+    "f26~i2#": "f16~i2#"
+    "f26~i6#": "f16~i6#"
+    "f26~i11#": "f0~i17#"
+
+  "f27~":  # FKR100
+    "f27~i0#": "f0~i15#"
+    "f27~i2#": "f4~i2#"
+    "f27~i3#": "f0~i16#"
+    "f27~i5#": "f4~i2#"
+    "f27~i6#": "f4~i6#"
+    "f27~i7#": "f0~i17#"
+    "f27~i11#": "f0~i11#"
+    "f27~i12#": "f0~i14#"
 
 fkr101:
   "f5~i11#": "f4~i12#"
@@ -3866,6 +3867,12 @@ fkr106:
   "f26E": "f17"
   "f27E": "f7"
 
+fkr107:
+  "i9#m16": "shwi"
+
+fkr108:
+  "n~g": "n'g"
+
 fkr109:
   initials:
     "i10": "ss"
@@ -5516,111 +5523,6 @@ fkr144:
   " 瑠": " 유"
   " 纍": " 유"
 
-#fkr144a:
-#  " 女": " 여"
-#  " 年": " 연"
-#  " 撚": " 연"
-#  " 秊": " 연"
-#  " 涅": " 열"
-#  " 念": " 염"
-#  " 拈": " 염"
-#  " 恬": " 염"
-#  " 捻": " 염"
-#  " 寧": " 영"
-#  " 寗": " 영"
-#  " 獰": " 영"
-#  " 甯": " 영"
-#  " 佞": " 영"
-#  " 娘": " 낭"
-#  " 羅": " 나"
-#  " 裸": " 나"
-#  " 邏": " 나"
-#  " 螺": " 나"
-#  " 懶": " 나"
-#  " 癩": " 나"
-#  " 喇": " 나"
-#  " 懰": " 유"
-#  " 罶": " 유"
-#  " 藟": " 유"
-#  " 虆": " 유"
-#  " 駵": " 유"
-#  " 六": " 육"
-#  " 陸": " 육"
-#  " 戮": " 육"
-#  " 僇": " 육"
-#  " 穋": " 육"
-#  " 輪": " 윤"
-#  " 倫": " 윤"
-#  " 崙": " 윤"
-#  " 淪": " 윤"
-#  " 綸": " 윤"
-#  " 侖": " 윤"
-#  " 隆": " 융"
-#  " 肋": " 늑"
-#  " 勒": " 늑"
-#  " 凜": " 늠"
-#  " 懍": " 늠"
-#  " 廩": " 늠"
-#  " 陵": " 능"
-#  " 楞": " 능"
-#  " 稜": " 능"
-#  " 綾": " 능"
-#  " 凌": " 능"
-#  " 菱": " 능"
-#  " 里": " 이"
-#  " 利": " 이"
-#  " 理": " 이"
-#  " 李": " 이"
-#  " 離": " 이"
-#  " 吏": " 이"
-#  " 履": " 이"
-#  " 裏": " 이"
-#  " 梨": " 이"
-#  " 悧": " 이"
-#  " 籬": " 이"
-#  " 釐": " 이"
-#  " 俚": " 이"
-#  " 裡": " 이"
-#  " 痢": " 이"
-#  " 罹": " 이"
-#  " 狸": " 이"
-#  " 羸": " 이"
-#  " 莉": " 이"
-#  " 厘": " 이"
-#  " 唎": " 이"
-#  " 鯉": " 이"
-#  " 浬": " 이"
-#  " 璃": " 이"
-#  " 纚": " 이"
-#  " 莅": " 이"
-#  " 詈": " 이"
-#  " 俐": " 이"
-#  " 縭": " 이"
-#  " 离": " 이"
-#  " 涖": " 이"
-#  " 梩": " 이"
-#  " 隣": " 인"
-#  " 麟": " 인"
-#  " 吝": " 인"
-#  " 鱗": " 인"
-#  " 燐": " 인"
-#  " 躪": " 인"
-#  " 璘": " 인"
-#  " 藺": " 인"
-#  " 潾": " 인"
-#  " 粼": " 인"
-#  " 鄰": " 인"
-#  " 林": " 임"
-#  " 臨": " 임"
-#  " 淋": " 임"
-#  " 霖": " 임"
-#  " 琳": " 임"
-#  " 立": " 입"
-#  " 笠": " 입"
-#  " 粒": " 입"
-#  " 砬": " 입"
-#  " 苙": " 입"
-
 fkr145:
   "尚": "상"
   "奬": "장"
@@ -5695,7 +5597,7 @@ fkr146:
   "肹": "힐"
   "黠": "힐"
 
-fkr147:
+fkr147a:
   "家": "가"
   "歌": "가"
   "價": "가"
@@ -5947,7 +5849,7 @@ fkr147:
   "鴃": "격"
   "見": "견"
 
-#fkr147a:
+fkr147b:
   "堅": "견"
   "犬": "견"
   "遣": "견"
@@ -11769,8 +11671,17 @@ fkr170:
   "駆": "구"
   "験": "험"
 
-# Katakana
-fkr180:
+fkr172-179:
+  - "列"  # FKR172
+  - "烈"  # FKR173
+  - "裂"  # FKR174
+  - "劣"  # FKR175
+  - "律"  # FKR176
+  - "率"  # FKR177
+  - "慄"  # FKR178
+  - "栗"  # FKR179
+
+katakana:
   " カン": " 간"
   " キャ": "갸"
   " キュ": "규"

+ 87 - 29
scriptshifter/hooks/korean/romanizer.py

@@ -27,12 +27,14 @@ from scriptshifter.exceptions import BREAK
 from scriptshifter.hooks.korean import KCONF
 
 
+CP_MIN = 44032
+
 logger = logging.getLogger(__name__)
 
 
 def s2r_nonames_post_config(ctx):
     """ Romanize a regular string NOT containing personal names. """
-    ctx.dest = _romanize_nonames(ctx.src)
+    ctx.dest, ctx.warnings = _romanize_nonames(ctx.src)
 
     return BREAK
 
@@ -44,14 +46,15 @@ def s2r_names_post_config(ctx):
     One or more names can be transcribed. A comma or middle dot (U+00B7) is
     to be used as separator for multiple names.
     """
-    ctx.dest = _romanize_names(ctx.src)
+    ctx.dest, ctx.warnings = _romanize_names(ctx.src)
 
     return BREAK
 
 
-def _romanize_nonames(src):
+def _romanize_nonames(src, hancha=False):
     # FKR038
-    # TODO Address Marc8Hancha() and Hancha2Hangul() (both defs missing)
+    if hancha:
+        src = _hancha2hangul(_marc8_hancha(src))
 
     data = f" {src} "
 
@@ -63,10 +66,36 @@ def _romanize_nonames(src):
     # NOTE This is slightly different from LL 929-930 in that it doesn't
     # result in double spaces.
     data = data.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
+    # This is more compact but I'm unsure if the replacement order is kept.
+    # data = data.replace({"\r\n": " ", "\r": " ", "\n": " "})
 
     data = _romanize_oclc_auto(data)
 
-    return data
+    # FKR042
+    if capitalize = "all":
+        data = data.title()
+    # FKR043
+    elif capitalize = "first":
+        data = data.capitalize()
+
+    # FKR044
+    ambi = re.sub("[,.\";: ]+", " ", data)
+
+    # TODO See https://github.com/lcnetdev/scriptshifter/issues/20
+    no_oclc_breve = False
+
+    if no_oclc_breve:
+        data = data.replace({"ŏ": "ŏ", "ŭ": "ŭ", "Ŏ": "Ŏ", "Ŭ": "Ŭ"})
+
+    # TODO Decide what to do with these. There is no facility for outputting
+    # warnings or notes to the user yet.
+    warnings = []
+    for exp, warn in KCONF["fkr045"].items():
+        if exp in ambi:
+            warnings.append(ambi if warn == "" else warn)
+
+
+    return data, warnings
 
 
 def _romanize_names(src):
@@ -85,7 +114,7 @@ def _romanize_oclc_auto(data):
     if re.match(" 제[0-9]", data):
         data = data.replace(" 제", " 제 ")
     # NOTE: Maybe this was meant:
-    #data = re.sub(" 제([0-9])", "제 \\1", data):
+    # data = re.sub(" 제([0-9])", " 제 \\1", data)
 
     # FKR052
     for rname, rule in KCONF["fkr052"].items():
@@ -93,7 +122,7 @@ def _romanize_oclc_auto(data):
         data = data.replace(rule)
 
     # Strip end and multiple whitespace.
-    data = re.sub("\W{2,}", " ", data.strip())
+    data = re.sub("\\W{2,}", " ", data.strip())
 
     data = data.replace("^", " GLOTTAL ")
 
@@ -109,9 +138,9 @@ def _romanize_oclc_auto(data):
     # TODO Add leading whitespace as per L1221? L1202 already added one.
     data = data.replace(KCONF["fkr060"])
 
-    data = re.sub("\W{2,}", " ", f" {data.strip()} ")
+    data = re.sub("\\W{2,}", " ", f" {data.strip()} ")
 
-    #FKR061 FKR063 FKR064 FKR065
+    # FKR061 FKR063 FKR064 FKR065
     logger.debug("Applying FKR062-065")
     data = data.replace(KCONF["fkr061"]).replace(KCONF["fkr063"]).replace(
             KCONF["fkr064"]).replace(KCONF["fkr065"])
@@ -121,12 +150,12 @@ def _romanize_oclc_auto(data):
         logger.debug(f"Applying FKR066[{rname}]")
         data = data.replace(rule)
 
-    data = re.sub("\W{2,}", " ", data.strip())
+    data = re.sub("\\W{2,}", " ", data.strip())
 
     return data
 
 
-def kor_rom(data):
+def _kor_rom(data):
     # FKR069
     data = data.replace(KCONF["fkr069"])
 
@@ -137,15 +166,14 @@ def kor_rom(data):
         orig = data
 
     non_kor = 0
-    CP_MIN = 44032
     cpoints = tuple(ord(c) for c in data)
     for cp in cpoints:
         if cp < CP_MIN:
-            data = data[1:] # TODO Really?
+            non_kor += 1
+            data = data[1:]
 
     rom_ls = []
-    # TODO verify cap to 9
-    for i in range(min(9,len(data))):
+    for i in range(len(data)):
         cp = cpoints[i] - CP_MIN
         ini = "i" + str(cp // 588)
         med = "m" + str((cp // 28) % 21)
@@ -155,23 +183,41 @@ def kor_rom(data):
 
     # FKR071
     if niun:
-        niun_loc = rom.find("~")
-        rom_niun_a = rom[:niun_loc - 1]
-        rom_niun_b = rom[niun_loc:]
-
-        if "i11#m2" in rom_niun_b:
-            rom_niun_b = rom_niun_b.replace("i11#m2", "i2#m2")
+        rom_niun_a, rom_niun_b = rom.split("~", 1)
+        if re.match("ill#m(?:2|6|12|17|20)", rom_niun_b):
+            logger.debug("Applying FKR071")
+            rom_niun_b = rom_niun_b.replace("i11#m", "i2#m", 1)
+
+        # FKR072
+        if rom_niun_b.startswith("i5#") and rom_niun_a.endswith("f4"):
+            logger.debug("Applying FKR072")
+            rom_niun_b = rom_niun_b.replace("i5#", "i2", 1)
+
+        rom = f"{rom_niun_a}~{rom_niun_b}"
+
+    # FKR073-100
+    fkr_i = 73
+    for k, cmap in KCONF["fkr073-100"].items():
+        if k in rom:
+            logger.debug(f"Applying FKR{fkr_i:03}")
+            rom.replace(cmap)
+        fkr_i += 1
+
+    # FKR101-108
+    for fkr_i in range(101, 109):
+        logger.debug(f"Applying FKR{fkr_i:03}")
+        rom = rom.replace(KCONF[f"fkr{fkr_i:03}"])
 
     return data
 
 
-def marc8_hancha(data):
+def _marc8_hancha(data):
     # FKR142
     logger.debug("Applying FKR142")
     return data.replace(KCONF["fkr142"])
 
 
-def hancha2hangul(data):
+def _hancha2hangul(data):
     data = " " + data.replace("\n", "\n ")
 
     # FKR143-170
@@ -180,15 +226,27 @@ def hancha2hangul(data):
         data = data.replace(KCONF[f"fkr{i}"])
 
     # FKR171
-    if "不" in data:
-        ct = data.count("不")
-        data = data.replace("不", "X")
-        for i in range(ct):
-            pass
+    # Write down indices of occurrences of "不"
+    idx = [i for i, item in enumerate(data) if item == "不"]
+    for i in idx:
+        val = ord(data[i + 1])
+        if (val > 45795 and val < 46384) or (val > 51087 and val < 51676):
+            data = data.replace("不", "부", 1)
+        else:
+            data = data.replace("不", "불", 1)
     # FKR172-179
+    for char in KCONF["fkr172-179"]:
+        idx = [i for i, item in enumerate(data) if item == char]
+        for i in idx:
+            val = ord(data[i + 1])
+            coda_value = (val - CP_MIN) % 28
+            if coda_value == 1 or coda_value == 4 or val < 100:  # TODO verify
+                data = data.replace(char, "열", 1)
+            else:
+                data = data.replace(char, "렬", 1)
 
     # FKR180
     logger.debug("Applying FKR180")
     data = data.replace(KCONF["fkr180"])
 
-    return re.sub("\W{2,}", " ", data.strip())
+    return re.sub("\\W{2,}", " ", data.strip())