Browse Source

WIP Korean

scossu 1 year ago
parent
commit
a361fbad7a

+ 162 - 162
scriptshifter/hooks/korean/Functions_KoreanRomanizer.au3

@@ -63,174 +63,174 @@
 ;~    EndIf
 $TrayTip = "OFF"
 
-Func RomanizerAllCap()
-   Global $CapitalizeAll = "On" ; <-- 모든 첫글자 대문자 -->
-   Global $Capitalize = "Off"
-   Romanizer()
-EndFunc
-
-Func RomanizerCap()
-   Global $CapitalizeAll = "Off"
-   Global $Capitalize = "On"; <-- 첫글자만 대문자 -->
-   Romanizer()
-EndFunc
-
-Func RomanizerNoCap()
-   Global $CapitalizeAll = "Off"
-   Global $Capitalize = "Off"; <-- 모든 첫글자 소문자 -->
-   Romanizer()
-EndFunc
-
-Func CapitalizeAll()
-   Sleep(50+20)
-   $Source = ClipGet()
-   Local $sArray=StringSplit($Source," ")
-   $AllCapOutput = ""
-   For $i = 1 To Ubound($sArray, 1)-1
-	  ClipPut(StringUpper(StringLeft($sArray[$i],1)) & StringTrimLeft($sArray[$i],1))
-	  $AllCapOutput=$AllCapOutput & " " & ClipGet()
-   Next
-   ClipPut(StringStripWS($AllCapOutput,1+4))
-EndFunc
+;Func RomanizerAllCap()
+;   Global $CapitalizeAll = "On" ; <-- 모든 첫글자 대문자 -->
+;   Global $Capitalize = "Off"
+;   Romanizer()
+;EndFunc
 
-Func RomanizerAuth()
-   Global $Auth="Yes"
-   KorNameRom20()
-EndFunc
+;Func RomanizerCap()
+;   Global $CapitalizeAll = "Off"
+;   Global $Capitalize = "On"; <-- 첫글자만 대문자 -->
+;   Romanizer()
+;EndFunc
 
-Func KorNameRomOCLC()
-   $KorNameRom = ""
-   Sleep(50+50)
-   $NClipB = ClipGet()
-   $ClipB = StringStripWS($NClipB,1+2+4)
-   Sleep(50+50)
-   ClipPut($ClipB)
-   Sleep(50+50)
-   ParseKorName()
-   Sleep(50+50)
-   If StringInStr(ClipGet(),"~") > 0 Then
-	  $aParsedNames = StringSplit(ClipGet(),"~")
-	  $LName = $aParsedNames[1]
-	  $FName = $aParsedNames[2]
-	  Sleep(50+50)
-	  ClipPut($FName)
-	  Sleep(50+50)
-	  KorFNameRom()
-	  Sleep(50+50)
-	  $FNameRom = ClipGet()
-	  Sleep(50+50)
-	  If StringInStr($LName,"+")>0 Then
-		 $aLastNames = StringSplit($LName,"+")
-		 $LName1 = $aLastNames[1]
-		 Sleep(50+50)
-		 ClipPut($LName1)
-		 Sleep(50+50)
-		 KorLNameRom()
-		 Sleep(50+50)
-		 $LNameRom1 = ClipGet()
-		 Sleep(50+50)
-		 $LName2 = $aLastNames[2]
-		 Sleep(50+50)
-		 ClipPut($LName2)
-		 Sleep(50+50)
-		 KorLNameRom()
-		 Sleep(50+50)
-		 $LNameRom2 = ClipGet()
-		 Sleep(50+50)
-		 $LNameRom = $LNameRom1 & " " & $LNameRom2
-		 Sleep(50+50)
-	  Else
-		 Sleep(50+50)
-		 ClipPut($LName)
-		 Sleep(50+50)
-		 KorLNameRom()
-		 Sleep(50+50)
-		 $LNameRom = ClipGet()
-		 Sleep(50+50)
-	  EndIf
-	  Sleep(50+50)
-	  $KorNameRom = $LNameRom & " " & $FNameRom
-	  Sleep(50+50)
-	  ClipPut($KorNameRom)
-	  Sleep(50+50)
-   EndIf
-   If StringRegExp(StringLeft($KorNameRom,1),"[a-z]|[A-Z]")=0 Then
-	  ClipPut($ClipB)
-	  KorCorpNameRomOCLC()
-   EndIf
-Sleep(50+20)
-EndFunc
+;Func RomanizerNoCap()
+;   Global $CapitalizeAll = "Off"
+;   Global $Capitalize = "Off"; <-- 모든 첫글자 소문자 -->
+;   Romanizer()
+;EndFunc
 
-Func NameRomanizer()
-   Global $Auth="No"
-   _CopyEX()
-   $ClipB=ClipGet()
-   $ClipB=StringReplace($ClipB,"·",", ")
-   $ClipB=StringReplace($ClipB,"・",", ")
-   If StringInStr($ClipB,",")>0 AND StringLen($ClipB)>4 AND StringLen(StringLeft($ClipB,StringInStr($ClipB,",")-1))>1 Then
-	  BatchRom()
-   Else
-	  KorNameRom20()
-   EndIf
-EndFunc
+;Func CapitalizeAll()
+;   Sleep(50+20)
+;   $Source = ClipGet()
+;   Local $sArray=StringSplit($Source," ")
+;   $AllCapOutput = ""
+;   For $i = 1 To Ubound($sArray, 1)-1
+;	  ClipPut(StringUpper(StringLeft($sArray[$i],1)) & StringTrimLeft($sArray[$i],1))
+;	  $AllCapOutput=$AllCapOutput & " " & ClipGet()
+;   Next
+;   ClipPut(StringStripWS($AllCapOutput,1+4))
+;EndFunc
 
-Func BatchRom()
-   _CopyEX()
-   TrayTip($TT_Title1,$TT_Text1,15)
-   $ClipB=ClipGet()
-   $ClipB=StringReplace($ClipB,"·",",")
-   $ClipB=StringReplace($ClipB,"・",", ")
-   $ClipB=StringReplace($ClipB,", ",",")
-   If StringRight($ClipB,1)="." Then
-	  $PUNC="p"
-	  ClipPut(StringTrimRight($ClipB,1))
-   Else
-	  If StringRight($ClipB,1)="," Then
-		 $PUNC="c"
-		 ClipPut(StringTrimRight($ClipB,1))
-	  Else
-		 $PUNC="0"
-	  EndIf
-   EndIf
-   $RomName=""
-   If StringInStr($ClipB,",")>0 Then
-	  $Names=StringRegExpReplace($ClipB,",","&")
-	  $Commas=@extended
-	  Local $aNames=StringSplit($Names,"&")
-   Else
-	  $Commas=0
-	  Local $aNames[2]=["0",$ClipB]
-   EndIf
+;Func RomanizerAuth()
+;   Global $Auth="Yes"
+;   KorNameRom20()
+;EndFunc
 
-   For $i=0 To $Commas+1
-	  ClipPut($aNames[$i])
-	  If StringIsInt($aNames[$i])=0 then
-		 SimpleRomanizer()
-	  $RomName=$RomName & ", " & ClipGet()
-	  EndIf
-   Next
+;Func KorNameRomOCLC()
+;   $KorNameRom = ""
+;   Sleep(50+50)
+;   $NClipB = ClipGet()
+;   $ClipB = StringStripWS($NClipB,1+2+4)
+;   Sleep(50+50)
+;   ClipPut($ClipB)
+;   Sleep(50+50)
+;   ParseKorName()
+;   Sleep(50+50)
+;   If StringInStr(ClipGet(),"~") > 0 Then
+;	  $aParsedNames = StringSplit(ClipGet(),"~")
+;	  $LName = $aParsedNames[1]
+;	  $FName = $aParsedNames[2]
+;	  Sleep(50+50)
+;	  ClipPut($FName)
+;	  Sleep(50+50)
+;	  KorFNameRom()
+;	  Sleep(50+50)
+;	  $FNameRom = ClipGet()
+;	  Sleep(50+50)
+;	  If StringInStr($LName,"+")>0 Then
+;		 $aLastNames = StringSplit($LName,"+")
+;		 $LName1 = $aLastNames[1]
+;		 Sleep(50+50)
+;		 ClipPut($LName1)
+;		 Sleep(50+50)
+;		 KorLNameRom()
+;		 Sleep(50+50)
+;		 $LNameRom1 = ClipGet()
+;		 Sleep(50+50)
+;		 $LName2 = $aLastNames[2]
+;		 Sleep(50+50)
+;		 ClipPut($LName2)
+;		 Sleep(50+50)
+;		 KorLNameRom()
+;		 Sleep(50+50)
+;		 $LNameRom2 = ClipGet()
+;		 Sleep(50+50)
+;		 $LNameRom = $LNameRom1 & " " & $LNameRom2
+;		 Sleep(50+50)
+;	  Else
+;		 Sleep(50+50)
+;		 ClipPut($LName)
+;		 Sleep(50+50)
+;		 KorLNameRom()
+;		 Sleep(50+50)
+;		 $LNameRom = ClipGet()
+;		 Sleep(50+50)
+;	  EndIf
+;	  Sleep(50+50)
+;	  $KorNameRom = $LNameRom & " " & $FNameRom
+;	  Sleep(50+50)
+;	  ClipPut($KorNameRom)
+;	  Sleep(50+50)
+;   EndIf
+;   If StringRegExp(StringLeft($KorNameRom,1),"[a-z]|[A-Z]")=0 Then
+;	  ClipPut($ClipB)
+;	  KorCorpNameRomOCLC()
+;   EndIf
+;Sleep(50+20)
+;EndFunc
 
-   If $PUNC="0" Then
-	  ClipPut(StringTrimLeft($RomName,2))
-   EndIf
-   If $PUNC="p" Then
-	  ClipPut(StringTrimLeft($RomName,2)&".")
-   EndIf
-   If $PUNC="c" Then
-	  ClipPut(StringTrimLeft($RomName,2)&",")
-   EndIf
+;Func NameRomanizer()
+;   Global $Auth="No"
+;   _CopyEX()
+;   $ClipB=ClipGet()
+;   $ClipB=StringReplace($ClipB,"·",", ")
+;   $ClipB=StringReplace($ClipB,"・",", ")
+;   If StringInStr($ClipB,",")>0 AND StringLen($ClipB)>4 AND StringLen(StringLeft($ClipB,StringInStr($ClipB,",")-1))>1 Then
+;	  BatchRom()
+;   Else
+;	  KorNameRom20()
+;   EndIf
+;EndFunc
 
-   Sleep4OCLC()
-   If StringRegExp(ClipGet(),"[0-9]")>0 Then
-  	  TrayTip($TT_Title2,$TT_Text2,10)
-	  ClipPut($ClipB)
-   Else
-	  If $TrayTip="On" Then
-		 TrayTip($TT_Title3,$TT_Text3,10)
-	  EndIf
-	  _PasteEx()
-   EndIf
-EndFunc
+;Func BatchRom()
+;   _CopyEX()
+;   TrayTip($TT_Title1,$TT_Text1,15)
+;   $ClipB=ClipGet()
+;   $ClipB=StringReplace($ClipB,"·",",")
+;   $ClipB=StringReplace($ClipB,"・",", ")
+;   $ClipB=StringReplace($ClipB,", ",",")
+;   If StringRight($ClipB,1)="." Then
+;	  $PUNC="p"
+;	  ClipPut(StringTrimRight($ClipB,1))
+;   Else
+;	  If StringRight($ClipB,1)="," Then
+;		 $PUNC="c"
+;		 ClipPut(StringTrimRight($ClipB,1))
+;	  Else
+;		 $PUNC="0"
+;	  EndIf
+;   EndIf
+;   $RomName=""
+;   If StringInStr($ClipB,",")>0 Then
+;	  $Names=StringRegExpReplace($ClipB,",","&")
+;	  $Commas=@extended
+;	  Local $aNames=StringSplit($Names,"&")
+;   Else
+;	  $Commas=0
+;	  Local $aNames[2]=["0",$ClipB]
+;   EndIf
+;
+;   For $i=0 To $Commas+1
+;	  ClipPut($aNames[$i])
+;	  If StringIsInt($aNames[$i])=0 then
+;		 SimpleRomanizer()
+;	  $RomName=$RomName & ", " & ClipGet()
+;	  EndIf
+;   Next
+;
+;   If $PUNC="0" Then
+;	  ClipPut(StringTrimLeft($RomName,2))
+;   EndIf
+;   If $PUNC="p" Then
+;	  ClipPut(StringTrimLeft($RomName,2)&".")
+;   EndIf
+;   If $PUNC="c" Then
+;	  ClipPut(StringTrimLeft($RomName,2)&",")
+;   EndIf
+;
+;   Sleep4OCLC()
+;   If StringRegExp(ClipGet(),"[0-9]")>0 Then
+;  	  TrayTip($TT_Title2,$TT_Text2,10)
+;	  ClipPut($ClipB)
+;   Else
+;	  If $TrayTip="On" Then
+;		 TrayTip($TT_Title3,$TT_Text3,10)
+;	  EndIf
+;	  _PasteEx()
+;   EndIf
+;EndFunc
 
 ; Only for simple syllables --sc
 Func SimpleRomanizer()
@@ -457,7 +457,7 @@ Func ParseKorName()
 
    ; <-- FKR005 -->
    If StringLen($TargetKor) > 7 OR StringLen($TargetKor) = 1 OR StringInStr($TargetKorOrig," ",0,1)>3 Then
-	  If $ForeignNameConversion = "Yes" Then
+	  If $ForeignNameConversion = "Yes" Then  ; Assuming yes in SS?
 		 ClipPut($TargetKorOrig)
 		 KorCorpNameRomOCLC()
 	  Else

+ 3 - 2
scriptshifter/hooks/korean/data.yml

@@ -1,7 +1,8 @@
-fkr001:
+fkr001-002:
+  # FKR001
   - [["金", "金"], "김"]
   - [["李", "李"], "이"]
-fkr002:
+  # FKR002
   # Use same logic as FKR001, hence single-element arrays.
   - [["리"], "이"]
   - [["라"], "나"]

+ 104 - 7
scriptshifter/hooks/korean/romanizer.py

@@ -52,6 +52,8 @@ def s2r_names_post_config(ctx):
 
 
 def _romanize_nonames(src, capitalize=False, hancha=False):
+    """ Main Romanization function for non-name strings. """
+
     # FKR038
     if hancha:
         src = _hancha2hangul(_marc8_hancha(src))
@@ -69,20 +71,20 @@ def _romanize_nonames(src, capitalize=False, hancha=False):
     # This is more compact but I'm unsure if the replacement order is kept.
     # data = data.replace({"\r\n": " ", "\r": " ", "\n": " "})
 
-    data = _romanize_oclc_auto(data)
+    rom = _romanize_oclc_auto(data)
 
     # FKR042
     if capitalize == "all":
-        data = data.title()
+        rom = data.title()
     # FKR043
     elif capitalize == "first":
-        data = data.capitalize()
+        rom = data.capitalize()
 
     # FKR044
-    ambi = re.sub("[,.\";: ]+", " ", data)
+    ambi = re.sub("[,.\";: ]+", " ", rom)
 
     # @TODO Move this to a generic normalization step (not only for K)
-    data = _replace_map(data, {"ŏ": "ŏ", "ŭ": "ŭ", "Ŏ": "Ŏ", "Ŭ": "Ŭ"})
+    rom = _replace_map(rom, {"ŏ": "ŏ", "ŭ": "ŭ", "Ŏ": "Ŏ", "Ŭ": "Ŭ"})
 
     # TODO Decide what to do with these. There is no facility for outputting
     # warnings or notes to the user yet.
@@ -91,11 +93,106 @@ def _romanize_nonames(src, capitalize=False, hancha=False):
         if exp in ambi:
             warnings.append(ambi if warn == "" else warn)
 
-    return data, warnings
+    return rom, warnings
 
 
 def _romanize_names(src):
-    return "Nothing Here Yet.", {}
+    """
+    Main Romanization function for names.
+
+    Args:
+        src (str): source string, expected to be a Korean personal name.
+
+    Returns:
+        tuple: ``(result, warnings)``. ``result`` is ``None`` when ``src``
+        contains ASCII letters or digits; otherwise it is whatever
+        ``_parse_kor_name`` yields for the normalized name. ``warnings`` is
+        a list of messages collected along the way.
+    """
+
+    warnings = []
+
+    # `re.search` scans the whole string; the `re` module has no `find`.
+    if re.search("[a-z]|[A-Z]|[0-9]", src):
+        warnings.append("Source may not be a personal name.")
+        return None, warnings
+
+    # FKR001: Conversion, Family names in Chinese (dealing with 金 and 李)
+    # FKR002: Family names, Initial sound law
+    # Only the first matching rule is applied.
+    for ss, r in KCONF["fkr001-002"]:
+        matched = next((s for s in ss if src.startswith(s)), None)
+        if matched is not None:
+            # Drop exactly what was matched, not a fixed single character.
+            src = r + src[len(matched):]
+            break
+
+    # FKR003: First name, Chinese Character Conversion
+    src = _hancha2hangul(_marc8_hancha(src))
+
+    # TODO Romanization of the parsed family/given name parts is not
+    # implemented here yet; for now the parser output is returned as is.
+    rom, parse_warnings = _parse_kor_name(
+        re.sub(r"\W{2,}", " ", src.strip()))
+    # Keep warnings gathered so far instead of overwriting them.
+    warnings.extend(parse_warnings)
+
+    return rom, warnings
+
+
+def _parse_kor_name(src):
+    """
+    Mark the family / given name boundaries of a Korean name.
+
+    In the parsed string, "+" separates two family-name parts and "~"
+    separates the family name from the given name.
+
+    Args:
+        src (str): name string in Hangul, with parts separated by spaces.
+
+    Returns:
+        tuple: ``(result, warnings)``. ``result`` is the parsed string, the
+        output of ``_kor_corp_name_rom`` when ``src`` does not look like a
+        personal name (FKR005), or ``None`` when no rule applies.
+        ``warnings`` is a list of messages.
+    """
+    warnings = []
+
+    # FKR004: Check first two characters. Two-syllable family name or not?
+    two_syl_fname = any(src.startswith(ptn) for ptn in KCONF["fkr004"])
+
+    # Index of the first space, or -1 if there is none.
+    first_space = src.find(" ")
+
+    # FKR005: Not a personal name if longer than 7 or shorter than 2
+    # characters, or if the first space comes after the third character.
+    # Only the first space is tested, so that names with two family-name
+    # parts (FKR007) are not diverted here by their second space.
+    if len(src) > 7 or len(src) < 2 or first_space > 2:
+        return _kor_corp_name_rom(src), warnings
+
+    ct_spaces = src.count(" ")
+    # FKR006: Error if more than 2 spaces
+    if ct_spaces > 2:
+        warnings.append("ERROR: not a name (too many spaces)")
+        return None, warnings
+
+    parsed = None
+    if ct_spaces == 2:
+        # FKR007: 2 spaces (two family names)
+        parsed = src.replace(" ", "+", 1).replace(" ", "~", 1)
+    elif ct_spaces == 1:
+        if first_space == 1:
+            # FKR008: 1 space (2nd position)
+            parsed = src.replace(" ", "~")
+        elif first_space == 2 and two_syl_fname:
+            # FKR009: 1 space (3rd position)
+            parsed = "+" + src.replace(" ", "~")
+
+    # TODO Remaining patterns (e.g. names without spaces) are not handled
+    # yet; report them instead of failing on an unassigned variable.
+    if parsed is None:
+        warnings.append("ERROR: unsupported name pattern")
+
+    return parsed, warnings
+
+
+def _kor_corp_name_rom(src):
+    """
+    Romanize a corporate (non-personal) name.
+
+    A leading or trailing "(주)" / "(유)" marker is stripped before
+    romanization and re-attached on the same side as "(Chu)" / "(Yu)".
+    The remaining text is romanized token by token, title-cased, and then
+    passed through the FKR035 replacement map.
+
+    Args:
+        src (str): source string.
+
+    Returns:
+        str: romanized string.
+    """
+    # (Korean prefix, Korean suffix, Romanized prefix, Romanized suffix)
+    affixes = (
+        ("(주) ", " (주)", "(Chu) ", " (Chu)"),
+        ("(유) ", " (유)", "(Yu) ", " (Yu)"),
+    )
+
+    # Record on which side, if any, each marker was found. A suffix match
+    # takes precedence over a prefix match of the same marker.
+    found = []
+    for kor_pre, kor_suf, rom_pre, rom_suf in affixes:
+        side = None
+        if src.startswith(kor_pre):
+            src = src[len(kor_pre):]
+            side = "L"
+        if src.endswith(kor_suf):
+            src = src[:-len(kor_suf)]
+            side = "R"
+        found.append((side, rom_pre, rom_suf))
+
+    rom = " ".join(
+        _romanize_oclc_auto(tok) for tok in src.split(" ")).title()
+
+    for side, rom_pre, rom_suf in found:
+        if side == "L":
+            rom = rom_pre + rom
+        elif side == "R":
+            rom = rom + rom_suf
+
+    # FKR035: Replace established names
+    return _replace_map(rom, KCONF["fkr035"])
 
 
 def _romanize_oclc_auto(data):