scossu
/
scriptshifter


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728
							# @package ext.korean
#

__doc__ = """
Korean transcription functions.

Ported from K-Romanizer: https://library.princeton.edu/eastasian/k-romanizer

Only script-to-Roman is possible for Korean.

Note that Korean Romanization must be done separately for strings containing
only personal names and strings that do not contain personal names, due to
ambiguities in the language. A non-deterministic approach using machine
learning that separates words depending on context is being attempted by other
parties, and it may be possible to eventually integrate such services here in
the future, technology and licensing permitting. At the moment there are no
such plans.

Many thanks to Hyoungbae Lee for kindly providing the original K-Romanizer
program and assistance in porting it to Python.
"""

import logging
import re

from os import path
from csv import reader

from scriptshifter.exceptions import BREAK
from scriptshifter.hooks.korean import KCONF


# Directory containing this module; FKR_index.csv is read from here.
PWD = path.dirname(path.realpath(__file__))
# 44032 == 0xAC00. Subtracted from character code points before the
# initial/medial/final index arithmetic used in this module.
CP_MIN = 44032

# Build FKR index for better logging.
# Maps column 0 of every CSV row to column 2 of the same row; `_fkr_log`
# looks entries up by "FKRnnn" keys.
with open(path.join(PWD, "FKR_index.csv"), newline='') as fh:
    csv = reader(fh)
    FKR_IDX = {row[0]: row[2] for row in csv}


logger = logging.getLogger(__name__)


def s2r_nonames_post_config(ctx):
    """
    Romanize a regular string that does NOT contain personal names.

    Reads `ctx.src` and `ctx.options`, stores the romanized text in
    `ctx.dest` and the collected warnings in `ctx.warnings`, then returns
    `BREAK`.
    """
    romanized, notes = _romanize_nonames(ctx.src, ctx.options)
    ctx.dest = romanized
    ctx.warnings = notes

    return BREAK


def s2r_names_post_config(ctx):
    """
    Romanize a string made up ONLY of Korean personal names.

    Several names may be passed at once, separated by a comma or by a middle
    dot (U+00B7). The romanized text is stored in `ctx.dest`, the collected
    warnings in `ctx.warnings`, and `BREAK` is returned.
    """
    romanized, notes = _romanize_names(ctx.src, ctx.options)
    ctx.dest = romanized
    ctx.warnings = notes

    return BREAK


def _romanize_nonames(src, options):
    """
    Main Romanization function for non-name strings.

    :param str src: Source text.
    :param dict options: Options. Keys read here: ``hancha`` (default
        ``True``; convert Chinese characters to Hangul first) and
        ``capitalize`` (``"all"``, ``"first"``; any other value or a missing
        key leaves the case untouched).

    :return: 2-tuple of the romanized string and a list of warnings.
    :rtype: tuple(str, list)
    """

    # FKR038: Convert Chinese characters to Hangul
    if options.get("hancha", True):
        kor = _hancha2hangul(_marc8_hancha(src))
    else:
        kor = src

    # Collapse every whitespace run (ideographic spaces included) into one
    # ASCII space, then pad both ends so that space-delimited patterns in the
    # tables below can match at the string boundaries.
    kor = re.sub(r"\s+", " ", kor)
    kor = f" {kor} "

    # FKR039: Replace Proper name with spaces in advance
    # FKR040: Replace Proper name with a hyphen in advance
    # FKR041: Romanize names of Hangul consonants
    for i in range(39, 42):
        _fkr_log(i)
        kor = _replace_map(kor, KCONF[f"fkr{i:03}"])

    # NOTE This is slightly different from LL 929-930 in that it doesn't
    # result in double spaces.
    kor = kor.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")

    rom = _romanize_oclc_auto(kor)

    logger.debug(f"Before capitalization: {rom}")
    # A missing "capitalize" option means "do not capitalize" rather than a
    # KeyError, consistent with how "hancha" is read above.
    capitalize = options.get("capitalize")
    # FKR042: Capitalize all first letters
    if capitalize == "all":
        rom = _capitalize(rom)
    # FKR043: Capitalize the first letter
    elif capitalize == "first":
        # Slicing instead of indexing: `rom` may be empty.
        rom = rom[:1].upper() + rom[1:]

    # FKR044: Ambiguities
    ambi = re.sub("[,.\";: ]+", " ", rom)

    # TODO Decide what to do with these. There is no facility for outputting
    # warnings or notes to the user yet.
    warnings = []
    _fkr_log(45)
    for exp, warn in KCONF["fkr045"].items():
        if exp in ambi:
            # An empty warning text means "report the ambiguous string".
            warnings.append(ambi if warn == "" else warn)

    rom = rom.replace("kkk", "kk")

    return rom, warnings


def _romanize_names(src, options):
    """
    Main Romanization function for names.

    Split the source on commas (or, when it contains no comma, on middle
    dots), romanize every name separately and join the results with ", ".

    K-Romanizer: KorNameRom20

    :return: 2-tuple of the joined romanized names and a list of warnings.
    """
    warnings = []

    has_comma = "," in src
    if has_comma and "·" in src:
        warnings.append(
                "both commas and middle dots are being used to separate "
                "names. Only one of the two types should be used, or "
                "unexpected results may occur.")

    # Commas take precedence when both separators are present.
    separator = "," if has_comma else "·"

    romanized = []
    for chunk in src.split(separator):
        name_rom, name_warnings = _romanize_name(chunk.strip(), options)
        romanized.append(name_rom)
        warnings.extend(name_warnings)

    return ", ".join(romanized), warnings


def _romanize_name(src, options):
    """
    Romanize a single personal name.

    :param str src: One name, already separated from any sibling names.
    :param dict options: Options. ``marc_field`` (default ``"0"``) is read
        here: for fields 100, 600, 700 and 800 a comma is inserted between
        family name and first name.

    :return: 2-tuple of the romanized name (empty string if the name was not
        recognized) and a list of warnings.
    :rtype: tuple(str, list)

    :raises ValueError: if ``marc_field`` cannot be converted to an integer.
    """
    warnings = []

    # FKR001: Conversion, Family names in Chinese (dealing with 金 and 李)
    # FKR002: Family names, Initial sound law
    # Only the first matching entry is applied.
    replaced = False
    for ss, r in KCONF["fkr001-002"]:
        if replaced:
            break
        for s in ss:
            if src.startswith(s):
                src = r + src[1:]
                replaced = True
                break

    # FKR003: First name, Chinese Character Conversion
    src = _hancha2hangul(_marc8_hancha(src))

    if re.search("[a-zA-Z0-9]", src):
        warnings.append(f"{src} is not a recognized personal name.")
        return "", warnings

    # `parsed` can either be a modified Korean string with markers, or in case
    # of a foreign name, the final romanized name.
    parsed, _warnings = _parse_kor_name(re.sub(r"\s{2,}", " ", src.strip()))
    warnings.extend(_warnings)

    if not parsed:
        warnings.append(f"{src} is not a recognized Korean name.")
        return "", warnings

    if "~" not in parsed:
        warnings.append("Romanized as a foreign name.")
        return parsed, warnings

    lname, fname = parsed.split("~", 1)
    fname_rom = _kor_fname_rom(fname)

    # `_kor_lname_rom` returns False for parts it cannot romanize (including
    # the empty part produced by a leading "+" marker). Drop those so that
    # the join below only ever sees strings.
    lname_rom_ls = [
        part_rom
        for part_rom in (_kor_lname_rom(n) for n in lname.split("+"))
        if part_rom
    ]

    if not lname_rom_ls:
        warnings.append(f"{parsed} is not a recognized Korean name.")
        return "", warnings

    lname_rom = " ".join(lname_rom_ls)

    # Add comma after the last name for certain MARC fields.
    marc_field_str = options.get("marc_field", "0")
    try:
        marc_field = int(marc_field_str)
    except (TypeError, ValueError) as e:
        # int() raises ValueError for non-numeric strings and TypeError for
        # unsupported types such as None; report both the same way.
        raise ValueError(
                f"{marc_field_str} is not a valid MARC field code.") from e

    if marc_field in (100, 600, 700, 800):
        rom = f"{lname_rom}, {fname_rom}"
    else:
        rom = f"{lname_rom} {fname_rom}"

    # TODO add option for authoritative name.

    return rom, warnings


def _parse_kor_name(src):
    """
    Mark the boundary between family name and first name.

    The returned string uses "~" between family name and first name and "+"
    as a family-name marker. Strings that are too short, too long, or that
    contain a space after the third character are not parsed as personal
    names: they are romanized right away via `_kor_corp_name_rom`, and the
    result (which has no "~" marker added here) is returned instead.

    :param str src: Name in Hangul.

    :return: 2-tuple of the marked-up string (``None`` if the string could
        not be parsed) and a list of warnings.
    :rtype: tuple
    """
    parsed = None
    warnings = []

    # FKR004: Check first two characters. Two-syllable family name or not?
    two_syl_fname = any(src.startswith(ptn) for ptn in KCONF["fkr004"])

    src_len = len(src)

    # FKR005: Error if more than 7 syllables
    if src_len > 7 or src_len < 2 or " " in src[3:]:
        return _kor_corp_name_rom(src), warnings

    ct_spaces = src.count(" ")
    # FKR0006: Error if more than 2 spaces
    if ct_spaces > 2:
        warnings.append("ERROR: not a name (too many spaces)")
        return parsed, warnings

    # FKR007: 2 spaces (two family names)
    if ct_spaces == 2:
        parsed = src.replace(" ", "+", 1).replace(" ", "~", 1)
    elif ct_spaces == 1:
        # FKR008: 1 space (2nd position)
        if src[1] == " ":
            parsed = src.replace(" ", "~")
        # FKR009: 1 space (3rd position). Only accepted after a known
        # two-syllable family name; otherwise `parsed` stays None.
        elif src_len > 2 and src[2] == " " and two_syl_fname:
            parsed = "+" + src.replace(" ", "~")

    # FKR010: When there is no space
    elif two_syl_fname and src_len > 2:
        # Keep BOTH syllables of the family name in front of the marker.
        parsed = src[:2] + "~" + src[2:]
    else:
        parsed = src[0] + "~" + src[1:]

    return parsed, warnings


def _kor_corp_name_rom(src):
    """
    Romanize a string that is not treated as a personal name.

    A "(주)" or "(유)" marker at either end is detached before romanization
    and re-attached afterwards as "(Chu)" or "(Yu)" on the same side. The
    remaining tokens are romanized one by one, capitalized, and finally run
    through the FKR035 table.
    """
    def _detach(text, leading, trailing):
        # Returns the stripped text and the side the marker was found on.
        # When both ends carry the marker, both are removed and the side
        # reported is "R".
        side = 0
        if text.startswith(leading):
            text = text[4:]
            side = "L"
        if text.endswith(trailing):
            text = text[:-4]
            side = "R"
        return text, side

    src, chu = _detach(src, "(주) ", " (주)")
    src, yu = _detach(src, "(유) ", " (유)")

    rom = _capitalize(
            " ".join(_romanize_oclc_auto(tok) for tok in src.split(" ")))

    if chu == "L":
        rom = f"(Chu) {rom}"
    elif chu == "R":
        rom = f"{rom} (Chu)"

    if yu == "L":
        rom = f"(Yu) {rom}"
    elif yu == "R":
        rom = f"{rom} (Yu)"

    # FKR035: Replace established names
    return _replace_map(rom, KCONF["fkr035"])


def _romanize_oclc_auto(kor):
    """
    Romanize a string word by word.

    The string goes through the FKR050/FKR052 tables, is split on spaces,
    each word is romanized with `_kor_rom`, and the re-joined result goes
    through the FKR059-FKR066 tables and a final whitespace clean-up.
    """
    # FKR050: Starts preprocessing symbol
    _fkr_log(50)
    for label, table in KCONF["fkr050"].items():
        logger.debug(f"Applying fkr050[{label}]")
        kor = _replace_map(kor, table)

    # See https://github.com/lcnetdev/scriptshifter/issues/19
    kor = re.sub("제([0-9])", "제 \\1", kor)

    # FKR052: Replace Che+number
    _fkr_log(52)
    for label, table in KCONF["fkr052"].items():
        logger.debug(f"Applying fkr052[{label}]")
        kor = _replace_map(kor, table)

    # Trim the ends and squeeze whitespace runs before marking glottals.
    trimmed = kor.strip()
    kor = re.sub(r"\s{2,}", " ", trimmed)
    kor = kor.replace("^", " GLOTTAL ")

    logger.debug(f"Korean before romanization: {kor}")

    rom = " ".join(_kor_rom(word) for word in kor.split(" "))

    # FKR059: Apply glottalization
    padded = f" {rom.strip()} "
    rom = _replace_map(padded, {" GLOTTAL ": "", "*": "", "^": ""})

    # FKR060: Process number + -년/-년도/-년대
    # TODO Add leading whitespace as per L1221? L1202 already added one.
    rom = _replace_map(rom, KCONF["fkr060"])

    padded = f" {rom.strip()} "
    rom = re.sub(r"\s{2,}", " ", padded)

    # FKR061: Jurisdiction (시)
    # FKR062: Historical place names
    # FKR063: Jurisdiction (국,도,군,구)
    # FKR064: Temple names of Kings, Queens, etc. (except 조/종)
    # FKR065: Frequent historical names
    for fkr_i in (61, 62, 63, 64, 65):
        _fkr_log(fkr_i)
        rom = _replace_map(rom, KCONF[f"fkr{fkr_i:03}"])

    # FKR066: Starts restore symbols
    _fkr_log(66)
    for label, table in KCONF["fkr066"].items():
        logger.debug(f"Applying FKR066[{label}]")
        rom = _replace_map(rom, table)

    # Remove spaces from before punctuation signs.
    rom = re.sub(r" (?=[,.;:?!])", "", rom.strip())

    return re.sub(r"\s{2,}", " ", rom)


# FKR068: Exceptions, Exceptions to initial sound law, Proper names
def _kor_rom(kor):
    """
    Romanize a single word.

    Every character at or above `CP_MIN` is encoded as
    ``i<initial>#m<medial>#f<final>``; the encoded characters are joined with
    "~" and terminated with "E". That intermediate string is then rewritten
    by the FKR071-FKR111 tables, and the remaining FKR112-FKR121 rules are
    decided by looking at the original word.

    :param str kor: One word. An empty string is accepted and yields an
        empty string, unless a configuration table rewrites it.

    :return: The romanized word.
    :rtype: str
    """
    kor = re.sub(r"\s{2,}", " ", kor.strip())
    orig = kor

    # FKR069: Irregular sound change list
    kor = _replace_map(kor, KCONF["fkr069"])

    # FKR070: [n] insertion position mark +
    niun = kor.find("+")
    if niun > -1:
        kor = kor.replace("+", "")
        orig = kor

    # Count the code points below CP_MIN and drop that many characters from
    # the FRONT of the word.
    # NOTE(review): this presumably assumes that non-Korean characters only
    # occur as a prefix - confirm.
    non_kor = 0
    cpoints = tuple(ord(c) for c in kor)
    for cp in cpoints:
        if cp < CP_MIN:
            non_kor += 1
            kor = kor[1:]

    rom_ls = []
    for char in kor:
        cp = ord(char) - CP_MIN
        ini = "i" + str(cp // 588)
        med = "m" + str((cp // 28) % 21)
        fin = "f" + str(cp % 28)
        rom_ls.append("#".join((ini, med, fin)))
    rom = "~".join(rom_ls)
    if len(rom):
        rom = rom + "E"

    # FKR071: [n] insertion
    if niun > -1:
        niun_loc = rom.find("~")
        # Advance until the niun'th occurrence of ~
        # If niun is 0 or 1 the loop will be skipped.
        for i in range(niun - 1):
            niun_loc = rom.find("~", niun_loc + 1)
        rom_niun_a = rom[:niun_loc]
        rom_niun_b = rom[niun_loc + 1:]
        if re.match("i11#m(?:2|6|12|17|20)", rom_niun_b):
            _fkr_log(71)
            rom_niun_b = rom_niun_b.replace("i11#m", "i2#m", 1)

        # FKR072: [n]+[l] >[l] + [l]
        if rom_niun_b.startswith("i5#") and rom_niun_a.endswith("f4"):
            _fkr_log(72)
            # NOTE(review): unlike FKR071 above, the replacement drops the
            # "#" separator ("i2" rather than "i2#") - confirm against the
            # FKR tables that this is intended.
            rom_niun_b = rom_niun_b.replace("i5#", "i2", 1)

        rom = f"{rom_niun_a}~{rom_niun_b}"

    # FKR073: Palatalization: ㄷ+이,ㄷ+여,ㄷ+히,ㄷ+혀
    # FKR074: Palatalization: ㅌ+이,ㅌ+히,ㅌ+히,ㅌ+혀
    # FKR075: Consonant assimilation ㄱ
    # FKR076: Consonant assimilation ㄲ
    # FKR077: Consonant assimilation ㄳ : ㄱ,ㄴ,ㄹ,ㅁ,ㅇ
    # FKR078: Consonant assimilation ㄴ
    # FKR079: Consonant assimilation ㄵ: ㄱ,ㄴ,ㄷ,ㅈ
    # FKR080: Consonant assimilation ㄶ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR081: Consonant assimilation ㄷ
    # FKR082: Consonant assimilation ㄹ
    # FKR083: Consonant assimilation ㄺ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR084: Consonant assimilation ㄻ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR085: Consonant assimilation ㄼ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR086: Consonant assimilation ㄾ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR087: Consonant assimilation ㄿ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR088: Consonant assimilation ㅀ : ㄱ,ㄴ,ㄷ,ㅈ
    # FKR089: Consonant assimilation ㅁ
    # FKR090: Consonant assimilation ㅂ
    # FKR091: Consonant assimilation ㅄ
    # FKR092: Consonant assimilation ㅅ
    # FKR093: Consonant assimilation ㅆ
    # FKR094: Consonant assimilation ㅇ
    # FKR095: Consonant assimilation ㅈ
    # FKR096: Consonant assimilation ㅊ
    # FKR097: Consonant assimilation ㅋ
    # FKR098: Consonant assimilation ㅌ
    # FKR099: Consonant assimilation ㅍ
    # FKR100: Consonant assimilation ㅎ
    # FKR101: digraphic coda + ㅇ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,ㄽ,ㄾ,ㄿ,ㅀ
    # FKR102: digraphic coda + ㅎ: ㄵ,ㄶ,ㄺ,ㄻ,ㄼ,(ㄽ),ㄾ,ㄿ,ㅀ
    # FKR103: Vocalization 1 (except ㄹ+ㄷ, ㄹ+ㅈ) voiced + unvoiced
    # FKR104: Vocalization 2 (except ㄹ+ㄷ, ㄹ+ㅈ) unvoiced + voiced
    # FKR105: Vocalization 3 (ㄹ+ㄷ, ㄹ+ㅈ)
    # FKR106: Final sound law
    # FKR107: Exception for '쉬' = shi
    # FKR108: Exception for 'ㄴㄱ'= n'g
    for fkr_i in range(73, 109):
        _fkr_log(fkr_i)
        _bk = rom
        rom = _replace_map(rom, KCONF[f"fkr{fkr_i:03}"])
        if _bk != rom:
            logger.debug(f"FKR{fkr_i} substitution: {rom} (was: {_bk})")

    # FKR109: Convert everything else
    _fkr_log(109)
    for pos, data in KCONF["fkr109"].items():
        rom = _replace_map(rom, data)

    # FKR110: Convert symbols
    rom = _replace_map(rom, {"#": "", "~": ""})

    if non_kor > 0:
        # Modified from K-Romanizer:1727 in that it does not append a hyphen
        # if the whole word is non-Korean.
        rom = f"{orig[:non_kor]}-{rom}" if len(rom) else orig

    # FKR111: ㄹ + vowel/ㅎ/ㄹ, ["lr","ll"] must be in the last of the array
    rom = _replace_map(rom, KCONF["fkr111"])

    # FKR112: Exceptions to initial sound law
    # FKR113: Check loan words by the first 1 letter
    # FKR114: Check loan words by the first 2 letters
    # FKR115: Check loan words by the first 3 letters
    is_non_kor = orig.startswith(tuple(KCONF["fkr113-115"]))

    # FKR116: Exceptions to initial sound law - particles
    is_particle = orig.startswith(tuple(KCONF["fkr116"]["particles"]))

    if len(orig) > 1 and not is_non_kor and not is_particle:
        if rom.startswith(tuple(KCONF["fkr116"]["replace_initials"].keys())):
            rom = _replace_map(rom, KCONF["fkr116"]["replace_initials"])

    # FKR117: Proper names _StringPoper Does not work because of breves
    if (
            # FKR118
            orig in KCONF["fkr118"] or
            # FKR119
            orig in KCONF["fkr119"]["word"] or
            (
                orig[:-1] in KCONF["fkr119"]["word"] and
                orig.endswith(tuple(KCONF["fkr119"]["suffix"]))
            ) or
            # FKR120
            orig.endswith(tuple(KCONF["fkr120"]))):
        # Slicing instead of indexing: `rom` is empty for an empty word.
        rom = rom[:1].upper() + rom[1:]

    # FKR121: Loan words beginning with L
    if f" {orig} " in KCONF["fkr121"]:
        rom = _replace_map(rom[:1], {"R": "L", "r": "l"}) + rom[1:]

    # @TODO Move this to a generic normalization step (not only for K)
    # Letter + combining breve (U+0306) -> precomposed letter. Written with
    # escapes because both forms render identically in most editors.
    rom = _replace_map(rom, {
        "o\u0306": "\u014f",
        "u\u0306": "\u016d",
        "O\u0306": "\u014e",
        "U\u0306": "\u016c",
    })

    return rom


def _marc8_hancha(data):
    """ Run `data` through the FKR142 replacement table. """
    # FKR142: Chinese character list
    _fkr_log(142)
    table = KCONF["fkr142"]

    return _replace_map(data, table)


def _hancha2hangul(data):
    """
    Convert Chinese characters (and Katakana, via FKR180) to Hangul.

    The FKR143-FKR170 tables are applied first. The characters whose reading
    depends on a neighbouring character (不 and the ones listed under
    FKR172-FKR179) are resolved afterwards, one occurrence at a time.

    :param str data: Source text.

    :return: The converted text, stripped, with whitespace runs of two or
        more characters collapsed to one space.
    :rtype: str
    """
    # Pad the start of the string and of every line with a space.
    data = " " + data.replace("\n", "\n ")

    # FKR143: Process exceptions first
    # FKR144: Apply initial sound law (Except: 列, 烈, 裂, 劣)
    # FKR145: Simplified characters, variants
    # FKR146: Some characters from expanded list
    # FKR147: Chinese characters 1-500 車=차
    # FKR148: Chinese characters 501-750 串=관
    # FKR149: Chinese characters 751-1000 金=금, 娘=랑
    # FKR150: Chinese characters 1001-1250
    # FKR151: Chinese characters 1251-1500 excluding: 列, 烈, 裂, 劣
    # FKR152: Chinese characters 1501-1750 excluding: 律, 率, 栗, 慄
    # FKR153: Chinese characters 1751-2000
    # FKR154: 不,Chinese characters 2001-2250 excluding: 不
    # FKR155: Chinese characters 2251-2500 塞=색
    # FKR156: Chinese characters 2501-2750
    # FKR157: Chinese characters 2751-3000
    # FKR158: Chinese characters 3001-3250
    # FKR159: Chinese characters 3251-3500
    # FKR160: Chinese characters 3501-3750
    # FKR161: Chinese characters 3751-4000
    # FKR162: Chinese characters 4001-4250
    # FKR163: Chinese characters 4251-4500
    # FKR164: Chinese characters 4501-4750
    # FKR165: Chinese characters 4751-5000
    # FKR166: Chinese characters 5001-5250
    # FKR167: Chinese characters 5251-5500
    # FKR168: Chinese characters 5501-5750
    # FKR169: Chinese characters 5751-5978
    # FKR170: Chinese characters, Japanese Chinese characters
    for i in range(143, 171):
        _fkr_log(i)
        data = _replace_map(data, KCONF[f"fkr{i}"])

    # FKR171: Chinese characters, pronunciation of 不 (부)
    # Write down indices of occurrences of "不". Every replacement below
    # swaps one character for one character, so the indices stay valid.
    idx = [i for i, item in enumerate(data) if item == "不"]
    for i in idx:
        # A character at the very end of the string has no successor; treat
        # it as if it were followed by a space instead of raising IndexError.
        val = ord(data[i + 1:i + 2] or " ")
        if (val > 45795 and val < 46384) or (val > 51087 and val < 51676):
            data = data.replace("不", "부", 1)
        else:
            data = data.replace("不", "불", 1)

    # FKR172: Chinese characters, pronunciation of 列 (렬)
    # FKR173: Chinese characters, pronunciation of 烈 (렬)
    # FKR174: Chinese characters, pronunciation of 裂 (렬)
    # FKR175: Chinese characters, pronunciation of 劣 (렬)
    # FKR176: Chinese characters, pronunciation of 律 (률)
    # FKR177: Chinese characters, pronunciation of 率 (률)
    # FKR178: Chinese characters, pronunciation of 慄 (률)
    # FKR179: Chinese characters, pronunciation of 栗 (률)
    # NOTE(review): the test below inspects the FOLLOWING character and
    # always emits 열/렬, also for the characters commented as 률 above -
    # confirm against K-Romanizer.
    for char in KCONF["fkr172-179"]:
        idx = [i for i, item in enumerate(data) if item == char]
        for i in idx:
            # Same end-of-string handling as for 不 above.
            val = ord(data[i + 1:i + 2] or " ")
            coda_value = (val - CP_MIN) % 28
            if coda_value == 1 or coda_value == 4 or val < 100:  # TODO verify
                data = data.replace(char, "열", 1)
            else:
                data = data.replace(char, "렬", 1)

    # FKR180: Katakana
    _fkr_log(180)
    data = _replace_map(data, KCONF["fkr180"])

    return re.sub(r"\s{2,}", " ", data.strip())


def _replace_map(src, rmap, *args, **kw):
    """ Replace occurrences in a string according to a map. """
    for k, v in rmap.items():
        src = src.replace(k, v, *args, **kw)

    return src


def _kor_fname_rom(fname):
    """
    Romanize a first name.

    Every character is encoded as ``i<initial>#m<medial>#f<final>``; the
    encoded characters are joined with "~" and terminated with "E". The
    encoded string is inspected to guess whether the name is native Korean
    or Sino-Korean, and is then rewritten by the FKR014-FKR031 tables.

    :param str fname: First name in Hangul.

    :return: The romanized first name.
    :rtype: str
    """
    rom_ls = []
    for char in fname:
        cp = ord(char) - CP_MIN
        ini = "i" + str(cp // 588)
        med = "m" + str((cp // 28) % 21)
        fin = "f" + str(cp % 28)
        rom_ls.append("#".join((ini, med, fin)))
    rom = "~".join(rom_ls) + "E"

    # FKR011: Check native Korean name, by coda
    if any(tok in rom for tok in KCONF["fkr011"]["nat_fin"]):
        origin_by_fin = "native"
    else:
        origin_by_fin = "sino"

    # By initial: native only if a native marker is present and no
    # Sino-Korean marker is.
    has_nat_ini = any(tok in rom for tok in KCONF["fkr011"]["nat_ini"])
    has_sino_ini = any(tok in rom for tok in KCONF["fkr011"]["sino_ini"])
    origin_by_ini = "native" if has_nat_ini and not has_sino_ini else "sino"

    # FKR012: Check native Korean name, by vowel & coda
    # This used to iterate KCONF["fkr011"], i.e. over the key names
    # "nat_fin", "nat_ini", "sino_ini", which cannot occur in `rom`.
    # A configuration without an "fkr012" entry keeps the old outcome.
    try:
        med_tokens = KCONF["fkr012"]
    except KeyError:
        med_tokens = ()
    if any(tok in rom for tok in med_tokens):
        origin_by_med = "native"
    else:
        origin_by_med = "sino"

    # FKR013: Check native Korean name, by ㅢ
    if "m19#" in rom:
        if "의" in fname or "희" in fname:
            origin_by_med = "sino"
        else:
            origin_by_med = "native"

    # FKR014: Consonant assimilation ㄱ
    # FKR015: Consonant assimilation ㄲ
    # FKR016: Consonant assimilation ㄴ
    # FKR017: Consonant assimilation ㄷ
    # FKR018: Consonant assimilation ㄹ
    # FKR019: Consonant assimilation ㅁ
    # FKR020: Consonant assimilation ㅂ
    # FKR021: Consonant assimilation ㅅ
    # FKR022: Consonant assimilation ㅆ
    # FKR023: Consonant assimilation ㅇ
    # FKR024: Consonant assimilation ㅈ
    # FKR025: Consonant assimilation ㅊ
    # FKR026: Consonant assimilation ㅎ
    # FKR027: Final sound law
    # FKR028: Vocalization 1 (except ㄹ+ㄷ, ㄹ+ㅈ): voiced+unvoiced
    # FKR029: Vocalization 2 unvoiced+voiced
    for i in range(14, 30):
        _fkr_log(i)
        rom = _replace_map(rom, KCONF[f"fkr{i:03}"])

    # FKR030: Convert everything else
    _fkr_log(30)
    for k, cmap in KCONF["fkr030"].items():
        logger.debug(f"Applying FKR030[\"{k}\"]")
        rom = _replace_map(rom, cmap)

    # Only the first occurrence of each key is replaced.
    rom = _replace_map(rom.replace("#", ""), {"swi": "shwi", "Swi": "Shwi"}, 1)

    if len(fname) == 2:
        rom = rom.replace("~", "-")
    else:
        rom = _replace_map(rom, {"n~g": "n'g", "~": ""})

    # FKR031: ㄹ + vowels/ㅎ/ㄹ ["l-r","l-l"] does not work USE alternative
    _fkr_log(31)
    for k, cmap in KCONF["fkr031"].items():
        logger.debug(f"Applying FKR031[\"{k}\"]")
        rom = _replace_map(rom, cmap)

    # FKR032: Capitalization
    # Slicing instead of indexing: `rom` may be empty at this point.
    rom = rom[:1].upper() + rom[1:]

    # FKR033: Remove hyphen in bisyllabic native Korean first name
    if (
            len(fname) == 2
            and "native" in (origin_by_ini, origin_by_fin, origin_by_med)):
        rom = _replace_map(rom, {"n-g": "n'g", "-": ""})

        # FKR034: First name, initial sound law
        # NOTE(review): this loop only runs for bisyllabic native names
        # because it is nested in the FKR033 condition, and `replace`
        # rewrites every occurrence of `k`, not just the matched prefix -
        # confirm both against K-Romanizer.
        for k, v in KCONF["fkr034"].items():
            if rom.startswith(k):
                rom = rom.replace(k, v)

    return rom


def _kor_lname_rom(lname):
    """
    Romanize a family name.

    :return: The romanized name, or `False` when the final replacement table
        leaves the name unchanged.
    """
    if len(lname) != 2:
        # FKR182: 1-character Chinese names.
        _fkr_log(182)
        lname = _replace_map(lname, KCONF["fkr182"])
        # FKR183: 1-character names.
        _fkr_log(183)
        table = KCONF["fkr183"]
    else:
        # FKR181: 2-character names.
        _fkr_log(181)
        table = KCONF["fkr181"]

    rom = _replace_map(lname, table)
    if rom == lname:
        return False

    return rom


def _capitalize(src):
    """ Only capitalize first word and words preceded by space."""
    orig_ls = src.split(" ")
    cap_ls = [orig[0].upper() + orig[1:] for orig in orig_ls]

    return " ".join(cap_ls)


def _fkr_log(fkr_i):
    """
    Log at debug level that a FKR rule is about to be applied.

    :param int fkr_i: Rule number; formatted as ``FKRnnn`` (zero-padded to
        three digits) to look up its description in `FKR_IDX`.
    """
    # Skip the lookup entirely when nothing would be logged.
    if not logger.isEnabledFor(logging.DEBUG):
        return

    fkr_k = f"FKR{fkr_i:03}"
    # A rule missing from the index must not abort the romanization: this
    # helper is only there to make logs easier to read.
    logger.debug(
            "Applying %s: %s", fkr_k, FKR_IDX.get(fkr_k, "(no description)"))