6 月之前 · 5eea9a9571
--- a/scriptshifter/trans.py
+++ b/scriptshifter/trans.py
@@ -2,10 +2,11 @@ import logging
 
				 
			
 
				 from importlib import import_module
			
 
				 from re import Pattern, compile
			
 
				+from unicodedata import normalize as precomp_normalize
			
 
				 
			
 
				 from scriptshifter.exceptions import BREAK, CONT
			
 
				 from scriptshifter.tables import (
			
 
				-        BOW, EOW, WORD_BOUNDARY, FEAT_CASEI, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
			
 
				+        BOW, EOW, WORD_BOUNDARY, FEAT_R2S, FEAT_S2R, HOOK_PKG_PATH,
			
 
				         get_connection, get_lang_dcap, get_lang_general, get_lang_hooks,
			
 
				         get_lang_ignore, get_lang_map, get_lang_normalize)
			
 
				 
			
@@ -345,6 +346,17 @@ def _normalize_src(ctx, norm_rules):
 
				     NOTE: this manipulates the protected source attribute so it may not
			
 
				     correspond to the originally provided source.
			
 
				     """
			
 
				+    # Normalize precomposed Unicode characters.
			
 
				+    #
			
 
				+    # In using diacritics, LC standards prefer the decomposed form (combining
			
 
				+    # diacritic + base character) to the pre-composed form (single Unicode
			
 
				+    # symbol for the letter with diacritic).
			
 
				+    #
			
 
				+    # Note: only safe for R2S.
			
 
				+    if ctx.t_dir == FEAT_R2S:
			
 
				+        logger.debug("Normalizing pre-composed symbols.")
			
 
				+        ctx._src = precomp_normalize("NFD", ctx.src)
			
 
				+
			
 
				     for nk, nv in norm_rules.items():
			
 
				         ctx._src = ctx.src.replace(nk, nv)