Przeglądaj źródła

Merge pull request #188 from lcnetdev/capitalization

Capitalization
Stefano Cossu 2 miesięcy temu
rodzic
commit
3135e24997

+ 9 - 2
README.md

@@ -114,8 +114,9 @@ the drop-down automatically. The value must be one of the keys found in
 ## Command-line interface
 
 Various Scriptshifter commands can be accessed via the shell command `sscli`.
-At the moment only a few essential admin and testing tools are available. More
-commands can be made avaliable on an as-needed basis.
+At the moment a few essential admin and testing tools are available, as well as
+a transliteration function. More commands can be made available on an as-needed
+basis.
 
 Help menu:
 
@@ -129,6 +130,12 @@ Section help:
 /path/to/sscli admin --help
 ```
 
+Transliteration:
+
+```
+echo "王正强" | /path/to/sscli trans chinese -c first -o "marc_field=100"
+```
+
 
 ## Contributing
 

+ 34 - 0
scriptshifter/hooks/general/__init__.py

@@ -27,6 +27,25 @@ NORM8_RE = compile(r"([.,;:\(\[\{\)\]}])\s+([.,;:\(\[\{\)\]}])")
 logger = getLogger(__name__)
 
 
+def capitalize_pre_assembly(ctx):
+    """
+    Apply the user-selected capitalization to the unassembled token list.
+
+    The current `ctx.dest_ls` and the `capitalize` option (None if unset)
+    are handed to `_capitalize`; whatever it returns replaces `ctx.dest_ls`.
+    """
+    tokens = ctx.dest_ls
+    mode = ctx.options.get("capitalize")
+    ctx.dest_ls = _capitalize(tokens, mode)
+
+
+def capitalize_post_assembly(ctx):
+    """
+    Apply the user-selected capitalization to the assembled result string.
+
+    `ctx.dest` is split on single spaces, the tokens are passed through
+    `_capitalize` together with the `capitalize` option (None if unset), and
+    the outcome is re-joined with single spaces and returned. `ctx.dest`
+    itself is not reassigned here.
+    """
+    words = _capitalize(ctx.dest.split(" "), ctx.options.get("capitalize"))
+
+    return " ".join(words)
+
+
+
 def normalize_spacing_post_assembly(ctx):
     """
     Remove duplicate and unwanted whitespace around punctuation.
@@ -53,3 +72,18 @@ def normalize_spacing_post_assembly(ctx):
     # norm = NORM8_RE.sub(r"\1\2", norm)
 
     return norm
+
+
+def _capitalize(src, which):
+    """
+    Capitalize the first letter of the first token, or of every token.
+
+    NOTE: this function is only used for capitalizing hook-generated
+    transliterations, which are not normally processed. Double cap rules are
+    not applicable here.
+
+    `src` is a list of string tokens and is not modified in place. `which`
+    selects the mode: `first` capitalizes only the first token, `all`
+    capitalizes every token, and any other value (including None) leaves
+    the tokens unchanged.
+
+    Return a new list of tokens.
+    """
+    def _cap(tk):
+        # Slicing rather than indexing, so that empty tokens (e.g. from
+        # splitting on consecutive spaces) do not raise IndexError.
+        return tk[:1].upper() + tk[1:]
+
+    if which == "first":
+        return [_cap(tk) if i == 0 else tk for i, tk in enumerate(src)]
+
+    if which == "all":
+        return [_cap(tk) for tk in src]
+
+    # No (or unknown) capitalization requested: hand back the tokens as-is
+    # so that callers can always use the return value.
+    return list(src)

+ 1 - 9
scriptshifter/hooks/yiddish_/__init__.py

@@ -16,24 +16,16 @@ external package name.
 from yiddish import detransliterate, transliterate
 
 from scriptshifter.exceptions import BREAK
-from scriptshifter.tools import capitalize
 
 
 def s2r_post_config(ctx):
     """
     Script to Roman.
     """
-    rom = transliterate(
+    ctx.dest = transliterate(
             ctx.src, loc=True,
             loshn_koydesh=ctx.options.get("loshn_koydesh"))
 
-    if ctx.options["capitalize"] == "all":
-        rom = capitalize(rom)
-    elif ctx.options["capitalize"] == "first":
-        rom = rom[0].upper() + rom[1:]
-
-    ctx.dest = rom
-
     return BREAK
 
 

+ 0 - 9
scriptshifter/tools.py

@@ -1,9 +0,0 @@
-__doc__ = """ Common tools for core and hooks. """
-
-
-def capitalize(src):
-    """ Only capitalize first word and words preceded by space."""
-    orig_ls = src.split(" ")
-    cap_ls = [orig[0].upper() + orig[1:] for orig in orig_ls]
-
-    return " ".join(cap_ls)

+ 5 - 2
scriptshifter/trans.py

@@ -112,7 +112,7 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
             )
 
         # Normalize case before post_config and rule-based normalization.
-        if not ctx.general["case_sensitive"]:
+        if t_dir == FEAT_R2S and not ctx.general["case_sensitive"]:
             ctx._src = ctx.src.lower()
 
         # This hook may take over the whole transliteration process or delegate
@@ -270,7 +270,10 @@ def transliterate(src, lang, t_dir="s2r", capitalize=False, options={}):
                     # A match is found. Stop scanning tokens, append result,
                     # and proceed scanning the source.
 
-                    # Capitalization.
+                    # Capitalization. This applies double capitalization
+                    # rules. The external function in
+                    # scriptshifter.hooks.general._capitalize used for
+                    # non-table languages does not.
                     if (
                         (ctx.options["capitalize"] == "first" and ctx.cur == 0)
                         or

+ 4 - 11
sscli

@@ -65,15 +65,9 @@ def samples(lang):
     return test_sample(lang)
 
 
-@cli.group(name="trans")
-def trans_grp():
-    """ Transliteration and transcription operations. """
-    pass
-
-
-@trans_grp.command()
-@click.argument("src", type=click.File("r"))
+@cli.command(name="trans")
 @click.argument("lang")
+@click.argument("src", type=click.File("r"), default="-")
 @click.option(
         "-c", "--capitalize", default=None,
         help="Capitalize output: `first`, `all`, ot none (the default).")
@@ -85,12 +79,11 @@ def trans_grp():
         help=(
             "Language=specific option. Format: key=value. Multiple -o entries "
             "are possible."))
-def transliterate(src, lang, t_dir, capitalize, option):
+def trans_(src, lang, t_dir, capitalize, option):
     """
     Transliterate text from standard input.
 
-    e.g.: `echo "王正强" | /path/to/sscli trans transliterate chinese
-    -o "marc_field=100"'
+    e.g.: `echo "王正强" | /path/to/sscli trans chinese -o "marc_field=100"'
     """
     options = {}
     for opt in option: