Pārlūkot izejas kodu

Options (#51)

* Add basic facility for extra options in GUI and API.

* Add MARC field option for Korean names.
Stefano Cossu 1 gadu atpakaļ
vecāks
revīzija
a391ab3d1b

+ 23 - 12
scriptshifter/hooks/korean/romanizer.py

@@ -57,16 +57,16 @@ def s2r_names_post_config(ctx):
     One or more names can be transcribed. A comma or middle dot (U+00B7) is
     to be used as separator for multiple names.
     """
-    ctx.dest, ctx.warnings = _romanize_names(ctx.src)
+    ctx.dest, ctx.warnings = _romanize_names(ctx.src, ctx.options)
 
     return BREAK
 
 
-def _romanize_nonames(src, capitalize="first", hancha=True):
+def _romanize_nonames(src, options):
     """ Main Romanization function for non-name strings. """
 
     # FKR038: Convert Chinese characters to Hangul
-    if hancha:
+    if options.get("hancha", True):
         kor = _hancha2hangul(_marc8_hancha(src))
     else:
         kor = src
@@ -92,10 +92,10 @@ def _romanize_nonames(src, capitalize="first", hancha=True):
 
     logger.debug(f"Before capitalization: {rom}")
     # FKR042: Capitalize all first letters
-    if capitalize == "all":
+    if options["capitalize"] == "all":
         rom = _capitalize(rom)
     # FKR043: Capitalize the first letter
-    elif capitalize == "first":
+    elif options["capitalize"] == "first":
         rom = rom[0].upper() + rom[1:]
 
     # FKR044: Ambiguities
@@ -115,7 +115,7 @@ def _romanize_nonames(src, capitalize="first", hancha=True):
     return rom, warnings
 
 
-def _romanize_names(src):
+def _romanize_names(src, options):
     """
     Main Romanization function for names.
 
@@ -135,14 +135,15 @@ def _romanize_names(src):
     kor_ls = src.split(",") if "," in src else src.split("·")
 
     for kor in kor_ls:
-        rom, _warnings = _romanize_name(kor.strip())
+        rom, _warnings = _romanize_name(kor.strip(), options)
         rom_ls.append(rom)
+
         warnings.extend(_warnings)
 
     return ", ".join(rom_ls), warnings
 
 
-def _romanize_name(src):
+def _romanize_name(src, options):
     warnings = []
 
     # FKR001: Conversion, Family names in Chinese (dealing with 金 and 李)
@@ -162,7 +163,7 @@ def _romanize_name(src):
 
     if re.search("[a-zA-Z0-9]", src):
         warnings.append(f"{src} is not a recognized personal name.")
-        return None, warnings
+        return "", warnings
 
     # `parsed` can either be a modified Korean string with markers, or in case
     # of a foreign name, the final romanized name.
@@ -180,11 +181,21 @@ def _romanize_name(src):
 
             if not any(lname_rom_ls):
                 warnings.append(f"{parsed} is not a recognized Korean name.")
-                return None, warnings
+                return "", warnings
 
             lname_rom = " ".join(lname_rom_ls)
 
-            rom = f"{lname_rom} {fname_rom}"
+            # Add comma after the last name for certain MARC fields.
+            marc_field_str = options.get("marc_field", "0")
+            try:
+                marc_field = int(marc_field_str)
+            except TypeError:
+                raise ValueError(
+                        f"{marc_field_str} is not a valid MARC field code.")
+            if marc_field in (100, 600, 700, 800):
+                rom = f"{lname_rom}, {fname_rom}"
+            else:
+                rom = f"{lname_rom} {fname_rom}"
 
             if False:
                 # TODO add option for authoritative name.
@@ -198,7 +209,7 @@ def _romanize_name(src):
             return parsed, warnings
 
     warnings.append(f"{src} is not a recognized Korean name.")
-    return None, warnings
+    return "", warnings
 
 
 def _parse_kor_name(src):

+ 20 - 10
scriptshifter/rest_api.py

@@ -2,9 +2,10 @@ import logging
 
 from base64 import b64encode
 from copy import deepcopy
+from json import loads
 from os import environ, urandom
 
-from flask import Flask, Response, jsonify, render_template, request
+from flask import Flask, jsonify, render_template, request
 
 from scriptshifter.tables import list_tables, load_table
 from scriptshifter.trans import transliterate
@@ -60,23 +61,32 @@ def dump_table(lang):
     return jsonify(tbl)
 
 
-@app.route("/transliterate", methods=["POST"])
-def transliterate_form():
-    """ UI version of the `trans` endpoint. Passes everything via form. """
-    return transliterate_req(
-            request.form["lang"], request.form.get("r2s", False))
+@app.route("/options/<lang>", methods=["GET"])
+def get_options(lang):
+    """
+    Get extra options for a table.
+    """
+    tbl = load_table(lang)
 
+    return jsonify(tbl.get("options", []))
 
-@app.route("/trans/<lang>/r2s", methods=["POST"], defaults={"r2s": True})
-@app.route("/trans/<lang>", methods=["POST"])
-def transliterate_req(lang, r2s=False):
+
+@app.route("/trans", methods=["POST"])
+def transliterate_req():
+    lang = request.form["lang"]
     in_txt = request.form["text"]
     capitalize = request.form.get("capitalize", False)
+    t_dir = request.form.get("t_dir", "s2r")
+    if t_dir not in ("s2r", "r2s"):
+        return f"Invalid direction: {t_dir}", 400
+
     if not len(in_txt):
         return ("No input text provided! ", 400)
+    options = loads(request.form.get("options", {}))
+    logger.debug(f"Extra options: {options}")
 
     try:
-        out, warnings = transliterate(in_txt, lang, r2s, capitalize)
+        out, warnings = transliterate(in_txt, lang, t_dir, options, capitalize)
     except (NotImplementedError, ValueError) as e:
         return (str(e), 400)
 

+ 7 - 0
scriptshifter/tables/data/korean_names.yml

@@ -2,6 +2,13 @@ general:
   name: Korean (Names)
   description: Korean names S2R.
 
+options:
+  - id: marc_field
+    label: MARC field
+    description: Romanize according to a specific MARC field format. Enter 0 if not applicable.
+    type: int
+    default: 0
+
 script_to_roman:
   hooks:
     post_config:

+ 1 - 1
scriptshifter/tables/data/korean_nonames.yml

@@ -1,5 +1,5 @@
 general:
-  name: Korean (Nnon-names)
+  name: Korean (Non-names)
   description: Korean S2R.
 
 script_to_roman:

+ 64 - 16
scriptshifter/templates/index.html

@@ -27,12 +27,18 @@
             display: none;
         }
 
+        p.input_descr {
+            font-size: 80%;
+            font-style: italic;
+            margin-bottom: .5rem;
+        }
+
     </style>
 
 
 
 
-    <form id="transliterate" action="/transliterate" method="POST">
+    <form id="transliterate" action="/trans" method="POST">
         <fieldset>
             <label for="text">Input text</label>
             <textarea id="text" name="text"></textarea>
@@ -42,9 +48,19 @@
                     <option value="{{ k }}">{{ v["name"] }}</option>
                 {% endfor %}
             </select>
+        </fieldset>
+        <fieldset>
+            <legend>Direction</legend>
             <div>
-                <label class="label-inline" for="r2s">Roman to Script</label>
-                <input type="checkbox" id="r2s" name="r2s">
+                <label class="label-inline" for="s2r">Script to Roman</label>
+                <input
+                        type="radio" id="opt_s2r" name="t_dir" value="s2r"
+                        checked>
+            </div>
+            <div>
+                <label class="label-inline" for="r2s">Roman to script</label>
+                <input
+                        type="radio" id="opt_r2s" name="t_dir" value="r2s">
             </div>
         </fieldset>
         <fieldset>
@@ -64,6 +80,7 @@
                 <input type="radio" id="all" name="capitalize" value="all">
             </div>
         </fieldset>
+        <div id="options"></div>
         <fieldset>
             <input class="button-primary" type="submit" value="Transliterate!">
         </fieldset>
@@ -77,26 +94,63 @@
     </div>
 
     <script type="text/javascript">
-        document.getElementById('transliterate').addEventListener('submit',(event)=>{
+        document.getElementById('lang').addEventListener('change',(event)=>{
+            let lang = document.getElementById("lang").value;
+
+            fetch('/options/' + lang)
+              .then(response=>response.json())
+                .then((data) => {
+                    document.getElementById("options").replaceChildren();
+                    data.forEach((opt)=>{
+                        let fset = document.createElement("fieldset");
+                        let label = document.createElement("label");
+                        label.setAttribute("for", opt.id);
+                        label.append(opt.label);
+
+                        let input = document.createElement("input");
+                        input.setAttribute("id", opt.id);
+                        input.setAttribute("name", opt.id);
+                        input.classList.add("option_i");
+                        input.value = opt.default;
+
+                        let descr = document.createElement("p");
+                        descr.setAttribute("class", "input_descr");
+                        descr.append(opt.description);
+
+                        fset.append(label, descr, input);
+                        document.getElementById("options").append(fset);
+                    });
+                });
+
+            event.preventDefault();
+            return false;
+        })
+        document.getElementById('lang').dispatchEvent(new Event('change'));
 
 
+        document.getElementById('transliterate').addEventListener('submit',(event)=>{
 
             const data = new URLSearchParams();
 
+            let t_dir = Array.from(document.getElementsByName("t_dir")).find(r => r.checked).value;
+
             let capitalize = Array.from(document.getElementsByName("capitalize")).find(r => r.checked).value;
 
 
             data.append('text',document.getElementById('text').value)
             data.append('lang',document.getElementById('lang').value)
-
-            if (document.getElementById('r2s').checked){
-                data.append('r2s','on')
-            }
-
+            data.append('t_dir',t_dir)
             data.append('capitalize',capitalize)
 
+            let options = {};
+            let option_inputs = document.getElementsByClassName("option_i");
+            for (i = 0; i < option_inputs.length; i++) {
+                let el = option_inputs[i];
+                options[el.getAttribute('id')] = el.value;
+            };
+            data.append('options', JSON.stringify(options));
 
-            fetch('/transliterate', {
+            fetch('/trans', {
                 method: 'post',
                 body: data,
             })
@@ -117,12 +171,6 @@
               alert("Error:\n" + error)
             });
 
-
-
-
-
-
-
             event.preventDefault()
             return false
 

+ 19 - 7
scriptshifter/trans.py

@@ -36,7 +36,7 @@ class Context:
         self.dest_ls = []
 
 
-def transliterate(src, lang, r2s=False, capitalize=False):
+def transliterate(src, lang, t_dir="s2r", options={}, capitalize=False):
     """
     Transliterate a single string.
 
@@ -45,6 +45,15 @@ def transliterate(src, lang, r2s=False, capitalize=False):
 
         lang (str): Language name.
 
+        t_dir (str): Transliteration direction. Either `s2r` for
+            script-to-Roman (default) or `r2s`  for Roman-to-script.
+
+        capitalize: capitalize words: one of `False` (no change - default),
+            `"first"` (only first letter), or `"all"` (first letter of each
+            word).
+
+        options: extra script-dependent options. Defaults to the empty map.
+
     Keyword args:
         r2s (bool): If False (the default), the source is considered to be a
         non-latin script in the language and script specified, and the output
@@ -54,8 +63,8 @@ def transliterate(src, lang, r2s=False, capitalize=False):
     Return:
         str: The transliterated string.
     """
-    source_str = "Latin" if r2s else lang
-    target_str = lang if r2s else "Latin"
+    source_str = "Latin" if t_dir == "r2s" else lang
+    target_str = lang if t_dir == "r2s" else "Latin"
     logger.info(f"Transliteration is from {source_str} to {target_str}.")
 
     cfg = load_table(lang)
@@ -64,21 +73,24 @@ def transliterate(src, lang, r2s=False, capitalize=False):
     # General directives.
     general = cfg.get("general", {})
 
-    if not r2s and "script_to_roman" not in cfg:
+    if t_dir == "s2r" and "script_to_roman" not in cfg:
         raise NotImplementedError(
             f"Script-to-Roman transliteration not yet supported for {lang}."
         )
-    elif r2s and "roman_to_script" not in cfg:
+    elif t_dir == "r2s" and "roman_to_script" not in cfg:
         raise NotImplementedError(
             f"Roman-to-script transliteration not yet supported for {lang}."
         )
 
-    langsec = cfg["script_to_roman"] if not r2s else cfg["roman_to_script"]
+    langsec = (
+            cfg["script_to_roman"] if t_dir == "s2r"
+            else cfg["roman_to_script"])
     # langsec_dir = langsec.get("directives", {})
     langsec_hooks = langsec.get("hooks", {})
 
     src = src.strip()
-    ctx = Context(src, general, langsec, {"capitalize": capitalize})
+    options["capitalize"] = capitalize
+    ctx = Context(src, general, langsec, options)
 
     # This hook may take over the whole transliteration process or delegate it
     # to some external process, and return the output string directly.