Переглянути джерело

Test (#101)

* Yiddish transliteration via submodules.

* Update checkout workflow.

* Change refs for Yiddish submodules.

* Fix WORKDIR in Dockerfile

* Do not remove yiddish module.

* Manually add yiddish submodules.

* Use git clone instead of submodule.

* Move ext checkout to github actions.

* Chinese numerals (#97)

* WIP Parse Chinese numerals.

* WIP complete number parsing.

* Complete Chinese numerals:

* Use standard table override instead of pre-config hooks.
* Add few test strings.

* Complete numerals:

* Transliterate all numeric examples correctly
* Modify hook return logic for consistency
* WIP partial spacing fix.

* Some cleanup; upgrade docker OS.

* Add dependency for uwsgi.

* Squashed commit of the following: (#98)

commit 30859a52b9cc325c323b414133856d0af3ffc2a6
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 22:17:36 2024 -0500

    Move ext checkout to github actions.

commit 6d8da6df68ac764f90deb15861089095043fd4ba
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 21:45:01 2024 -0500

    Use git clone instead of submodule.

commit ade9da589179870d331b703ff526d7fff33e88bb
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 21:42:45 2024 -0500

    Manually add yiddish submodules.

commit 77cb9ef2959f611d0220cc405e0b584ece71147c
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 21:23:37 2024 -0500

    Do not remove yiddish module.

commit e405b3605dd2629ed5557ccc5fdd5fe8812799ed
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 09:11:41 2024 -0500

    Fix WORKDIR in Dockerfile

commit 95445ba642163e28b94df6736ad6946ad7dc76c0
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 09:07:50 2024 -0500

    Change refs for Yiddish submodules.

commit 208ea095e792195981f644497ccd5fcd55e15c1b
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 08:45:58 2024 -0500

    Update checkout workflow.

* Add debug output to /trans response.

* Split docker files and requirements.

* Add bad request debug handler.

* Add bad request debug handler.

* Adjust CI workflows.

* Fix image name typo.

* Refine triggers.

* Fix typo on test workflow trigger.

* Use JSON in POST body.

* Also use JSON in feedback request; update docs.

* Return json data in 400 debug.
Stefano Cossu 1 рік тому
батько
коміт
fa5b48dba6

+ 13 - 4
.github/workflows/push-docker-image.yml → .github/workflows/push-app-image.yml

@@ -1,8 +1,15 @@
-name: Push image to Docker Hub.
+name: Push app image
 on:
+  # This runs on v *.*.0 after the base image has been
+  # built and pushed, or on patch version tag.
   push:
     tags:
-      - "v*.*.*"
+      - "v*.*.[1-9]*"
+  workflow_run:
+    workflows: 
+      - "Push base image"
+    types:
+      - "completed"
 
 env:
   DOCKER_USER: lcnetdev
@@ -13,13 +20,15 @@ jobs:
   push-image-to-docker-hub:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - name: checkout repo
+        uses: actions/checkout@v4
         with:
           submodules: recursive
 
       - name: Build the Docker image
         run: >
-          docker build . --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
+          docker build -f Dockerfile .
+          --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
           --tag $DOCKER_USER/$REPO_NAME:latest
 
       - name: Login to Docker Hub

+ 46 - 0
.github/workflows/push-base-image.yml

@@ -0,0 +1,46 @@
+name: Push base image
+on:
+  push:
+    tags:
+      - "v*.*.0"  # only tags whose last version component is 0
+
+env:
+  DOCKER_USER: lcnetdev
+  # The Docker Hub token is read from secrets by the login step only.
+  REPO_NAME: scriptshifter-base
+
+jobs:
+  push-image-to-docker-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: checkout yiddish submodules (1/2)
+        uses: actions/checkout@v4
+        with:
+          repository: ibleaman/loshn-koydesh-pronunciation
+          path: ext/yiddish/yiddish/submodules/loshn-koydesh-pronunciation
+
+      - name: checkout yiddish submodules (2/2)
+        uses: actions/checkout@v4
+        with:
+          repository: ibleaman/hasidify_lexicon
+          path: ext/yiddish/yiddish/submodules/hasidify_lexicon
+
+      - name: Build the Docker image
+        run: >
+          docker build -f scriptshifter_base.Dockerfile .
+          --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
+          --tag $DOCKER_USER/$REPO_NAME:latest
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ env.DOCKER_USER }}  # same account used in the tags
+          password: ${{ secrets.DOCKER_HUB }}
+
+      - name: Push to Docker Hub
+        run: docker push $DOCKER_USER/$REPO_NAME --all-tags

+ 9 - 5
.github/workflows/push-test-image.yml

@@ -1,8 +1,8 @@
-name: Push test image to Docker Hub.
+name: Push test image
 on:
   push:
-    branch:
-      - "main"
+    branches:
+      - "test"
 
 env:
   DOCKER_USER: lcnetdev
@@ -13,12 +13,16 @@ jobs:
   push-image-to-docker-hub:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - name: checkout repo
+        uses: actions/checkout@v4
         with:
           submodules: recursive
 
       - name: Build the Docker image
-        run: docker build . --tag $DOCKER_USER/$REPO_NAME:test
+        run: >
+          docker build -f Dockerfile .
+          --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
+          --tag $DOCKER_USER/$REPO_NAME:test
 
       - name: Login to Docker Hub
         uses: docker/login-action@v3

+ 4 - 0
.gitmodules

@@ -1,3 +1,7 @@
 [submodule "ext/arabic_rom"]
 	path = ext/arabic_rom
 	url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git
+[submodule "ext/yiddish"]
+	path = ext/yiddish
+	url = https://github.com/scossu/yiddish.git
+	branch = loc

+ 7 - 21
Dockerfile

@@ -1,29 +1,15 @@
-FROM python:3.10-slim-bookworm
-
-RUN apt update
-RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-dev libpcre2-dev
-
-ENV TZ=America/New_York
-ENV _workroot "/usr/local/scriptshifter/src"
-
-WORKDIR ${_workroot}
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Remove development packages.
-RUN apt remove -y build-essential
-RUN apt autoremove -y
-
-RUN addgroup --system www
-RUN adduser --system www
-RUN gpasswd -a www www
+FROM lcnetdev/scriptshifter-base:latest
+ARG WORKROOT "/usr/local/scriptshifter/src"
 
+# Copy core application files. NOTE(review): ARG WORKROOT above lacks "=", so it may be unset here -- confirm.
+WORKDIR ${WORKROOT}
 COPY entrypoint.sh uwsgi.ini wsgi.py ./
-COPY ext ./ext/
 COPY scriptshifter ./scriptshifter/
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
 
 RUN chmod +x ./entrypoint.sh
-RUN chown -R www:www ${_workroot} .
+#RUN chown -R www:www ${WORKROOT} .
 
 EXPOSE 8000
 

+ 7 - 0
deps.txt

@@ -0,0 +1,7 @@
+# External dependencies; scriptshifter_base.Dockerfile installs these with pip.
+aksharamukha>=2.1,<3
+camel-tools>=1.5
+funcy>=1.15,<2
+pymarc>=4.0,<5
+repackage>=0.7.3
+./ext/yiddish

+ 23 - 0
doc/rest_api.md

@@ -69,6 +69,10 @@ Transliterate an input string into a given language.
 
 ### POST body
 
+MIME type: `application/json`
+
+Content: JSON object with the following keys:
+
 - `lang`: Language code as given by the `/languages` endpoint. 
 - `text`: Input text to be transliterated.
 - `capitalize`: One of `first` (capitalize the first letter of the input),
@@ -92,3 +96,22 @@ Content: JSON object containing two keys: `ouput` containing the transliterated
 string; and `warnings` containing a list of warnings. Characters not found in
 the mapping are copied verbatim in the transliterated string (see
 "Configuration files" section for more information).
+
+## `POST /feedback`
+
+Send a feedback form about a transliteration result.
+
+### POST body
+
+MIME type: `application/json`
+
+Content: JSON object with the following keys:
+
+- `lang`: language of the transliteration. Mandatory.
+- `src`: source text. Mandatory.
+- `t_dir`: transliteration direction. If omitted, it defaults to `s2r`.
+- `result`: result of the transliteration. Mandatory.
+- `expected`: expected result. Mandatory.
+- `options`: options passed to the request, if any.
+- `notes`: optional user notes.
+- `contact`: contact email for feedback. Optional.

+ 1 - 0
ext/yiddish

@@ -0,0 +1 @@
+Subproject commit 9bf22c55ca76710940e141de5d88922a9f55ed1f

+ 1 - 5
requirements.txt

@@ -1,9 +1,5 @@
-aksharamukha>=2.1,<3
-camel-tools>=1.5
+# Core application dependencies.
 flask>=2.3,<3
-funcy>=1.15,<2
-pymarc>=4.0,<5
 python-dotenv>=1.0,<2
 pyyaml>=6.0,<7
-repackage>=0.7.3
 uwsgi>=2.0,<2.1

+ 51 - 0
scriptshifter/hooks/yiddish_/__init__.py

@@ -0,0 +1,51 @@
+# @package ext
+
+__doc__ = """
+Yiddish transliteration module.
+
+Courtesy of Isaac Bleaman and Asher Lewis.
+
+https://github.com/ibleaman/yiddish.git
+
+Note the underscore in the module name to disambiguate with the `yiddish`
+external package name.
+"""
+
+
+from yiddish import detransliterate, transliterate
+
+from scriptshifter.exceptions import BREAK
+from scriptshifter.tools import capitalize
+
+
+def s2r_post_config(ctx):
+    """
+    Script to Roman via the external `yiddish` package; returns BREAK.
+    """
+
+    rom = transliterate(
+            ctx.src, loc=True,
+            loshn_koydesh=ctx.options.get("loshn_koydesh"))
+
+    if ctx.options["capitalize"] == "all":
+        rom = capitalize(rom)
+    elif ctx.options["capitalize"] == "first":
+        rom = rom[:1].upper() + rom[1:]  # slicing tolerates an empty result
+
+    ctx.dest = rom
+
+    return BREAK
+
+
+def r2s_post_config(ctx):
+    """
+    Roman to script via the external `yiddish` package; returns BREAK.
+
+    NOTE: This doesn't support the `loc` option.
+    """
+
+    ctx.dest = detransliterate(
+            ctx.src,
+            loshn_koydesh=ctx.options.get("loshn_koydesh"))
+
+    return BREAK

+ 29 - 19
scriptshifter/rest_api.py

@@ -3,11 +3,12 @@ import logging
 from base64 import b64encode
 from copy import deepcopy
 from email.message import EmailMessage
-from json import dumps, loads
+from json import dumps
 from os import environ, urandom
 from smtplib import SMTP
 
 from flask import Flask, jsonify, render_template, request
+from werkzeug.exceptions import BadRequest
 
 from scriptshifter import EMAIL_FROM, EMAIL_TO, SMTP_HOST, SMTP_PORT
 from scriptshifter.exceptions import ApiError
@@ -46,6 +47,20 @@ def handle_exception(e: ApiError):
     }, e.status_code)
 
 
+@app.errorhandler(BadRequest)
+def handle_400(e):  # 400 handler: echo the request payload when debugging
+    if logging.DEBUG >= logging.root.level:  # root logger at DEBUG or lower
+        body = {
+            "debug": {  # silent=True: never re-raise while handling the 400
+                "form_data": request.get_json(silent=True) or request.form,
+            }
+        }
+    else:
+        body = ""
+
+    return body, 400
+
+
 @app.route("/", methods=["GET"])
 def index():
     return render_template(
@@ -91,16 +106,16 @@ def get_options(lang):
 
 @app.route("/trans", methods=["POST"])
 def transliterate_req():
-    lang = request.form["lang"]
-    in_txt = request.form["text"]
-    capitalize = request.form.get("capitalize", False)
-    t_dir = request.form.get("t_dir", "s2r")
+    lang = request.json["lang"]
+    in_txt = request.json["text"]
+    capitalize = request.json.get("capitalize", False)
+    t_dir = request.json.get("t_dir", "s2r")
     if t_dir not in ("s2r", "r2s"):
         return f"Invalid direction: {t_dir}", 400
 
     if not len(in_txt):
         return ("No input text provided! ", 400)
-    options = loads(request.form.get("options", "{}"))
+    options = request.json.get("options", {})
     logger.debug(f"Extra options: {options}")
 
     try:
@@ -116,14 +131,9 @@ def feedback():
     """
     Allows users to provide feedback to improve a specific result.
     """
-    lang = request.form["lang"]
-    src = request.form["src"]
-    t_dir = request.form.get("t_dir", "s2r")
-    result = request.form["result"]
-    expected = request.form["expected"]
-    options = request.form.get("options", {})
-    notes = request.form.get("notes")
-    contact = request.form.get("contact")
+    t_dir = request.json.get("t_dir", "s2r")
+    options = request.json.get("options", {})
+    contact = request.json.get("contact")
 
     msg = EmailMessage()
     msg["subject"] = "Scriptshifter feedback report"
@@ -133,16 +143,16 @@ def feedback():
         msg["cc"] = contact
     msg.set_content(f"""
         *Scriptshifter feedback report from {contact or 'anonymous'}*\n\n
-        *Language:* {lang}\n
+        *Language:* {request.json['lang']}\n
         *Direction:* {
                     'Roman to Script' if t_dir == 'r2s'
                     else 'Script to Roman'}\n
-        *Source:* {src}\n
-        *Result:* {result}\n
-        *Expected result:* {expected}\n
+        *Source:* {request.json['src']}\n
+        *Result:* {request.json['result']}\n
+        *Expected result:* {request.json['expected']}\n
         *Applied options:* {dumps(options)}\n
         *Notes:*\n
-        {notes}""")
+        {request.json.get('notes')}""")
 
     # TODO This uses a test SMTP server:
     # python -m smtpd -n -c DebuggingServer localhost:1025

+ 26 - 25
scriptshifter/static/ss.js

@@ -94,33 +94,33 @@ document.getElementById('transliterate').addEventListener('submit',(event)=>{
     }
     document.getElementById('loader_results').classList.remove("hidden");
 
-    const data = new URLSearchParams();
-
     let t_dir = Array.from(document.getElementsByName("t_dir")).find(r => r.checked).value;
 
     let capitalize = Array.from(document.getElementsByName("capitalize")).find(r => r.checked).value;
 
 
-    data.append('text',document.getElementById('text').value)
-    data.append('lang',document.getElementById('lang').value)
-    data.append('t_dir',t_dir)
-    data.append('capitalize',capitalize)
+    const data = {
+        'text': document.getElementById('text').value,
+        'lang': document.getElementById('lang').value,
+        't_dir': t_dir,
+        'capitalize': capitalize,
+        'options': {}
+    }
 
-    let options = {};
     let option_inputs = document.getElementsByClassName("option_i");
     for (i = 0; i < option_inputs.length; i++) {
         let el = option_inputs[i];
         if (el.type == "checkbox") {
-            options[el.id] = el.checked;
+            data['options'][el.id] = el.checked;
         } else {
-            options[el.id] = el.value;
+            data['options'][el.id] = el.value;
         }
     };
-    data.append('options', JSON.stringify(options));
 
     fetch('/trans', {
         method: 'post',
-        body: data,
+        body: JSON.stringify(data),
+        headers: {"Content-Type": "application/json"}
     })
     .then(response=>response.json())
     .then((results)=>{
@@ -133,7 +133,7 @@ document.getElementById('transliterate').addEventListener('submit',(event)=>{
             fb_btn.classList.remove("hidden");
         }
 
-        if (results.warnings.length>0){
+        if (results.warnings && results.warnings.length>0){
             document.getElementById('warnings-toggle').classList.remove("hidden");
             document.getElementById('warnings').innerText = "WARNING:\n" + results.warnings.join("\n")
         }
@@ -167,26 +167,27 @@ if (fb_active) {
     })
 
     document.getElementById('feedback_form').addEventListener('submit',(event)=>{
-        const data = new URLSearchParams();
-        data.append('lang', document.getElementById('lang_fb_input').value);
-        data.append('src', document.getElementById('src_fb_input').value);
-        data.append('t_dir', document.getElementById('t_dir_fb_input').value);
-        data.append('result', document.getElementById('result_fb_input').value);
-        data.append('expected', document.getElementById('expected_fb_input').value);
-        data.append('contact', document.getElementById('contact_fb_input').value);
-        data.append('notes', document.getElementById('notes_fb_input').value);
-
-        let options = {};
+        const data = {
+            'lang': document.getElementById('lang_fb_input').value,
+            'src': document.getElementById('src_fb_input').value,
+            't_dir': document.getElementById('t_dir_fb_input').value,
+            'result': document.getElementById('result_fb_input').value,
+            'expected': document.getElementById('expected_fb_input').value,
+            'contact': document.getElementById('contact_fb_input').value,
+            'notes': document.getElementById('notes_fb_input').value,
+            'options': {}
+        };
+
         let option_inputs = document.getElementsByClassName("option_i");
         for (i = 0; i < option_inputs.length; i++) {
             let el = option_inputs[i];
-            options[el.getAttribute('id')] = el.value;
+            data['options'][el.getAttribute('id')] = el.value;
         };
-        data.append('options', JSON.stringify(options));
 
         fetch('/feedback', {
             method: 'post',
-            body: data,
+            body: JSON.stringify(data),
+            headers: {"Content-Type": "application/json"}
         })
         .then(response=>response.json())
         .then((results)=>{

+ 2 - 0
scriptshifter/tables/data/index.yml

@@ -154,5 +154,7 @@ uzbek_cyrillic:
   name: Uzbek (Cyrillic)
 yakut_cyrillic:
   name: Yakut (Cyrillic)
+yiddish:
+  name: Yiddish
 yuit_cyrillic:
   name: Yuit (Cyrillic)

+ 21 - 0
scriptshifter/tables/data/yiddish.yml

@@ -0,0 +1,21 @@
+general:
+  name: Yiddish
+
+options:
+  - id: loshn_koydesh
+    label: Loshn Koydesh
+    description: "[TODO]"  # quoted: a bare [TODO] is a one-item YAML list
+    type: boolean
+    default: false
+
+script_to_roman:
+  hooks:
+    post_config:
+      -
+        - yiddish_.s2r_post_config  # scriptshifter/hooks/yiddish_/__init__.py
+
+roman_to_script:
+  hooks:
+    post_config:
+      -
+        - yiddish_.r2s_post_config  # scriptshifter/hooks/yiddish_/__init__.py

+ 21 - 0
scriptshifter_base.Dockerfile

@@ -0,0 +1,21 @@
+FROM python:3.10-slim-bookworm
+
+RUN apt update
+RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-dev libpcre2-dev
+
+ENV TZ=America/New_York
+ARG WORKROOT "/usr/local/scriptshifter/src"
+
+RUN addgroup --system www
+RUN adduser --system www
+RUN gpasswd -a www www
+
+# Copy external dependencies. NOTE(review): ARG WORKROOT above lacks "=", so it may be unset here -- confirm.
+WORKDIR ${WORKROOT}
+COPY ext ./ext/
+COPY deps.txt ./
+RUN pip install --no-cache-dir -r deps.txt
+
+# Remove development packages (git is not in the install list above).
+RUN apt remove -y build-essential git
+RUN apt autoremove -y

+ 7 - 0
test.Dockerfile

@@ -0,0 +1,7 @@
+FROM python:3.10-slim-bookworm
+
+RUN apt update
+RUN apt install -y build-essential libpcre2-dev
+
+RUN pip install uwsgi
+