Browse Source

Test (#101)

* Yiddish transliteration via submodules.

* Update checkout workflow.

* Change refs for Yiddish submodules.

* Fix WORKDIR in Dockerfile

* Do not remove yiddish module.

* Manually add yiddish submodules.

* Use git clone instead of submodule.

* Move ext checkout to github actions.

* Chinese numerals (#97)

* WIP Parse Chinese numerals.

* WIP complete number parsing.

* Complete Chinese numerals:

* Use standard table override instead of pre-config hooks.
* Add few test strings.

* Complete numerals:

* Transliterate all numeric examples correctly
* Modify hook return logic for consistency
* WIP partial spacing fix.

* Some cleanup; upgrade docker OS.

* Add dependency for uwsgi.

* Squashed commit of the following: (#98)

commit 30859a52b9cc325c323b414133856d0af3ffc2a6
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 22:17:36 2024 -0500

    Move ext checkout to github actions.

commit 6d8da6df68ac764f90deb15861089095043fd4ba
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 21:45:01 2024 -0500

    Use git clone instead of submodule.

commit ade9da589179870d331b703ff526d7fff33e88bb
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 21:42:45 2024 -0500

    Manually add yiddish submodules.

commit 77cb9ef2959f611d0220cc405e0b584ece71147c
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 21:23:37 2024 -0500

    Do not remove yiddish module.

commit e405b3605dd2629ed5557ccc5fdd5fe8812799ed
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 09:11:41 2024 -0500

    Fix WORKDIR in Dockerfile

commit 95445ba642163e28b94df6736ad6946ad7dc76c0
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 09:07:50 2024 -0500

    Change refs for Yiddish submodules.

commit 208ea095e792195981f644497ccd5fcd55e15c1b
Author: scossu <stefano@cossu.cc>
Date:   Wed Feb 28 08:45:58 2024 -0500

    Update checkout workflow.

* Add debug output to /trans response.

* Split docker files and requirements.

* Add bad request debug handler.

* Add bad request debug handler.

* Adjust CI workflows.

* Fix image name typo.

* Refine triggers.

* Fix typo on test workflow trigger.

* Use JSON in POST body.

* Also use JSON in feedback request; update docs.

* Return json data in 400 debug.
Stefano Cossu 1 year ago
parent
commit
fa5b48dba6

+ 13 - 4
.github/workflows/push-docker-image.yml → .github/workflows/push-app-image.yml

@@ -1,8 +1,15 @@
-name: Push image to Docker Hub.
+name: Push app image
 on:
 on:
+  # This runs either directly when a patch version tag (v*.*.N, N > 0) is
+  # pushed, or, for v*.*.0 tags, after the "Push base image" workflow has
+  # completed building and pushing the base image.
   push:
   push:
     tags:
     tags:
-      - "v*.*.*"
+      - "v*.*.[1-9]*"
+  workflow_run:
+    workflows: 
+      - "Push base image"
+    types:
+      - "completed"
 
 
 env:
 env:
   DOCKER_USER: lcnetdev
   DOCKER_USER: lcnetdev
@@ -13,13 +20,15 @@ jobs:
   push-image-to-docker-hub:
   push-image-to-docker-hub:
     runs-on: ubuntu-latest
     runs-on: ubuntu-latest
     steps:
     steps:
-      - uses: actions/checkout@v3
+      - name: checkout repo
+        uses: actions/checkout@v4
         with:
         with:
           submodules: recursive
           submodules: recursive
 
 
       - name: Build the Docker image
       - name: Build the Docker image
         run: >
         run: >
-          docker build . --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
+          docker build -f Dockerfile .
+          --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
           --tag $DOCKER_USER/$REPO_NAME:latest
           --tag $DOCKER_USER/$REPO_NAME:latest
 
 
       - name: Login to Docker Hub
       - name: Login to Docker Hub

+ 46 - 0
.github/workflows/push-base-image.yml

@@ -0,0 +1,46 @@
+name: Push base image
+on:
+  push:
+    tags:
+      - "v*.*.0"
+
+env:
+  DOCKER_USER: lcnetdev
+  DOCKER_PASSWORD: ${{secrets.DOCKER_HUB}}
+  REPO_NAME: scriptshifter-base
+
+jobs:
+  push-image-to-docker-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - name: checkout repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: checkout yiddish submodules (1/2)
+        uses: actions/checkout@v4
+        with:
+          repository: ibleaman/loshn-koydesh-pronunciation
+          path: ext/yiddish/yiddish/submodules/loshn-koydesh-pronunciation
+
+      - name: checkout yiddish submodules (2/2)
+        uses: actions/checkout@v4
+        with:
+          repository: ibleaman/hasidify_lexicon
+          path: ext/yiddish/yiddish/submodules/hasidify_lexicon
+
+      - name: Build the Docker image
+        run: >
+          docker build -f scriptshifter_base.Dockerfile .
+          --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
+          --tag $DOCKER_USER/$REPO_NAME:latest
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: lcnetdev
+          password: ${{ secrets.DOCKER_HUB }}
+
+      - name: Push to Docker Hub
+        run: docker push $DOCKER_USER/$REPO_NAME --all-tags

+ 9 - 5
.github/workflows/push-test-image.yml

@@ -1,8 +1,8 @@
-name: Push test image to Docker Hub.
+name: Push test image
 on:
 on:
   push:
   push:
-    branch:
-      - "main"
+    branches:
+      - "test"
 
 
 env:
 env:
   DOCKER_USER: lcnetdev
   DOCKER_USER: lcnetdev
@@ -13,12 +13,16 @@ jobs:
   push-image-to-docker-hub:
   push-image-to-docker-hub:
     runs-on: ubuntu-latest
     runs-on: ubuntu-latest
     steps:
     steps:
-      - uses: actions/checkout@v3
+      - name: checkout repo
+        uses: actions/checkout@v4
         with:
         with:
           submodules: recursive
           submodules: recursive
 
 
       - name: Build the Docker image
       - name: Build the Docker image
-        run: docker build . --tag $DOCKER_USER/$REPO_NAME:test
+        run: >
+          docker build -f Dockerfile .
+          --tag $DOCKER_USER/$REPO_NAME:${{ github.ref_name }}
+          --tag $DOCKER_USER/$REPO_NAME:test
 
 
       - name: Login to Docker Hub
       - name: Login to Docker Hub
         uses: docker/login-action@v3
         uses: docker/login-action@v3

+ 4 - 0
.gitmodules

@@ -1,3 +1,7 @@
 [submodule "ext/arabic_rom"]
 [submodule "ext/arabic_rom"]
 	path = ext/arabic_rom
 	path = ext/arabic_rom
 	url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git
 	url = https://github.com/fadhleryani/Arabic_ALA-LC_Romanization.git
+[submodule "ext/yiddish"]
+	path = ext/yiddish
+	url = https://github.com/scossu/yiddish.git
+	branch = loc

+ 7 - 21
Dockerfile

@@ -1,29 +1,15 @@
-FROM python:3.10-slim-bookworm
-
-RUN apt update
-RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-dev libpcre2-dev
-
-ENV TZ=America/New_York
-ENV _workroot "/usr/local/scriptshifter/src"
-
-WORKDIR ${_workroot}
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Remove development packages.
-RUN apt remove -y build-essential
-RUN apt autoremove -y
-
-RUN addgroup --system www
-RUN adduser --system www
-RUN gpasswd -a www www
+FROM lcnetdev/scriptshifter-base:latest
+ARG WORKROOT "/usr/local/scriptshifter/src"
 
 
+# Copy core application files.
+WORKDIR ${WORKROOT}
 COPY entrypoint.sh uwsgi.ini wsgi.py ./
 COPY entrypoint.sh uwsgi.ini wsgi.py ./
-COPY ext ./ext/
 COPY scriptshifter ./scriptshifter/
 COPY scriptshifter ./scriptshifter/
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
 
 
 RUN chmod +x ./entrypoint.sh
 RUN chmod +x ./entrypoint.sh
-RUN chown -R www:www ${_workroot} .
+#RUN chown -R www:www ${WORKROOT} .
 
 
 EXPOSE 8000
 EXPOSE 8000
 
 

+ 7 - 0
deps.txt

@@ -0,0 +1,7 @@
+# External dependencies.
+aksharamukha>=2.1,<3
+camel-tools>=1.5
+funcy>=1.15,<2
+pymarc>=4.0,<5
+repackage>=0.7.3
+./ext/yiddish

+ 23 - 0
doc/rest_api.md

@@ -69,6 +69,10 @@ Transliterate an input string into a given language.
 
 
 ### POST body
 ### POST body
 
 
+MIME type: `application/json`
+
+Content: JSON object with the following keys:
+
 - `lang`: Language code as given by the `/languages` endpoint. 
 - `lang`: Language code as given by the `/languages` endpoint. 
 - `text`: Input text to be transliterated.
 - `text`: Input text to be transliterated.
 - `capitalize`: One of `first` (capitalize the first letter of the input),
 - `capitalize`: One of `first` (capitalize the first letter of the input),
@@ -92,3 +96,22 @@ Content: JSON object containing two keys: `output` containing the transliterated
 string; and `warnings` containing a list of warnings. Characters not found in
 string; and `warnings` containing a list of warnings. Characters not found in
 the mapping are copied verbatim in the transliterated string (see
 the mapping are copied verbatim in the transliterated string (see
 "Configuration files" section for more information).
 "Configuration files" section for more information).
+
+## `POST /feedback`
+
+Send a feedback form about a transliteration result.
+
+### POST body
+
+MIME type: `application/json`
+
+Content: JSON object with the following keys:
+
+- `lang`: language of the transliteration. Mandatory.
+- `src`: source text. Mandatory.
+- `t_dir`: transliteration direction. If omitted, it defaults to `s2r`.
+- `result`: result of the transliteration. Mandatory.
+- `expected`: expected result. Mandatory.
+- `options`: options passed to the request, if any.
+- `notes`: optional user notes.
+- `contact`: contact email for feedback. Optional.

+ 1 - 0
ext/yiddish

@@ -0,0 +1 @@
+Subproject commit 9bf22c55ca76710940e141de5d88922a9f55ed1f

+ 1 - 5
requirements.txt

@@ -1,9 +1,5 @@
-aksharamukha>=2.1,<3
-camel-tools>=1.5
+# Core application dependencies.
 flask>=2.3,<3
 flask>=2.3,<3
-funcy>=1.15,<2
-pymarc>=4.0,<5
 python-dotenv>=1.0,<2
 python-dotenv>=1.0,<2
 pyyaml>=6.0,<7
 pyyaml>=6.0,<7
-repackage>=0.7.3
 uwsgi>=2.0,<2.1
 uwsgi>=2.0,<2.1

+ 51 - 0
scriptshifter/hooks/yiddish_/__init__.py

@@ -0,0 +1,51 @@
+# @package ext
+
+__doc__ = """
+Yiddish transliteration module.
+
+Courtesy of Isaac Bleaman and Asher Lewis.
+
+https://github.com/ibleaman/yiddish.git
+
+Note the underscore in the module name to disambiguate with the `yiddish`
+external package name.
+"""
+
+
+from yiddish import detransliterate, transliterate
+
+from scriptshifter.exceptions import BREAK
+from scriptshifter.tools import capitalize
+
+
+def s2r_post_config(ctx):
+    """
+    Script to Roman.
+    """
+
+    rom = transliterate(
+            ctx.src, loc=True,
+            loshn_koydesh=ctx.options.get("loshn_koydesh"))
+
+    if ctx.options["capitalize"] == "all":
+        rom = capitalize(rom)
+    elif ctx.options["capitalize"] == "first":
+        rom = rom[0].upper() + rom[1:]
+
+    ctx.dest = rom
+
+    return BREAK
+
+
+def r2s_post_config(ctx):
+    """
+    Roman to script.
+
+    NOTE: This doesn't support the `loc` option.
+    """
+
+    ctx.dest = detransliterate(
+            ctx.src,
+            loshn_koydesh=ctx.options.get("loshn_koydesh"))
+
+    return BREAK

+ 29 - 19
scriptshifter/rest_api.py

@@ -3,11 +3,12 @@ import logging
 from base64 import b64encode
 from base64 import b64encode
 from copy import deepcopy
 from copy import deepcopy
 from email.message import EmailMessage
 from email.message import EmailMessage
-from json import dumps, loads
+from json import dumps
 from os import environ, urandom
 from os import environ, urandom
 from smtplib import SMTP
 from smtplib import SMTP
 
 
 from flask import Flask, jsonify, render_template, request
 from flask import Flask, jsonify, render_template, request
+from werkzeug.exceptions import BadRequest
 
 
 from scriptshifter import EMAIL_FROM, EMAIL_TO, SMTP_HOST, SMTP_PORT
 from scriptshifter import EMAIL_FROM, EMAIL_TO, SMTP_HOST, SMTP_PORT
 from scriptshifter.exceptions import ApiError
 from scriptshifter.exceptions import ApiError
@@ -46,6 +47,20 @@ def handle_exception(e: ApiError):
     }, e.status_code)
     }, e.status_code)
 
 
 
 
+@app.errorhandler(BadRequest)
+def handle_400(e):
+    if logging.DEBUG >= logging.root.level:
+        body = {
+            "debug": {
+                "form_data": request.json or request.form,
+            }
+        }
+    else:
+        body = ""
+
+    return body, 400
+
+
 @app.route("/", methods=["GET"])
 @app.route("/", methods=["GET"])
 def index():
 def index():
     return render_template(
     return render_template(
@@ -91,16 +106,16 @@ def get_options(lang):
 
 
 @app.route("/trans", methods=["POST"])
 @app.route("/trans", methods=["POST"])
 def transliterate_req():
 def transliterate_req():
-    lang = request.form["lang"]
-    in_txt = request.form["text"]
-    capitalize = request.form.get("capitalize", False)
-    t_dir = request.form.get("t_dir", "s2r")
+    lang = request.json["lang"]
+    in_txt = request.json["text"]
+    capitalize = request.json.get("capitalize", False)
+    t_dir = request.json.get("t_dir", "s2r")
     if t_dir not in ("s2r", "r2s"):
     if t_dir not in ("s2r", "r2s"):
         return f"Invalid direction: {t_dir}", 400
         return f"Invalid direction: {t_dir}", 400
 
 
     if not len(in_txt):
     if not len(in_txt):
         return ("No input text provided! ", 400)
         return ("No input text provided! ", 400)
-    options = loads(request.form.get("options", "{}"))
+    options = request.json.get("options", {})
     logger.debug(f"Extra options: {options}")
     logger.debug(f"Extra options: {options}")
 
 
     try:
     try:
@@ -116,14 +131,9 @@ def feedback():
     """
     """
     Allows users to provide feedback to improve a specific result.
     Allows users to provide feedback to improve a specific result.
     """
     """
-    lang = request.form["lang"]
-    src = request.form["src"]
-    t_dir = request.form.get("t_dir", "s2r")
-    result = request.form["result"]
-    expected = request.form["expected"]
-    options = request.form.get("options", {})
-    notes = request.form.get("notes")
-    contact = request.form.get("contact")
+    t_dir = request.json.get("t_dir", "s2r")
+    options = request.json.get("options", {})
+    contact = request.json.get("contact")
 
 
     msg = EmailMessage()
     msg = EmailMessage()
     msg["subject"] = "Scriptshifter feedback report"
     msg["subject"] = "Scriptshifter feedback report"
@@ -133,16 +143,16 @@ def feedback():
         msg["cc"] = contact
         msg["cc"] = contact
     msg.set_content(f"""
     msg.set_content(f"""
         *Scriptshifter feedback report from {contact or 'anonymous'}*\n\n
         *Scriptshifter feedback report from {contact or 'anonymous'}*\n\n
-        *Language:* {lang}\n
+        *Language:* {request.json['lang']}\n
         *Direction:* {
         *Direction:* {
                     'Roman to Script' if t_dir == 'r2s'
                     'Roman to Script' if t_dir == 'r2s'
                     else 'Script to Roman'}\n
                     else 'Script to Roman'}\n
-        *Source:* {src}\n
-        *Result:* {result}\n
-        *Expected result:* {expected}\n
+        *Source:* {request.json['src']}\n
+        *Result:* {request.json['result']}\n
+        *Expected result:* {request.json['expected']}\n
         *Applied options:* {dumps(options)}\n
         *Applied options:* {dumps(options)}\n
         *Notes:*\n
         *Notes:*\n
-        {notes}""")
+        {request.json['notes']}""")
 
 
     # TODO This uses a test SMTP server:
     # TODO This uses a test SMTP server:
     # python -m smtpd -n -c DebuggingServer localhost:1025
     # python -m smtpd -n -c DebuggingServer localhost:1025

+ 26 - 25
scriptshifter/static/ss.js

@@ -94,33 +94,33 @@ document.getElementById('transliterate').addEventListener('submit',(event)=>{
     }
     }
     document.getElementById('loader_results').classList.remove("hidden");
     document.getElementById('loader_results').classList.remove("hidden");
 
 
-    const data = new URLSearchParams();
-
     let t_dir = Array.from(document.getElementsByName("t_dir")).find(r => r.checked).value;
     let t_dir = Array.from(document.getElementsByName("t_dir")).find(r => r.checked).value;
 
 
     let capitalize = Array.from(document.getElementsByName("capitalize")).find(r => r.checked).value;
     let capitalize = Array.from(document.getElementsByName("capitalize")).find(r => r.checked).value;
 
 
 
 
-    data.append('text',document.getElementById('text').value)
-    data.append('lang',document.getElementById('lang').value)
-    data.append('t_dir',t_dir)
-    data.append('capitalize',capitalize)
+    const data = {
+        'text': document.getElementById('text').value,
+        'lang': document.getElementById('lang').value,
+        't_dir': t_dir,
+        'capitalize': capitalize,
+        'options': {}
+    }
 
 
-    let options = {};
     let option_inputs = document.getElementsByClassName("option_i");
     let option_inputs = document.getElementsByClassName("option_i");
     for (i = 0; i < option_inputs.length; i++) {
     for (i = 0; i < option_inputs.length; i++) {
         let el = option_inputs[i];
         let el = option_inputs[i];
         if (el.type == "checkbox") {
         if (el.type == "checkbox") {
-            options[el.id] = el.checked;
+            data['options'][el.id] = el.checked;
         } else {
         } else {
-            options[el.id] = el.value;
+            data['options'][el.id] = el.value;
         }
         }
     };
     };
-    data.append('options', JSON.stringify(options));
 
 
     fetch('/trans', {
     fetch('/trans', {
         method: 'post',
         method: 'post',
-        body: data,
+        body: JSON.stringify(data),
+        headers: {"Content-Type": "application/json"}
     })
     })
     .then(response=>response.json())
     .then(response=>response.json())
     .then((results)=>{
     .then((results)=>{
@@ -133,7 +133,7 @@ document.getElementById('transliterate').addEventListener('submit',(event)=>{
             fb_btn.classList.remove("hidden");
             fb_btn.classList.remove("hidden");
         }
         }
 
 
-        if (results.warnings.length>0){
+        if (results.warnings && results.warnings.length>0){
             document.getElementById('warnings-toggle').classList.remove("hidden");
             document.getElementById('warnings-toggle').classList.remove("hidden");
             document.getElementById('warnings').innerText = "WARNING:\n" + results.warnings.join("\n")
             document.getElementById('warnings').innerText = "WARNING:\n" + results.warnings.join("\n")
         }
         }
@@ -167,26 +167,27 @@ if (fb_active) {
     })
     })
 
 
     document.getElementById('feedback_form').addEventListener('submit',(event)=>{
     document.getElementById('feedback_form').addEventListener('submit',(event)=>{
-        const data = new URLSearchParams();
-        data.append('lang', document.getElementById('lang_fb_input').value);
-        data.append('src', document.getElementById('src_fb_input').value);
-        data.append('t_dir', document.getElementById('t_dir_fb_input').value);
-        data.append('result', document.getElementById('result_fb_input').value);
-        data.append('expected', document.getElementById('expected_fb_input').value);
-        data.append('contact', document.getElementById('contact_fb_input').value);
-        data.append('notes', document.getElementById('notes_fb_input').value);
-
-        let options = {};
+        const data = {
+            'lang': document.getElementById('lang_fb_input').value,
+            'src': document.getElementById('src_fb_input').value,
+            't_dir': document.getElementById('t_dir_fb_input').value,
+            'result': document.getElementById('result_fb_input').value,
+            'expected': document.getElementById('expected_fb_input').value,
+            'contact': document.getElementById('contact_fb_input').value,
+            'notes': document.getElementById('notes_fb_input').value,
+            'options': {}
+        };
+
         let option_inputs = document.getElementsByClassName("option_i");
         let option_inputs = document.getElementsByClassName("option_i");
         for (i = 0; i < option_inputs.length; i++) {
         for (i = 0; i < option_inputs.length; i++) {
             let el = option_inputs[i];
             let el = option_inputs[i];
-            options[el.getAttribute('id')] = el.value;
+            data['options'][el.getAttribute('id')] = el.value;
         };
         };
-        data.append('options', JSON.stringify(options));
 
 
         fetch('/feedback', {
         fetch('/feedback', {
             method: 'post',
             method: 'post',
-            body: data,
+            body: JSON.stringify(data),
+            headers: {"Content-Type": "application/json"}
         })
         })
         .then(response=>response.json())
         .then(response=>response.json())
         .then((results)=>{
         .then((results)=>{

+ 2 - 0
scriptshifter/tables/data/index.yml

@@ -154,5 +154,7 @@ uzbek_cyrillic:
   name: Uzbek (Cyrillic)
   name: Uzbek (Cyrillic)
 yakut_cyrillic:
 yakut_cyrillic:
   name: Yakut (Cyrillic)
   name: Yakut (Cyrillic)
+yiddish:
+  name: Yiddish
 yuit_cyrillic:
 yuit_cyrillic:
   name: Yuit (Cyrillic)
   name: Yuit (Cyrillic)

+ 21 - 0
scriptshifter/tables/data/yiddish.yml

@@ -0,0 +1,21 @@
+general:
+  name: Yiddish
+
+options:
+  - id: loshn_koydesh
+    label: Loshn Koydesh
+    description: [TODO]
+    type: boolean
+    default: false
+
+script_to_roman:
+  hooks:
+    post_config:
+      -
+        - yiddish_.s2r_post_config
+
+roman_to_script:
+  hooks:
+    post_config:
+      -
+        - yiddish_.r2s_post_config

+ 21 - 0
scriptshifter_base.Dockerfile

@@ -0,0 +1,21 @@
+FROM python:3.10-slim-bookworm
+
+RUN apt update
+RUN apt install -y build-essential tzdata gfortran libopenblas-dev libboost-all-dev libpcre2-dev
+
+ENV TZ=America/New_York
+ARG WORKROOT "/usr/local/scriptshifter/src"
+
+RUN addgroup --system www
+RUN adduser --system www
+RUN gpasswd -a www www
+
+# Copy external dependencies.
+WORKDIR ${WORKROOT}
+COPY ext ./ext/
+COPY deps.txt ./
+RUN pip install --no-cache-dir -r deps.txt
+
+# Remove development packages.
+RUN apt remove -y build-essential git
+RUN apt autoremove -y

+ 7 - 0
test.Dockerfile

@@ -0,0 +1,7 @@
+FROM python:3.10-slim-bookworm
+
+RUN apt update
+RUN apt install -y build-essential libpcre2-dev
+
+RUN pip install uwsgi
+