Browse Source

Add Aksharamukha integration and initial Thai support.

scossu 5 months ago
parent
commit
2fddb2e6b2

+ 2 - 2
doc/hooks.md

@@ -78,7 +78,7 @@ happens:
       current position is added verbatim to the output list, and the cursor
       advances by one position.
 5. When the end of the input text is reached, if the configuration indicates
-   that capitalization is required (this is true by default), te first element
+   that capitalization is required (this is true by default), the first element
    of the output list is capitalized.
 6. The output list is joined into one string.
 7. The string is compacted by removing excessive whitespace: Two or more
@@ -118,7 +118,7 @@ registered as constants under `scriptshifter.exceptions`.
 
 The following members of the context object are available in all the hooks:
 
-- `ctx.src`: Source text. It should not be reassigned.
+- `ctx.src`: Source text. Read only.
 - `ctx.general`: Configuration general options.
 - `ctx.langsec`: language section (S2R or R2S) of configuration.
 - `ctx.options`: language-specific options defined in configuration and set

+ 4 - 3
requirements.txt

@@ -1,3 +1,4 @@
-flask
-pyyaml
-uwsgi
+aksharamukha>=2.1,<2.2
+flask>=2.3,<2.4
+pyyaml>=6.0,<7
+uwsgi>=2.0,<2.1

+ 0 - 0
scriptshifter/hooks/aksharamukha/__init__.py


+ 30 - 0
scriptshifter/hooks/aksharamukha/romanizer.py

@@ -0,0 +1,30 @@
+# @package ext
+
+__doc__ = """
+Transliterate a number of Indian and other Asian scripts using Aksharamukha:
+https://github.com/virtualvinodh/aksharamukha-python """
+
+
+from logging import getLogger
+
+from aksharamukha.transliterate import process
+
+from scriptshifter.exceptions import BREAK
+
+
+logger = getLogger(__name__)
+
+
+def s2r_post_config(ctx, src_script):
+    """Romanize ctx.src to IAST via Aksharamukha; result goes in ctx.dest."""
+    options = [n for n, v in ctx.options.items() if v and n != "capitalize"]
+    logger.info(f"Options for {src_script}: {options}")
+    ctx.dest = process(src_script, "IAST", ctx.src, pre_options=options)
+
+    return BREAK
+
+
+def r2s_post_config(ctx, dest_script):
+    """Convert IAST text in ctx.src to dest_script; result goes in ctx.dest."""
+    ctx.dest = process("IAST", dest_script, ctx.src)
+    return BREAK

+ 3 - 0
scriptshifter/tables/data/index.yml

@@ -56,6 +56,9 @@ serbian:
   name: Serbian
 pulaar:
   name: Pulaar (Adlam)
+thai:
+  name: Thai
+  note: Uses Aksharamukha.
 tajik:
   name: Tajik (Cyrillic)
 tatar:

+ 43 - 0
scriptshifter/tables/data/thai.yml

@@ -0,0 +1,43 @@
+general:
+  name: Thai
+
+options:
+  - id: ThaiTranscription
+    label: Thai Orthography
+    description: พุทฺธ → พุทธะ
+    type: boolean
+    default: false
+  - id: ThaiSajjhayaOrthography
+    label: Sajjhāya orthography
+    description: พุทฺธ → พุท์ธ
+    type: boolean
+    default: false
+  - id: ThaiSajjhayawithA
+    label: Nativized sajjhaya
+    description: พุทฺธํ → พุท์ธัง
+    type: boolean
+    default: false
+  - id: ThaiNativeConsonants
+    label: Thai phonetic
+    description: พุทฺธตฺว → บุดธะต͜วะ
+    type: boolean
+    default: false
+  - id: ThaiVisargaSaraA
+    label: Sara a ะ as Visarga
+    description: พุทฺธ → พุทธะ
+    type: boolean
+    default: false
+
+script_to_roman:
+  hooks:
+    post_config:
+      -
+        - aksharamukha.romanizer.s2r_post_config
+        - src_script: "Thai"
+
+roman_to_script:
+  hooks:
+    post_config:
+      -
+        - aksharamukha.romanizer.r2s_post_config
+        - dest_script: "Thai"

+ 5 - 1
scriptshifter/templates/index.html

@@ -155,7 +155,11 @@
             let option_inputs = document.getElementsByClassName("option_i");
             for (i = 0; i < option_inputs.length; i++) {
                 let el = option_inputs[i];
-                options[el.getAttribute('id')] = el.value;
+                if (el.type == "checkbox") {
+                    options[el.id] = el.checked;
+                } else {
+                    options[el.id] = el.value;
+                }
             };
             data.append('options', JSON.stringify(options));
 

+ 14 - 2
scriptshifter/trans.py

@@ -19,17 +19,29 @@ class Context:
     """
     Context used within the transliteration and passed to hook functions.
     """
+    @property
+    def src(self):  # Read-only view of the source text held in _src.
+        return self._src
+
+    @src.setter
+    def src(self, value):  # value is unused, but Python passes it on assignment.
+        raise NotImplementedError("Attribute is read-only.")
+
+    @src.deleter
+    def src(self):  # `del ctx.src` is rejected the same way as assignment.
+        raise NotImplementedError("Attribute is read-only.")
+
     def __init__(self, src, general, langsec, options={}):
         """
         Initialize a context.
 
         Args:
-            src (str): The original text. This is meant to never change.
+            src (str): The original text. Read-only.
             general (dict): general section of the current config.
             langsec (dict): Language configuration section being used.
             options (dict): extra options as a dict.
         """
-        self.src = src
+        self._src = src
         self.general = general
         self.options = options
         self.langsec = langsec