Browse Source

Add Aksharamukha integration and initial Thai support.

scossu 5 months ago
parent
commit
2fddb2e6b2

+ 2 - 2
doc/hooks.md

@@ -78,7 +78,7 @@ happens:
       current position is added verbatim to the output list, and the cursor
       current position is added verbatim to the output list, and the cursor
       advances by one position.
       advances by one position.
 5. When the end of the input text is reached, if the configuration indicates
 5. When the end of the input text is reached, if the configuration indicates
-   that capitalization is required (this is true by default), te first element
+   that capitalization is required (this is true by default), the first element
    of the output list is capitalized.
    of the output list is capitalized.
 6. The output list is joined into one string.
 6. The output list is joined into one string.
 7. The string is compacted by removing excessive whitespace: Two or more
 7. The string is compacted by removing excessive whitespace: Two or more
@@ -118,7 +118,7 @@ registered as constants under `scriptshifter.exceptions`.
 
 
 The following members of the context object are available in all the hooks:
 The following members of the context object are available in all the hooks:
 
 
-- `ctx.src`: Source text. It should not be reassigned.
+- `ctx.src`: Source text. Read-only.
 - `ctx.general`: Configuration general options.
 - `ctx.general`: Configuration general options.
 - `ctx.langsec`: language section (S2R or R2S) of configuration.
 - `ctx.langsec`: language section (S2R or R2S) of configuration.
 - `ctx.options`: language-specific options defined in configuration and set
 - `ctx.options`: language-specific options defined in configuration and set

+ 4 - 3
requirements.txt

@@ -1,3 +1,4 @@
-flask
-pyyaml
-uwsgi
+aksharamukha>=2.1,<2.2
+flask>=2.3,<2.4
+pyyaml>=6.0,<7
+uwsgi>=2.0,<2.1

+ 0 - 0
scriptshifter/hooks/aksharamukha/__init__.py


+ 30 - 0
scriptshifter/hooks/aksharamukha/romanizer.py

@@ -0,0 +1,30 @@
+# @package ext
+
+__doc__ = """
+Transliterate a number of Indian and other Asian scripts using Aksharamukha:
+https://github.com/virtualvinodh/aksharamukha-python """
+
+
+from logging import getLogger
+
+from aksharamukha.transliterate import process
+
+from scriptshifter.exceptions import BREAK
+
+
+logger = getLogger(__name__)
+
+
+def s2r_post_config(ctx, src_script):
+    """
+    Romanize `ctx.src` to IAST through Aksharamukha.
+
+    Every option in `ctx.options` with a truthy value, except `capitalize`,
+    is passed by name to Aksharamukha as a pre-option.
+
+    Args:
+        ctx: Transliteration context. `ctx.src` and `ctx.options` are read;
+            `ctx.dest` is set to the converted text.
+        src_script (str): Source script name, passed as is to Aksharamukha.
+
+    Return:
+        BREAK. NOTE(review): presumably this tells the caller to skip its
+        own transliteration steps; confirm against the hooks documentation.
+    """
+    # `capitalize` is left out of the option names sent to Aksharamukha.
+    options = [n for n, v in ctx.options.items() if v and n != "capitalize"]
+    logger.info("Options for %s: %s", src_script, options)
+    ctx.dest = process(src_script, "IAST", ctx.src, pre_options=options)
+
+    return BREAK
+
+
+def r2s_post_config(ctx, dest_script):
+    """
+    Convert `ctx.src` from IAST to another script through Aksharamukha.
+
+    Args:
+        ctx: Transliteration context. `ctx.src` is read; `ctx.dest` is set
+            to the converted text.
+        dest_script (str): Target script name, passed as is to Aksharamukha.
+
+    Return:
+        BREAK.
+    """
+    converted = process("IAST", dest_script, ctx.src)
+    ctx.dest = converted
+
+    return BREAK

+ 3 - 0
scriptshifter/tables/data/index.yml

@@ -56,6 +56,9 @@ serbian:
   name: Serbian
   name: Serbian
 pulaar:
 pulaar:
   name: Pulaar (Adlam)
   name: Pulaar (Adlam)
+thai:
+  name: Thai
+  note: Uses Aksharamukha.
 tajik:
 tajik:
   name: Tajik (Cyrillic)
   name: Tajik (Cyrillic)
 tatar:
 tatar:

+ 43 - 0
scriptshifter/tables/data/thai.yml

@@ -0,0 +1,43 @@
+general:
+  name: Thai
+
+options:
+  - id: ThaiTranscription
+    label: Thai Orthography
+    description: พุทฺธ → พุทธะ
+    type: boolean
+    default: false
+  - id: ThaiSajjhayaOrthography
+    label: Sajjhāya orthography
+    description: พุทฺธ → พุท์ธ
+    type: boolean
+    default: false
+  - id: ThaiSajjhayawithA
+    label: Nativized sajjhaya
+    description: พุทฺธํ → พุท์ธัง
+    type: boolean
+    default: false
+  - id: ThaiNativeConsonants
+    label: Thai phonetic
+    description: พุทฺธตฺว → บุดธะต͜วะ
+    type: boolean
+    default: false
+  - id: ThaiVisargaSaraA
+    label: Sara a ะ as Visarga
+    description: พุทฺธ → พุทธะ
+    type: boolean
+    default: false
+
+script_to_roman:
+  hooks:
+    post_config:
+      -
+        - aksharamukha.romanizer.s2r_post_config
+        - src_script: "Thai"
+
+roman_to_script:
+  hooks:
+    post_config:
+      -
+        - aksharamukha.romanizer.r2s_post_config
+        - dest_script: "Thai"

+ 5 - 1
scriptshifter/templates/index.html

@@ -155,7 +155,11 @@
             let option_inputs = document.getElementsByClassName("option_i");
             let option_inputs = document.getElementsByClassName("option_i");
             for (i = 0; i < option_inputs.length; i++) {
             for (i = 0; i < option_inputs.length; i++) {
                 let el = option_inputs[i];
                 let el = option_inputs[i];
-                options[el.getAttribute('id')] = el.value;
+                if (el.type == "checkbox") {
+                    options[el.id] = el.checked;
+                } else {
+                    options[el.id] = el.value;
+                }
             };
             };
             data.append('options', JSON.stringify(options));
             data.append('options', JSON.stringify(options));
 
 

+ 14 - 2
scriptshifter/trans.py

@@ -19,17 +19,29 @@ class Context:
     """
     """
     Context used within the transliteration and passed to hook functions.
     Context used within the transliteration and passed to hook functions.
     """
     """
+    @property
+    def src(self):
+        """Original source text. Read-only: set once in the constructor."""
+        return self._src
+
+    @src.setter
+    def src(self, value):
+        # A property setter is always called with the assigned value, so it
+        # must be accepted here; otherwise assignment fails with a TypeError
+        # instead of the intended read-only error.
+        raise NotImplementedError("Attribute is read-only.")
+
+    @src.deleter
+    def src(self):
+        raise NotImplementedError("Attribute is read-only.")
+
     def __init__(self, src, general, langsec, options={}):
     def __init__(self, src, general, langsec, options={}):
         """
         """
         Initialize a context.
         Initialize a context.
 
 
         Args:
         Args:
-            src (str): The original text. This is meant to never change.
+            src (str): The original text. Read-only.
             general (dict): general section of the current config.
             general (dict): general section of the current config.
             langsec (dict): Language configuration section being used.
             langsec (dict): Language configuration section being used.
             options (dict): extra options as a dict.
             options (dict): extra options as a dict.
         """
         """
-        self.src = src
+        self._src = src
         self.general = general
         self.general = general
         self.options = options
         self.options = options
         self.langsec = langsec
         self.langsec = langsec