Browse Source

Several Hebrew and general changes:

* Add capitalization options
* Move capitalization function to new "tools" module
* Use env file
scossu 7 months ago
parent
commit
a1b95dd74c

+ 34 - 1
README.md

@@ -4,10 +4,43 @@ REST API service to convert non-Latin scripts to Latin, and vice versa.
 
 ## Environment variables
 
+The provided `example.env` can be renamed to `.env` in your deployment and/or
+moved to a location that is not under version control, and adjusted to fit the
+environment. The file will be parsed directly by the application if present,
+or it can be pre-loaded in a Docker environment.
+
+Currently, the following environment variables are defined:
+
 - `TXL_LOGLEVEL`: Application log level. Defaults to `WARN`.
+- `TXL_FLASK_SECRET`: Flask secret key.
 - `TXL_DICTA_EP`: Endpoint for the Dicta Hebrew transliteration service. This
   is mandatory for using the Hebrew module.
 
+## Local development server
+
+For local development, it is easiest to run Flask without the WSGI wrapper,
+possibly in a virtual environment:
+
+``` bash
+# python -m venv /path/to/venv
+# source /path/to/venv/bin/activate
+# pip install -r requirements.txt
+# flask run
+```
+
+It is advised to set `FLASK_DEBUG=true` to reload the web app on code changes
+and print detailed stack traces when exceptions are raised. Note that changes
+to any .yml file do NOT trigger a reload of Flask.
+
+Alternatively, the transliteration interface can be accessed directly from
+Python: 
+
+``` python
+from scriptshifter.trans import transliterate
+
+transliterate("some text", "some language")
+```
+
 ## Run on Docker
 
 Build container in current dir:
@@ -19,7 +52,7 @@ docker build -t scriptshifter:latest .
 Start container:
 
 ```
-docker run -e TXL_FLASK_SECRET=changeme -p 8000:8000 scriptshifter:latest
+docker run --env-file .env -p 8000:8000 scriptshifter:latest
 ```
 
 For running in development mode, add `-e FLASK_DEBUG=true` to the options.

+ 4 - 0
example.env

@@ -0,0 +1,4 @@
+FLASK_DEBUG=true
+TXL_DICTA_EP="changeme"
+TXL_FLASK_SECRET="changeme"
+TXL_LOGLEVEL="INFO"

+ 1 - 0
requirements.txt

@@ -1,3 +1,4 @@
 flask
+python-dotenv
 pyyaml
 uwsgi

+ 8 - 1
scriptshifter/__init__.py

@@ -2,10 +2,17 @@ import logging
 
 from os import environ, path
 
+from dotenv import load_dotenv
+
+
+env = load_dotenv()
 
 APP_ROOT = path.dirname(path.realpath(__file__))
 
 logging.basicConfig(
         # filename=environ.get("TXL_LOGFILE", "/dev/stdout"),
-        level=environ.get("TXL_LOGLEVEL", logging.INFO))
+        level=environ.get("TXL_LOGLEVEL", logging.WARN))
 logger = logging.getLogger(__name__)
+
+if not env:
+    logger.warning("No .env file found. Assuming env was passed externally.")

+ 9 - 2
scriptshifter/hooks/hebrew/dicta_api.py

@@ -4,6 +4,7 @@ from os import environ
 from requests import post
 
 from scriptshifter.exceptions import BREAK
+from scriptshifter.tools import capitalize
 
 EP = environ.get("TXL_DICTA_EP")
 DEFAULT_GENRE = "rabbinic"
@@ -23,9 +24,15 @@ def s2r_post_config(ctx):
     rsp.raise_for_status()
 
     rom = rsp.json().get("transliteration")
-    ctx.dest = rom
 
-    if not rom:
+    if rom:
+        if ctx.options["capitalize"] == "all":
+            rom = capitalize(rom)
+        elif ctx.options["capitalize"] == "first":
+            rom = rom[0].upper() + rom[1:]
+    else:
         ctx.warnings.append("Upstream service returned empty result.")
 
+    ctx.dest = rom
+
     return BREAK

+ 3 - 10
scriptshifter/hooks/korean/romanizer.py

@@ -28,6 +28,7 @@ from csv import reader
 
 from scriptshifter.exceptions import BREAK
 from scriptshifter.hooks.korean import KCONF
+from scriptshifter.tools import capitalize
 
 
 PWD = path.dirname(path.realpath(__file__))
@@ -93,7 +94,7 @@ def _romanize_nonames(src, options):
     logger.debug(f"Before capitalization: {rom}")
     # FKR042: Capitalize all first letters
     if options["capitalize"] == "all":
-        rom = _capitalize(rom)
+        rom = capitalize(rom)
     # FKR043: Capitalize the first letter
     elif options["capitalize"] == "first":
         rom = rom[0].upper() + rom[1:]
@@ -278,7 +279,7 @@ def _kor_corp_name_rom(src):
     rom_tok = []
     for tok in src.split(" "):
         rom_tok.append(_romanize_oclc_auto(tok))
-    rom = _capitalize(" ".join(rom_tok))
+    rom = capitalize(" ".join(rom_tok))
 
     if chu == "L":
         rom = "(Chu) " + rom
@@ -715,14 +716,6 @@ def _kor_lname_rom(lname):
     return rom if lname != rom else False
 
 
-def _capitalize(src):
-    """ Only capitalize first word and words preceded by space."""
-    orig_ls = src.split(" ")
-    cap_ls = [orig[0].upper() + orig[1:] for orig in orig_ls]
-
-    return " ".join(cap_ls)
-
-
 def _fkr_log(fkr_i):
     fkr_k = f"FKR{fkr_i:03}"
     logger.debug(f"Applying {fkr_k}: {FKR_IDX[fkr_k]}")

+ 9 - 0
scriptshifter/tools.py

@@ -0,0 +1,9 @@
+__doc__ = """ Common tools for core and hooks. """
+
+
+def capitalize(src):
+    """ Capitalize first word and words preceded by space; keep spacing."""
+    # [:1] instead of [0]: empty tokens (double spaces, "") must not raise.
+    cap_ls = [tok[:1].upper() + tok[1:] for tok in src.split(" ")]
+
+    return " ".join(cap_ls)