|
@@ -1,28 +1,30 @@
|
|
|
-from camel_tools.utils.charmap import CharMapper
|
|
|
-from camel_tools.utils.dediac import dediac_ar
|
|
|
-from camel_tools.utils.normalize import (
|
|
|
- normalize_unicode,
|
|
|
- normalize_alef_maksura_ar,
|
|
|
- normalize_alef_ar,
|
|
|
- normalize_teh_marbuta_ar)
|
|
|
+from os import path
|
|
|
+from sys import path as syspath
|
|
|
|
|
|
+from scriptshifter import APP_ROOT
|
|
|
from scriptshifter.exceptions import BREAK
|
|
|
|
|
|
|
|
|
-def s2r_post_config(ctx):
|
|
|
-
|
|
|
- src = normalize_unicode(ctx.src)
|
|
|
+CAMEL_DIR = path.join(path.dirname(APP_ROOT), "ext", "arabic_rom")
|
|
|
+MODULE_DIR = path.join(CAMEL_DIR, "src")
|
|
|
+MODEL_DIR = path.join(CAMEL_DIR, "models", "mle")
|
|
|
+MODEL_PATH = path.join(MODEL_DIR, "size1.0.tsv")
|
|
|
+
|
|
|
+syspath.append(MODULE_DIR)
|
|
|
|
|
|
-
|
|
|
- src = normalize_alef_maksura_ar(src)
|
|
|
- src = normalize_alef_ar(src)
|
|
|
- src = normalize_teh_marbuta_ar(src)
|
|
|
|
|
|
-
|
|
|
- src = dediac_ar(src)
|
|
|
+def s2r_post_config(ctx):
|
|
|
+ from predict import mle_predict as mle
|
|
|
+ from predict import translit_rules as tr
|
|
|
+
|
|
|
+ loc_exceptional = tr.load_exceptional_spellings()
|
|
|
+ loc_mappings = tr.load_loc_mappings()
|
|
|
|
|
|
-
|
|
|
- ar2bw = CharMapper.builtin_mapper("ar2bw")
|
|
|
- ctx.dest = ar2bw(src)
|
|
|
+ mle_model = mle.load_mle_model(mle_model_tsv=MODEL_PATH)
|
|
|
+ ctx.dest = mle.apply_mle_translit_simple_backoff(
|
|
|
+ ctx.src,
|
|
|
+ mle_model,
|
|
|
+ loc_mappings,
|
|
|
+ loc_exceptional)
|
|
|
|
|
|
return BREAK
|