Browse Source

Changes to the arabic table mapping

Matthew Miller 3 weeks ago
parent
commit
53553371c9
1 changed files with 396 additions and 395 deletions
  1. 396 395
      scriptshifter/tables/data/arabic.yml

+ 396 - 395
scriptshifter/tables/data/arabic.yml

@@ -1,395 +1,396 @@
-# Arabic S2R using the 3rd-party ArabicTransliterator library:
-# https://github.com/MTG/ArabicTransliterator
-
----
-general:
-  name: Arabic
-  description: >
-    Arabic R2S using a conversion table and S2R using a 3rd party library.
-  case_sensitive: false
-
-  parents:
-    - _ignore_base
-
-
-roman_to_script:
-  map:
-
-    # Original table by David Bucknum, 5 April 2010
-    # Updated, 25 January 2019
-    # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
-    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
-    # Scripts Conceptually"
-    # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter
-
-    # Punctuation marks:
-    "*": "\u066D"
-    ",": "\u060C"
-    ";": "\u061B"
-    "?": "\u061F"
-
-    # Exceptions for specific words
-    # Allah
-    "Alla\u0304h": "\u0627\u0644\u0644\u0647"
-
-    # Qur'an
-    "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
-
-    # lillah
-    "lilla\u0304h": "\u0644\u0644\u0647"
-
-    # billah
-    "billa\u0304h": "\u0628\u0644\u0644\u0647"
-
-    # Rahman
-    "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
-
-    # Ruwat
-    "Ruwa\u0304t": "\u0631\u0648\u0627\u0629"
-    "ruwa\u0304t": "\u0631\u0648\u0627\u0629"
-
-    # Hadha
-    "Ha\u0304dha\u0304": "\u0647\u0630\u0627"
-    "ha\u0304dha\u0304": "\u0647\u0630\u0627"
-
-    # Hadhihi
-    "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
-    "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
-
-    # dhalika
-    "dha\u0304lika": "\u0630\u0644\u0643"
-
-    # Ibn when it appears in the middle of a name sequence
-    "ibn": "\u0628\u0646"
-
-    # H[dot below]aya[macron]t
-    "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
-    "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
-
-    # "sh[dot below] as in "Ishaq"
-
-    "sh\u0323": "\u0633\u062D"
-
-    # "s[prime]h" combos
-
-    "s\u02B9h": "\u0633\u0647"
-
-    # "th[dot below]"
-
-    "th\u0323": "\u062A\u062D"
-
-    # dh[dot under]
-
-    "dh\u0323": "\u062F\u062D"
-
-    # La-hu
-
-    "la-hu": "\u0644\u0647"
-
-    # Mi'ah
-    "Mi\u02BEah": "\u0645\u0627\u0626\u0629"
-    "Mi\u02BCah": "\u0645\u0627\u0626\u0629"
-    "mi\u02BEah": "\u0645\u0627\u0626\u0629"
-    "mi\u02BCah": "\u0645\u0627\u0626\u0629"
-
-    # Mi'at
-    "Mi\u02BEat": "\u0645\u0627\u0626\u0629"
-    "Mi\u02BCat": "\u0645\u0627\u0626\u0629"
-    "mi\u02BEat": "\u0645\u0627\u0626\u0629"
-    "mi\u02BCat": "\u0645\u0627\u0626\u0629"
-
-    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
-    # will technically use \u06F0-06F9. This needs further discussion with PSD
-    # as RLIN21 used Hindi numbers, Connexion and Voyager does not.)
-
-    # Edition statements with Latin number
-    "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
-    "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
-    "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
-    "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
-    "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
-    "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
-    "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
-    "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
-    "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"
-
-    # Use Basic Arabic-Indic \u0660-0669
-    "0": "\u0660"
-    "1": "\u0661"
-    "2": "\u0662"
-    "3": "\u0663"
-    "4": "\u0664"
-    "5": "\u0665"
-    "6": "\u0666"
-    "7": "\u0667"
-    "8": "\u0668"
-    "9": "\u0669"
-
-    # Hyphenated prefixes:
-    "wa-": "\u0648"
-    "bi-": "\u0628"
-    "al-": "\u0627\u0644"
-    "lil-": "\u0644\u0644"
-    "li-": "\u0644"
-    "la\u0304-": "\u0644"
-    "fi\u0304-": "\u0641\u064A"
-    "ka-": "\u0643"
-
-    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
-    "ah%": "\u0629"
-    "at%": "\u0629"
-
-    # tanwin at end of word
-    "an%": "\u0627"
-
-    # ayn-alif combo
-    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
-    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
-
-    "\u02BBA\u0304": "\u0639\u0627"
-    "\u02BBa\u0304": "\u0639\u0627"
-
-    "\u02BBI\u0304Y": "\u0639\u064A"
-    "\u02BBi\u0304y": "\u0639\u064A"
-    "\u02BBI\u0304": "\u0639\u064A"
-    "\u02BBi\u0304": "\u0639\u064A"
-
-    "\u02BBU\u0304": "\u0639\u0648"
-    "\u02BBu\u0304": "\u0639\u0648"
-    "\u02BBU": "\u0639"
-    "\u02BBu": "\u0639"
-
-    "%\u02BBA": "\u0639"
-    # "%\u02BBa": "\u0639"
-
-    # alif and hamzas for all occasions
-
-    # truncation necessary? It seems to work fine with.
-
-    "i\u0304\u02BEah%": "\u064A\u0626\u0629"
-    "i\u0304\u02BCah%": "\u064A\u0626\u0629"
-
-    "i\u0304\u02BEat%": "\u064A\u0626\u0629"
-    "i\u0304\u02BCat%": "\u064A\u0626\u0629"
-
-    "i\u02BEa\u0304%": "\u0626\u0627"
-    "i\u02BCa\u0304%": "\u0626\u0627"
-
-    "i\u02BE%": "\u0626"
-    "i\u02BC%": "\u0626"
-    "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
-    "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
-
-    "a\u02BE": "\u0623"
-    "a\u02BC": "\u0623"
-    "\u02BEi": "\u0626"
-    "\u02BCi": "\u0626"
-    "\u02BEa\u0304": "\u0622"
-    "\u02BCa\u0304": "\u0622"
-    "\u02BEa": "\u0623"
-    "\u02BCa": "\u0623"
-
-    "y\u02BCah": "\u064A\u0626\u0629"
-    "y\u02BEah": "\u064A\u0626\u0629"
-
-    "y\u02BCat": "\u064A\u0626\u0629"
-    "y\u02BEat": "\u064A\u0626\u0629"
-
-    # A
-
-    "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
-    "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
-
-    "a\u0304\u02BCi": "\u0627\u0626"
-    "a\u0304\u02BEi": "\u0627\u0626"
-    "a\u0304\u02BC": "\u0627\u0621"
-    "a\u0304\u02BE": "\u0627\u0621"
-    "%A\u0304": "\u0622"
-    "%a\u0304": "\u0622"
-    "A\u0304": "\u0627"
-    "a\u0304": "\u0627"
-
-    # These next two lines were intended to convert to alif-ayn when it is at
-    # # the beginning of a word, definite or indefinine (i.e.
-    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l"
-    "%A\u02BB": "\u0623\u0639"
-    "%a\u02BB": "\u0623\u0639"
-    "a\u02BB": "\u0639"
-    "A\u0301": "\u0649"
-    "a\u0301": "\u0649"
-
-    "ayy": "\u064A"
-    "%A": "\u0623"
-    "%a": "\u0627"
-    "A": "\u0623"
-    "a": ""
-
-    # I - Capital I at beginning of word is usually alif hamzah-below.
-
-    "i\u0304%": "\u064A"
-    "i\u0304y": "\u064A"
-    "iy": "\u064A"
-    "%I\u0304": "\u0625\u064A"
-    "i\u0304": "\u064A"
-    "%\u02BBI": "\u0639"
-
-    # "i\u02BB": "\u0625\u0639"
-
-    "I\u02BE": "\u0627\u0626"
-    "I\u02BC": "\u0627\u0626"
-    "i\u02BE": "\u0626"
-    "i\u02BC": "\u0627\u0626"
-
-    "%I": "\u0625"
-    "%i": "\u0625"
-    "I": "\u0625"
-    "i": ""
-
-    # U
-
-    "u\u0304\u02BE": "\u0624"
-    "u\u0304\u02BC": "\u0624"
-    "%U\u0304w": "\u0623\u0648"
-    "%u\u0304w": "\u0623\u0648"
-    "%U\u0304": "\u0623\u0648"
-    "%u\u0304": "\u0623\u0648"
-    "u\u0304w": "\u0648"
-    "u\u0304": "\u0648"
-    "u\u02BE": "\u0624"
-    "u\u02BC": "\u0624"
-
-    "%U": "\u0623"
-    "%u": "\u0623"
-    "U": "\u0623"
-    "u": ""
-
-    # Consonants, with tashdid added
-
-    "B": "\u0628"
-    "bb": "\u0628"
-    "b": "\u0628"
-    "Th": "\u062B"
-    "thth": "\u062B"
-    "th": "\u062B"
-    "T\u0323": "\u0637"
-    "t\u0323t\u0323": "\u0637"
-    "t\u0323": "\u0637"
-    "T": "\u062A"
-    "tt": "\u062A"
-    "t": "\u062A"
-    "J": "\u062C"
-    "jj": "\u062C"
-    "j": "\u062C"
-    "H\u0323": "\u062D"
-    "h\u0323h\u0323": "\u062D"
-    "h\u0323": "\u062D"
-    "H": "\u0647"
-    "hh": "\u0647"
-    "h": "\u0647"
-    "Kh": "\u062E"
-    "khkh": "\u062E"
-    "kh": "\u062E"
-    "K": "\u0643"
-    "kk": "\u0643"
-    "k": "\u0643"
-    "Dh": "\u0630"
-    "dhdh": "\u0630"
-    "dh": "\u0630"
-    "D\u0323": "\u0636"
-    "d\u0323d\u0323": "\u0636"
-    "d\u0323": "\u0636"
-    "D": "\u062F"
-    "dd": "\u062F"
-    "d": "\u062F"
-    "R": "\u0631"
-    "rr": "\u0631"
-    "r": "\u0631"
-    "Z\u0323": "\u0638"
-    "z\u0323z\u0323": "\u0638"
-    "z\u0323": "\u0638"
-    "Z": "\u0632"
-    "zz": "\u0632"
-    "z": "\u0632"
-    "Sh": "\u0634"
-    "shsh": "\u0634"
-    "sh": "\u0634"
-    "S\u0323": "\u0635"
-    "s\u0323s\u0323": "\u0635"
-    "s\u0323": "\u0635"
-    "S": "\u0633"
-    "ss": "\u0633"
-    "s": "\u0633"
-    "Gh": "\u063A"
-    "ghgh": "\u063A"
-    "gh": "\u063A"
-    "F": "\u0641"
-    "ff": "\u0641"
-    "f": "\u0641"
-    "Q": "\u0642"
-    "qq": "\u0642"
-    "q": "\u0642"
-    "L": "\u0644"
-    "ll": "\u0644"
-    "l": "\u0644"
-    "M": "\u0645"
-    "mm": "\u0645"
-    "m": "\u0645"
-    "N": "\u0646"
-    "nn": "\u0646"
-    "n": "\u0646"
-    "W": "\u0648"
-    "ww": "\u0648"
-    "w": "\u0648"
-    "Y": "\u064A"
-    "yy": "\u064A"
-    "y": "\u064A"
-
-    # non-Arabic consonants:
-    "P": "\u067E"
-    "p": "\u067E"
-    "Ch": "\u0686"
-    "ch": "\u0686"
-    "V": "\u06A4"
-    "v": "\u06A4"
-    "G": "\u06AF"
-    "g": "\u06AF"
-
-    # Diacritic characters:
-    # ain (\u0639) - not transliterated alone:
-    "\u02BB": "\u0639"
-    # hamza - not romanized
-    # "\u0621"
-    # hamza (alone in final position)
-    "\u02BE%": "\u0621"
-    "\u02BC%": "\u0621"
-
-    # Do not know what, if anything, is needed here:
-    # tatweel:
-    # "\u0640"
-    # fathatan:
-    # "\u064B"
-    # dammatan:
-    # "\u064C"
-    # kasratan:
-    # "\u064D"
-    # fatha:
-    # "\u064E"
-    # damma:
-    # "\u064F"
-    # kasra:
-    # "\u0650"
-    # shadda:
-    # "\u0651"
-    # sukun:
-    # "\u0652"
-    # superscript alef:
-    # "\u0670"
-    # alef wasla
-    # "\u0671"
-
-
-script_to_roman:
-  hooks:
-    post_config:
-      -
-        - arabic.arabic_romanizer.s2r_post_config
+# Arabic S2R using the 3rd-party ArabicTransliterator library:
+# https://github.com/MTG/ArabicTransliterator
+
+---
+general:
+  name: Arabic
+  description: >
+    Arabic R2S using a conversion table and S2R using a 3rd party library.
+  case_sensitive: false
+
+  parents:
+    - _ignore_base
+
+
+roman_to_script:
+  map:
+
+    # Original table by David Bucknum, 5 April 2010
+    # Updated, 25 January 2019
+    # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
+    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
+    # Scripts Conceptually"
+    # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks for ScriptShifter
+
+
+    # Punctuation marks:
+    "*": "\u066D"
+    ",": "\u060C"
+    ";": "\u061B"
+    "?": "\u061F"
+
+    # Exceptions for specific words
+    # Allah
+    "Alla\u0304h": "\u0627\u0644\u0644\u0647"
+
+    # Qur'an
+    "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
+
+    # lillah
+    "lilla\u0304h": "\u0644\u0644\u0647"
+
+    # billah
+    "billa\u0304h": "\u0628\u0644\u0644\u0647"
+
+    # Rahman
+    "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
+
+    # Ruwat
+    "Ruwa\u0304t": "\u0631\u0648\u0627\u0629"
+    "ruwa\u0304t": "\u0631\u0648\u0627\u0629"
+
+    # Hadha
+    "Ha\u0304dha\u0304": "\u0647\u0630\u0627"
+    "ha\u0304dha\u0304": "\u0647\u0630\u0627"
+
+    # Hadhihi
+    "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
+    "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
+
+    # dhalika
+    "dha\u0304lika": "\u0630\u0644\u0643"
+
+    # Ibn when it appears in the middle of a name sequence
+    "ibn": "\u0628\u0646"
+
+    # H[dot below]aya[macron]t
+    "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
+    "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
+
+    # "sh[dot below] as in "Ishaq"
+
+    "sh\u0323": "\u0633\u062D"
+
+    # "s[prime]h" combos
+
+    "s\u02B9h": "\u0633\u0647"
+
+    # "th[dot below]"
+
+    "th\u0323": "\u062A\u062D"
+
+    # dh[dot under]
+
+    "dh\u0323": "\u062F\u062D"
+
+    # La-hu
+
+    "la-hu": "\u0644\u0647"
+
+    # Mi'ah
+    "Mi\u02BEah": "\u0645\u0627\u0626\u0629"
+    "Mi\u02BCah": "\u0645\u0627\u0626\u0629"
+    "mi\u02BEah": "\u0645\u0627\u0626\u0629"
+    "mi\u02BCah": "\u0645\u0627\u0626\u0629"
+
+    # Mi'at
+    "Mi\u02BEat": "\u0645\u0627\u0626\u0629"
+    "Mi\u02BCat": "\u0645\u0627\u0626\u0629"
+    "mi\u02BEat": "\u0645\u0627\u0626\u0629"
+    "mi\u02BCat": "\u0645\u0627\u0626\u0629"
+
+    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
+    # will technically use \u06F0-06F9. This needs further discussion with PSD
+    # as RLIN21 used Hindi numbers, Connexion and Voyager does not.)
+
+    # Edition statements with Latin number
+    "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
+    "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
+    "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
+    "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
+    "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
+    "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
+    "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
+    "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
+    "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"
+
+    # Use Basic Arabic-Indic \u0660-0669
+    "0": "\u0660"
+    "1": "\u0661"
+    "2": "\u0662"
+    "3": "\u0663"
+    "4": "\u0664"
+    "5": "\u0665"
+    "6": "\u0666"
+    "7": "\u0667"
+    "8": "\u0668"
+    "9": "\u0669"
+
+    # Hyphenated prefixes:
+    "wa-": "\u0648"
+    "bi-": "\u0628"
+    "al-": "\u0627\u0644"
+    "lil-": "\u0644\u0644"
+    "li-": "\u0644"
+    "la\u0304-": "\u0644"
+    "fi\u0304-": "\u0641\u064A"
+    "ka-": "\u0643"
+
+    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
+    "%ah": "\u0629"
+    "%at": "\u0629"
+
+    # tanwin at end of word
+    "%an": "\u0627"
+
+    # ayn-alif combo
+    "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
+    "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
+
+    "\u02BBA\u0304": "\u0639\u0627"
+    "\u02BBa\u0304": "\u0639\u0627"
+
+    "\u02BBI\u0304Y": "\u0639\u064A"
+    "\u02BBi\u0304y": "\u0639\u064A"
+    "\u02BBI\u0304": "\u0639\u064A"
+    "\u02BBi\u0304": "\u0639\u064A"
+
+    "\u02BBU\u0304": "\u0639\u0648"
+    "\u02BBu\u0304": "\u0639\u0648"
+    "\u02BBU": "\u0639"
+    "\u02BBu": "\u0639"
+
+    "\u02BBA%": "\u0639"
+    # "\u02BBa%": "\u0639"
+
+    # alif and hamzas for all occasions
+
+    # truncation necessary? It seems to work fine with.
+
+    "%i\u0304\u02BEah": "\u064A\u0626\u0629"
+    "%i\u0304\u02BCah": "\u064A\u0626\u0629"
+
+    "%i\u0304\u02BEat": "\u064A\u0626\u0629"
+    "%i\u0304\u02BCat": "\u064A\u0626\u0629"
+
+    "%i\u02BEa\u0304": "\u0626\u0627"
+    "%i\u02BCa\u0304": "\u0626\u0627"
+
+    "%i\u02BE": "\u0626"
+    "%i\u02BC": "\u0626"
+    "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
+    "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
+
+    "a\u02BE": "\u0623"
+    "a\u02BC": "\u0623"
+    "\u02BEi": "\u0626"
+    "\u02BCi": "\u0626"
+    "\u02BEa\u0304": "\u0622"
+    "\u02BCa\u0304": "\u0622"
+    "\u02BEa": "\u0623"
+    "\u02BCa": "\u0623"
+
+    "y\u02BCah": "\u064A\u0626\u0629"
+    "y\u02BEah": "\u064A\u0626\u0629"
+
+    "y\u02BCat": "\u064A\u0626\u0629"
+    "y\u02BEat": "\u064A\u0626\u0629"
+
+    # A
+
+    "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
+    "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
+
+    "a\u0304\u02BCi": "\u0627\u0626"
+    "a\u0304\u02BEi": "\u0627\u0626"
+    "a\u0304\u02BC": "\u0627\u0621"
+    "a\u0304\u02BE": "\u0627\u0621"
+    "A\u0304%": "\u0622"
+    "a\u0304%": "\u0622"
+    "A\u0304": "\u0627"
+    "a\u0304": "\u0627"
+
+    # These next two lines were intended to convert to alif-ayn when it is at
+    # # the beginning of a word, definite or indefinine (i.e.
+    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l"
+    "A\u02BB%": "\u0623\u0639"
+    "a\u02BB%": "\u0623\u0639"
+    "a\u02BB": "\u0639"
+    "A\u0301": "\u0649"
+    "a\u0301": "\u0649"
+
+    "ayy": "\u064A"
+    "A%": "\u0623"
+    "a%": "\u0627"
+    "A": "\u0623"
+    "a": ""
+
+    # I - Capital I at beginning of word is usually alif hamzah-below.
+
+    "i\u0304%": "\u064A"
+    "i\u0304y": "\u064A"
+    "iy": "\u064A"
+    "I\u0304%": "\u0625\u064A"
+    "i\u0304": "\u064A"
+    "\u02BBI%": "\u0639"
+
+    # "i\u02BB": "\u0625\u0639"
+
+    "I\u02BE": "\u0627\u0626"
+    "I\u02BC": "\u0627\u0626"
+    "i\u02BE": "\u0626"
+    "i\u02BC": "\u0627\u0626"
+
+    "I%": "\u0625"
+    "i%": "\u0625"
+    "I": "\u0625"
+    "i": ""
+
+    # U
+
+    "u\u0304\u02BE": "\u0624"
+    "u\u0304\u02BC": "\u0624"
+    "U\u0304w%": "\u0623\u0648"
+    "u\u0304w%": "\u0623\u0648"
+    "U\u0304%": "\u0623\u0648"
+    "u\u0304%": "\u0623\u0648"
+    "u\u0304w": "\u0648"
+    "u\u0304": "\u0648"
+    "u\u02BE": "\u0624"
+    "u\u02BC": "\u0624"
+
+    "U%": "\u0623"
+    "u%": "\u0623"
+    "U": "\u0623"
+    "u": ""
+
+    # Consonants, with tashdid added
+
+    "B": "\u0628"
+    "bb": "\u0628"
+    "b": "\u0628"
+    "Th": "\u062B"
+    "thth": "\u062B"
+    "th": "\u062B"
+    "T\u0323": "\u0637"
+    "t\u0323t\u0323": "\u0637"
+    "t\u0323": "\u0637"
+    "T": "\u062A"
+    "tt": "\u062A"
+    "t": "\u062A"
+    "J": "\u062C"
+    "jj": "\u062C"
+    "j": "\u062C"
+    "H\u0323": "\u062D"
+    "h\u0323h\u0323": "\u062D"
+    "h\u0323": "\u062D"
+    "H": "\u0647"
+    "hh": "\u0647"
+    "h": "\u0647"
+    "Kh": "\u062E"
+    "khkh": "\u062E"
+    "kh": "\u062E"
+    "K": "\u0643"
+    "kk": "\u0643"
+    "k": "\u0643"
+    "Dh": "\u0630"
+    "dhdh": "\u0630"
+    "dh": "\u0630"
+    "D\u0323": "\u0636"
+    "d\u0323d\u0323": "\u0636"
+    "d\u0323": "\u0636"
+    "D": "\u062F"
+    "dd": "\u062F"
+    "d": "\u062F"
+    "R": "\u0631"
+    "rr": "\u0631"
+    "r": "\u0631"
+    "Z\u0323": "\u0638"
+    "z\u0323z\u0323": "\u0638"
+    "z\u0323": "\u0638"
+    "Z": "\u0632"
+    "zz": "\u0632"
+    "z": "\u0632"
+    "Sh": "\u0634"
+    "shsh": "\u0634"
+    "sh": "\u0634"
+    "S\u0323": "\u0635"
+    "s\u0323s\u0323": "\u0635"
+    "s\u0323": "\u0635"
+    "S": "\u0633"
+    "ss": "\u0633"
+    "s": "\u0633"
+    "Gh": "\u063A"
+    "ghgh": "\u063A"
+    "gh": "\u063A"
+    "F": "\u0641"
+    "ff": "\u0641"
+    "f": "\u0641"
+    "Q": "\u0642"
+    "qq": "\u0642"
+    "q": "\u0642"
+    "L": "\u0644"
+    "ll": "\u0644"
+    "l": "\u0644"
+    "M": "\u0645"
+    "mm": "\u0645"
+    "m": "\u0645"
+    "N": "\u0646"
+    "nn": "\u0646"
+    "n": "\u0646"
+    "W": "\u0648"
+    "ww": "\u0648"
+    "w": "\u0648"
+    "Y": "\u064A"
+    "yy": "\u064A"
+    "y": "\u064A"
+
+    # non-Arabic consonants:
+    "P": "\u067E"
+    "p": "\u067E"
+    "Ch": "\u0686"
+    "ch": "\u0686"
+    "V": "\u06A4"
+    "v": "\u06A4"
+    "G": "\u06AF"
+    "g": "\u06AF"
+
+    # Diacritic characters:
+    # ain (\u0639) - not transliterated alone:
+    "\u02BB": "\u0639"
+    # hamza - not romanized
+    # "\u0621"
+    # hamza (alone in final position)
+    "%\u02BE": "\u0621"
+    "%\u02BC": "\u0621"
+
+    # Do not know what, if anything, is needed here:
+    # tatweel:
+    # "\u0640"
+    # fathatan:
+    # "\u064B"
+    # dammatan:
+    # "\u064C"
+    # kasratan:
+    # "\u064D"
+    # fatha:
+    # "\u064E"
+    # damma:
+    # "\u064F"
+    # kasra:
+    # "\u0650"
+    # shadda:
+    # "\u0651"
+    # sukun:
+    # "\u0652"
+    # superscript alef:
+    # "\u0670"
+    # alef wasla
+    # "\u0671"
+
+
+script_to_roman:
+  hooks:
+    post_config:
+      -
+        - arabic.arabic_romanizer.s2r_post_config