# Arabic S2R using the 3rd-party ArabicTransliterator library:
# https://github.com/MTG/ArabicTransliterator
---
general:
  name: Arabic
  description: >
    Arabic R2S using a conversion table and S2R using a 3rd party library.
  case_sensitive: false
  parents:
    - _ignore_base

roman_to_script:
  map:
    # Original table by David Bucknum, 5 April 2010
    # Updated, 25 January 2019
    # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
    # Scripts Conceptually"
    # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks
    # for ScriptShifter
    #
    # Conventions used by the keys below:
    # - Keys are written in decomposed form: base letter followed by the
    #   combining mark (\u0304 macron, \u0323 dot below, \u0301 acute).
    # - \u02BB is the romanized ayn; \u02BE and \u02BC are both accepted as
    #   the romanized hamza, so hamza entries come in pairs.
    # - "%" inside a key is the truncation mark referred to in the update
    #   note above.

    # Punctuation marks:
    "*": "\u066D"
    ",": "\u060C"
    ";": "\u061B"
    "?": "\u061F"

    # Exceptions for specific words
    # Allah
    "Alla\u0304h": "\u0627\u0644\u0644\u0647"
    # Qur'an
    "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
    # lillah
    "lilla\u0304h": "\u0644\u0644\u0647"
    # billah
    "billa\u0304h": "\u0628\u0644\u0644\u0647"
    # Rahman
    "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
    # Ruwat
    "Ruwa\u0304t": "\u0631\u0648\u0627\u0629"
    "ruwa\u0304t": "\u0631\u0648\u0627\u0629"
    # Hadha
    "Ha\u0304dha\u0304": "\u0647\u0630\u0627"
    "ha\u0304dha\u0304": "\u0647\u0630\u0627"
    # Hadhihi
    "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
    "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
    # dhalika
    "dha\u0304lika": "\u0630\u0644\u0643"
    # Ibn when it appears in the middle of a name sequence
    "ibn": "\u0628\u0646"
    # H[dot below]aya[macron]t
    "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
    "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
    # s followed by h[dot below], as in "Ishaq": two letters, not the
    # digraph "sh"
    "sh\u0323": "\u0633\u062D"
    # "s[prime]h" combos: the prime separates s from h, so again two letters
    "s\u02B9h": "\u0633\u0647"
    # t followed by h[dot below]: two letters, not the digraph "th"
    "th\u0323": "\u062A\u062D"
    # d followed by h[dot below]: two letters, not the digraph "dh"
    "dh\u0323": "\u062F\u062D"
    # La-hu
    "la-hu": "\u0644\u0647"
    # Mi'ah
    "Mi\u02BEah": "\u0645\u0627\u0626\u0629"
    "Mi\u02BCah": "\u0645\u0627\u0626\u0629"
    "mi\u02BEah": "\u0645\u0627\u0626\u0629"
    "mi\u02BCah": "\u0645\u0627\u0626\u0629"
    # Mi'at
    "Mi\u02BEat": "\u0645\u0627\u0626\u0629"
    "Mi\u02BCat": "\u0645\u0627\u0626\u0629"
    "mi\u02BEat": "\u0645\u0627\u0626\u0629"
    "mi\u02BCat": "\u0645\u0627\u0626\u0629"

    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
    # will technically use \u06F0-06F9. This needs further discussion with PSD
    # as RLIN21 used Hindi numbers, Connexion and Voyager do not.)

    # Edition statements with Latin number: the digit is kept as-is instead
    # of being converted by the single-digit entries further down.
    "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
    "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
    "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
    "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
    "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
    "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
    "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
    "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
    "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"

    # Use Basic Arabic-Indic \u0660-0669
    "0": "\u0660"
    "1": "\u0661"
    "2": "\u0662"
    "3": "\u0663"
    "4": "\u0664"
    "5": "\u0665"
    "6": "\u0666"
    "7": "\u0667"
    "8": "\u0668"
    "9": "\u0669"

    # Hyphenated prefixes:
    "wa-": "\u0648"
    "bi-": "\u0628"
    "al-": "\u0627\u0644"
    "lil-": "\u0644\u0644"
    "li-": "\u0644"
    "la\u0304-": "\u0644"
    "fi\u0304-": "\u0641\u064A"
    "ka-": "\u0643"

    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
    "%ah": "\u0629"
    "%at": "\u0629"
    # tanwin at end of word
    "%an": "\u0627"
    # ayn-alif combo
    "%\u02BBa\u0304\u02BE": "\u0639\u0627\u0621"
    "%\u02BBa\u0304\u02BC": "\u0639\u0627\u0621"
    "\u02BBA\u0304": "\u0639\u0627"
    "\u02BBa\u0304": "\u0639\u0627"
    "\u02BBI\u0304Y": "\u0639\u064A"
    "\u02BBi\u0304y": "\u0639\u064A"
    "\u02BBI\u0304": "\u0639\u064A"
    "\u02BBi\u0304": "\u0639\u064A"
    "\u02BBU\u0304": "\u0639\u0648"
    "\u02BBu\u0304": "\u0639\u0648"
    "\u02BBU": "\u0639"
    "\u02BBu": "\u0639"
    "\u02BBA%": "\u0639"
    # "\u02BBa%": "\u0639"

    # alif and hamzas for all occasions
    # truncation necessary? It seems to work fine with.
    "%i\u0304\u02BEah": "\u064A\u0626\u0629"
    "%i\u0304\u02BCah": "\u064A\u0626\u0629"
    "%i\u0304\u02BEat": "\u064A\u0626\u0629"
    "%i\u0304\u02BCat": "\u064A\u0626\u0629"
    "%i\u02BEa\u0304": "\u0626\u0627"
    "%i\u02BCa\u0304": "\u0626\u0627"
    "%i\u02BE": "\u0626"
    "%i\u02BC": "\u0626"
    "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
    "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
    "a\u02BE": "\u0623"
    "a\u02BC": "\u0623"
    "\u02BEi": "\u0626"
    "\u02BCi": "\u0626"
    "\u02BEa\u0304": "\u0622"
    "\u02BCa\u0304": "\u0622"
    "\u02BEa": "\u0623"
    "\u02BCa": "\u0623"
    "y\u02BCah": "\u064A\u0626\u0629"
    "y\u02BEah": "\u064A\u0626\u0629"
    "y\u02BCat": "\u064A\u0626\u0629"
    "y\u02BEat": "\u064A\u0626\u0629"

    # A
    "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
    "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
    "a\u0304\u02BCi": "\u0627\u0626"
    "a\u0304\u02BEi": "\u0627\u0626"
    "a\u0304\u02BC": "\u0627\u0621"
    "a\u0304\u02BE": "\u0627\u0621"
    "A\u0304%": "\u0622"
    "a\u0304%": "\u0622"
    "A\u0304": "\u0627"
    "a\u0304": "\u0627"
    # These next two lines were intended to convert to alif-ayn when it is at
    # the beginning of a word, definite or indefinite (i.e.
    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l)
    "A\u02BB%": "\u0623\u0639"
    "a\u02BB%": "\u0623\u0639"
    "a\u02BB": "\u0639"
    "A\u0301": "\u0649"
    "a\u0301": "\u0649"
    "ayy": "\u064A"
    "A%": "\u0623"
    "a%": "\u0627"
    "A": "\u0623"
    # Bare short vowel: produces no Arabic letter.
    "a": ""

    # I - Capital I at beginning of word is usually alif hamzah-below.
    "i\u0304%": "\u064A"
    "i\u0304y": "\u064A"
    "iy": "\u064A"
    "I\u0304%": "\u0625\u064A"
    "i\u0304": "\u064A"
    "\u02BBI%": "\u0639"
    # "i\u02BB": "\u0625\u0639"
    "I\u02BE": "\u0627\u0626"
    "I\u02BC": "\u0627\u0626"
    "i\u02BE": "\u0626"
    # NOTE(review): this is the only \u02BE / \u02BC pair in the table whose
    # two values differ ("i\u02BE" above gives \u0626 alone). It looks like a
    # copy of the capital-I entry - confirm the intended value before changing.
    "i\u02BC": "\u0627\u0626"
    "I%": "\u0625"
    "i%": "\u0625"
    "I": "\u0625"
    # Bare short vowel: produces no Arabic letter.
    "i": ""

    # U
    "u\u0304\u02BE": "\u0624"
    "u\u0304\u02BC": "\u0624"
    "U\u0304w%": "\u0623\u0648"
    "u\u0304w%": "\u0623\u0648"
    "U\u0304%": "\u0623\u0648"
    "u\u0304%": "\u0623\u0648"
    "u\u0304w": "\u0648"
    "u\u0304": "\u0648"
    "u\u02BE": "\u0624"
    "u\u02BC": "\u0624"
    "U%": "\u0623"
    "u%": "\u0623"
    "U": "\u0623"
    # Bare short vowel: produces no Arabic letter.
    "u": ""

    # Consonants, with tashdid added
    # (each doubled romanized consonant maps to the same single letter as
    # the undoubled one)
    "B": "\u0628"
    "bb": "\u0628"
    "b": "\u0628"
    "Th": "\u062B"
    "thth": "\u062B"
    "th": "\u062B"
    "T\u0323": "\u0637"
    "t\u0323t\u0323": "\u0637"
    "t\u0323": "\u0637"
    "T": "\u062A"
    "tt": "\u062A"
    "t": "\u062A"
    "J": "\u062C"
    "jj": "\u062C"
    "j": "\u062C"
    "H\u0323": "\u062D"
    "h\u0323h\u0323": "\u062D"
    "h\u0323": "\u062D"
    "H": "\u0647"
    "hh": "\u0647"
    "h": "\u0647"
    "Kh": "\u062E"
    "khkh": "\u062E"
    "kh": "\u062E"
    "K": "\u0643"
    "kk": "\u0643"
    "k": "\u0643"
    "Dh": "\u0630"
    "dhdh": "\u0630"
    "dh": "\u0630"
    "D\u0323": "\u0636"
    "d\u0323d\u0323": "\u0636"
    "d\u0323": "\u0636"
    "D": "\u062F"
    "dd": "\u062F"
    "d": "\u062F"
    "R": "\u0631"
    "rr": "\u0631"
    "r": "\u0631"
    "Z\u0323": "\u0638"
    "z\u0323z\u0323": "\u0638"
    "z\u0323": "\u0638"
    "Z": "\u0632"
    "zz": "\u0632"
    "z": "\u0632"
    "Sh": "\u0634"
    "shsh": "\u0634"
    "sh": "\u0634"
    "S\u0323": "\u0635"
    "s\u0323s\u0323": "\u0635"
    "s\u0323": "\u0635"
    "S": "\u0633"
    "ss": "\u0633"
    "s": "\u0633"
    "Gh": "\u063A"
    "ghgh": "\u063A"
    "gh": "\u063A"
    "F": "\u0641"
    "ff": "\u0641"
    "f": "\u0641"
    "Q": "\u0642"
    "qq": "\u0642"
    "q": "\u0642"
    "L": "\u0644"
    "ll": "\u0644"
    "l": "\u0644"
    "M": "\u0645"
    "mm": "\u0645"
    "m": "\u0645"
    "N": "\u0646"
    "nn": "\u0646"
    "n": "\u0646"
    "W": "\u0648"
    "ww": "\u0648"
    "w": "\u0648"
    "Y": "\u064A"
    "yy": "\u064A"
    "y": "\u064A"

    # non-Arabic consonants:
    "P": "\u067E"
    "p": "\u067E"
    "Ch": "\u0686"
    "ch": "\u0686"
    "V": "\u06A4"
    "v": "\u06A4"
    "G": "\u06AF"
    "g": "\u06AF"

    # Diacritic characters:
    # ain (\u0639) - not transliterated alone:
    "\u02BB": "\u0639"
    # hamza - not romanized
    # "\u0621"
    # hamza (alone in final position)
    "%\u02BE": "\u0621"
    "%\u02BC": "\u0621"

    # Do not know what, if anything, is needed here:
    # tatweel:
    # "\u0640"
    # fathatan:
    # "\u064B"
    # dammatan:
    # "\u064C"
    # kasratan:
    # "\u064D"
    # fatha:
    # "\u064E"
    # damma:
    # "\u064F"
    # kasra:
    # "\u0650"
    # shadda:
    # "\u0651"
    # sukun:
    # "\u0652"
    # superscript alef:
    # "\u0670"
    # alef wasla
    # "\u0671"

script_to_roman:
  hooks:
    # One hook entry, itself a one-item list naming the hook callable.
    post_config:
      - - arabic.arabic_romanizer.s2r_post_config