123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393 |
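- # Arabic S2R using the 3rd-party ArabicTransliterator library:
- # https://github.com/MTG/ArabicTransliterator
- # NOTE(review): the script_to_roman hook in this file names
- # arabic.camel_tools.s2r_post_config, not ArabicTransliterator — confirm
- # whether this header is still current.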
- ---
- # General table metadata: display name, free-text description, the
- # case-sensitivity switch, and the `parents` list (here `_ignore_base`;
- # presumably tables this one inherits from — verify against the loader).
- # NOTE(review): `case_sensitive` is false, yet the roman_to_script map
- # carries separate upper- and lower-case keys (e.g. "Ruwa\u0304t" and
- # "ruwa\u0304t", "Ha\u0304dha\u0304" and "ha\u0304dha\u0304") — confirm how
- # the loader treats such pairs.
- general:
- name: Arabic
- description: >
- Arabic R2S using a conversion table and S2R using a 3rd party library.
- case_sensitive: false
- parents:
- - _ignore_base
- roman_to_script:
- map:
- # Original table by David Bucknum
- # Last updated 25 January 2019
- # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
- # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
- # Scripts Conceptually"
- # Punctuation marks:
- "*": "\u066D"
- ",": "\u060C"
- ";": "\u061B"
- "?": "\u061F"
- # Exceptions for specific words
- # Allah
- "Alla\u0304h": "\u0627\u0644\u0644\u0647"
- # Qur'an
- "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
- # lillah
- "lilla\u0304h": "\u0644\u0644\u0647"
- # billah
- "billa\u0304h": "\u0628\u0644\u0644\u0647"
- # Rahman
- "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
- # Ruwat
- "Ruwa\u0304t": "\u0631\u0648\u0627\u0629"
- "ruwa\u0304t": "\u0631\u0648\u0627\u0629"
- # Hadha
- "Ha\u0304dha\u0304": "\u0647\u0630\u0627"
- "ha\u0304dha\u0304": "\u0647\u0630\u0627"
- # Hadhihi
- "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
- "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
- # dhalika
- "dha\u0304lika": "\u0630\u0644\u0643"
- # Ibn when it appears in the middle of a name sequence
- "ibn": "\u0628\u0646"
- # H[dot below]aya[macron]t
- "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
- "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
- # "sh[dot below]" as in "Ishaq"
- "sh\u0323": "\u0633\u062D"
- # "s[prime]h" combos
- "s\u02B9h": "\u0633\u0647"
- # "th[dot below]"
- "th\u0323": "\u062A\u062D"
- # "dh[dot below]"
- "dh\u0323": "\u062F\u062D"
- # La-hu
- "la-hu": "\u0644\u0647"
- # Mi'ah
- "Mi\u02BEah": "\u0645\u0627\u0626\u0629"
- "Mi\u02BCah": "\u0645\u0627\u0626\u0629"
- "mi\u02BEah": "\u0645\u0627\u0626\u0629"
- "mi\u02BCah": "\u0645\u0627\u0626\u0629"
- # Mi'at
- "Mi\u02BEat": "\u0645\u0627\u0626\u0629"
- "Mi\u02BCat": "\u0645\u0627\u0626\u0629"
- "mi\u02BEat": "\u0645\u0627\u0626\u0629"
- "mi\u02BCat": "\u0645\u0627\u0626\u0629"
- # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
- # will technically use \u06F0-\u06F9. This needs further discussion with PSD
- # as RLIN21 used Hindi numbers, while Connexion and Voyager do not.)
- # Edition statements with Latin number
- "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
- "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
- "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
- "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
- "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
- "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
- "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
- "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
- "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"
- # Use Basic Arabic-Indic digits, \u0660-\u0669
- "0": "\u0660"
- "1": "\u0661"
- "2": "\u0662"
- "3": "\u0663"
- "4": "\u0664"
- "5": "\u0665"
- "6": "\u0666"
- "7": "\u0667"
- "8": "\u0668"
- "9": "\u0669"
- # Hyphenated prefixes:
- "wa-": "\u0648"
- "bi-": "\u0628"
- "al-": "\u0627\u0644"
- "lil-": "\u0644\u0644"
- "li-": "\u0644"
- "la\u0304-": "\u0644"
- "fi\u0304-": "\u0641\u064A"
- "ka-": "\u0643"
- # Vowels and vowel/consonant combinations
- "ah%": "\u0629"
- "at%": "\u0629"
- # tanwin
- "an%": "\u0627"
- # ayn-alif combo
- "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
- "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
- "\u02BBA\u0304": "\u0639\u0627"
- "\u02BBa\u0304": "\u0639\u0627"
- "\u02BBI\u0304": "\u0639\u064A"
- "\u02BBi\u0304": "\u0639\u064A"
- "\u02BBU\u0304": "\u0639\u0648"
- "\u02BBu\u0304": "\u0639\u0648"
- "\u02BBU": "\u0639"
- "\u02BBu": "\u0639"
- "%\u02BBA": "\u0639"
- # "%\u02BBa": "\u0639"
- # alif and hamzas for all occasions
- # Is truncation necessary? It seems to work fine with it.
- "i\u0304\u02BEah%": "\u064A\u0626\u0629"
- "i\u0304\u02BCah%": "\u064A\u0626\u0629"
- "i\u0304\u02BEat%": "\u064A\u0626\u0629"
- "i\u0304\u02BCat%": "\u064A\u0626\u0629"
- "i\u02BEa\u0304%": "\u0626\u0627"
- "i\u02BCa\u0304%": "\u0626\u0627"
- "i\u02BE%": "\u0626"
- "i\u02BC%": "\u0626"
- "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
- "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
- "a\u02BE": "\u0623"
- "a\u02BC": "\u0623"
- "\u02BEi": "\u0626"
- "\u02BCi": "\u0626"
- "\u02BEa\u0304": "\u0622"
- "\u02BCa\u0304": "\u0622"
- "\u02BEa": "\u0623"
- "\u02BCa": "\u0623"
- "y\u02BCah": "\u064A\u0626\u0629"
- "y\u02BEah": "\u064A\u0626\u0629"
- "y\u02BCat": "\u064A\u0626\u0629"
- "y\u02BEat": "\u064A\u0626\u0629"
- # A
- "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
- "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
- "a\u0304\u02BCi": "\u0627\u0626"
- "a\u0304\u02BEi": "\u0627\u0626"
- "a\u0304\u02BC": "\u0627\u0621"
- "a\u0304\u02BE": "\u0627\u0621"
- "%A\u0304": "\u0622"
- "%a\u0304": "\u0622"
- "A\u0304": "\u0627"
- "a\u0304": "\u0627"
- # These next two lines were intended to convert to alif-ayn when it is at
- # the beginning of a word, definite or indefinite (i.e.
- # "al-a[ayn]ma[macron]l" or "[space]a[ayn]ma[macron]l").
- "%A\u02BB": "\u0623\u0639"
- "%a\u02BB": "\u0623\u0639"
- "a\u02BB": "\u0639"
- "A\u0301": "\u0649"
- "a\u0301": "\u0649"
- "ayy": "\u064A"
- "%A": "\u0623"
- "%a": "\u0627"
- "A": "\u0623"
- "a": ""
- # I - Capital I at beginning of word is usually alif hamzah-below.
- # Long i (i + macron, \u0304) and the iy / i[macron]y sequences all map to
- # ya' (\u064A). Keys starting with "%" are presumably anchored at the start
- # of a word, and keys ending with "%" at the end — confirm against the
- # loader.
- "i\u0304%": "\u064A"
- "i\u0304y": "\u064A"
- "iy": "\u064A"
- "%I\u0304": "\u0625\u064A"
- "i\u0304": "\u064A"
- "%\u02BBI": "\u0639"
- # "i\u02BB": "\u0625\u0639"
- # i followed by hamza. Both hamza spellings (\u02BE and \u02BC) must give
- # the same output, as every other such pair in this table does. Capital I
- # yields alif + ya'-hamza (\u0627\u0626); lower-case i yields ya'-hamza
- # alone (\u0626).
- "I\u02BE": "\u0627\u0626"
- "I\u02BC": "\u0627\u0626"
- "i\u02BE": "\u0626"
- # Was "\u0627\u0626", apparently copied from the capital-I entry above; now
- # aligned with "i\u02BE" and with the word-final "i\u02BC%" entry.
- "i\u02BC": "\u0626"
- # Bare I maps to alif hamzah-below (\u0625); a remaining lower-case short i
- # is dropped (empty string).
- "%I": "\u0625"
- "%i": "\u0625"
- "I": "\u0625"
- "i": ""
- # U
- "u\u0304\u02BE": "\u0624"
- "u\u0304\u02BC": "\u0624"
- "%U\u0304w": "\u0623\u0648"
- "%u\u0304w": "\u0623\u0648"
- "%U\u0304": "\u0623\u0648"
- "%u\u0304": "\u0623\u0648"
- "u\u0304w": "\u0648"
- "u\u0304": "\u0648"
- "u\u02BE": "\u0624"
- "u\u02BC": "\u0624"
- "%U": "\u0623"
- "%u": "\u0623"
- "U": "\u0623"
- "u": ""
- # Consonants, with tashdid added
- "B": "\u0628"
- "bb": "\u0628"
- "b": "\u0628"
- "Th": "\u062B"
- "thth": "\u062B"
- "th": "\u062B"
- "T\u0323": "\u0637"
- "t\u0323t\u0323": "\u0637"
- "t\u0323": "\u0637"
- "T": "\u062A"
- "tt": "\u062A"
- "t": "\u062A"
- "J": "\u062C"
- "jj": "\u062C"
- "j": "\u062C"
- "H\u0323": "\u062D"
- "h\u0323h\u0323": "\u062D"
- "h\u0323": "\u062D"
- "H": "\u0647"
- "hh": "\u0647"
- "h": "\u0647"
- "Kh": "\u062E"
- "khkh": "\u062E"
- "kh": "\u062E"
- "K": "\u0643"
- "kk": "\u0643"
- "k": "\u0643"
- "Dh": "\u0630"
- "dhdh": "\u0630"
- "dh": "\u0630"
- "D\u0323": "\u0636"
- "d\u0323d\u0323": "\u0636"
- "d\u0323": "\u0636"
- "D": "\u062F"
- "dd": "\u062F"
- "d": "\u062F"
- "R": "\u0631"
- "rr": "\u0631"
- "r": "\u0631"
- "Z\u0323": "\u0638"
- "z\u0323z\u0323": "\u0638"
- "z\u0323": "\u0638"
- "Z": "\u0632"
- "zz": "\u0632"
- "z": "\u0632"
- "Sh": "\u0634"
- "shsh": "\u0634"
- "sh": "\u0634"
- "S\u0323": "\u0635"
- "s\u0323s\u0323": "\u0635"
- "s\u0323": "\u0635"
- "S": "\u0633"
- "ss": "\u0633"
- "s": "\u0633"
- "Gh": "\u063A"
- "ghgh": "\u063A"
- "gh": "\u063A"
- "F": "\u0641"
- "ff": "\u0641"
- "f": "\u0641"
- "Q": "\u0642"
- "qq": "\u0642"
- "q": "\u0642"
- "L": "\u0644"
- "ll": "\u0644"
- "l": "\u0644"
- "M": "\u0645"
- "mm": "\u0645"
- "m": "\u0645"
- "N": "\u0646"
- "nn": "\u0646"
- "n": "\u0646"
- "W": "\u0648"
- "ww": "\u0648"
- "w": "\u0648"
- "Y": "\u064A"
- "yy": "\u064A"
- "y": "\u064A"
- # non-Arabic consonants:
- "P": "\u067E"
- "p": "\u067E"
- "Ch": "\u0686"
- "ch": "\u0686"
- "V": "\u06A4"
- "v": "\u06A4"
- "G": "\u06AF"
- "g": "\u06AF"
- # Diacritic characters:
- # ain (\u0639) - not transliterated alone:
- "\u02BB": "\u0639"
- # hamza - not romanized
- # "\u0621"
- # hamza (alone in final position)
- "\u02BE%": "\u0621"
- "\u02BC%": "\u0621"
- # Do not know what, if anything, is needed here:
- # tatweel:
- # "\u0640"
- # fathatan:
- # "\u064B"
- # dammatan:
- # "\u064C"
- # kasratan:
- # "\u064D"
- # fatha:
- # "\u064E"
- # damma:
- # "\u064F"
- # kasra:
- # "\u0650"
- # shadda:
- # "\u0651"
- # sukun:
- # "\u0652"
- # superscript alef:
- # "\u0670"
- # alef wasla
- # "\u0671"
- # Script-to-Roman direction: as far as visible here, this section defines
- # no map of its own; it only registers a `post_config` hook naming
- # arabic.camel_tools.s2r_post_config.
- # NOTE(review): presumably that hook performs the actual S2R conversion —
- # confirm against the hook implementation.
- script_to_roman:
- hooks:
- post_config:
- -
- - arabic.camel_tools.s2r_post_config
|