# Arabic S2R using the 3rd-party ArabicTransliterator library:
# https://github.com/MTG/ArabicTransliterator
# NOTE(review): the only hook configured at the bottom of this file is
# `arabic.camel_tools.s2r_post_config`; confirm that the library named above
# is still the one actually in use.
---
# Table metadata.
general:
  name: Arabic
  description: >
    Arabic R2S using a conversion table and S2R using a 3rd party library.
  case_sensitive: false
  parents:
    - _ignore_base

# Roman-to-script (R2S) conversion table. Each key is a romanized token and
# each value is its Arabic-script replacement, written as \uXXXX escapes.
roman_to_script:
  map:
    # Original table by David Bucknum, 5 April 2010
    # Updated, 25 January 2019
    # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
    # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin
    # Scripts Conceptually"
    # Updated, 26 March 2025 by Randall K. Barry to reverse truncation marks
    # for ScriptShifter
    #
    # NOTE(review): judging by the section comments below, a "%" in a key is
    # the truncation mark: trailing "%" = end of word, leading "%" =
    # beginning of word. Confirm against the ScriptShifter documentation.

    # Punctuation marks:
    "*": "\u066D"
    ",": "\u060C"
    ";": "\u061B"
    "?": "\u061F"

    # Exceptions for specific words
    # Allah
    "Alla\u0304h": "\u0627\u0644\u0644\u0647"
    # Qur'an
    "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
    # lillah
    "lilla\u0304h": "\u0644\u0644\u0647"
    # billah
    "billa\u0304h": "\u0628\u0644\u0644\u0647"
    # Rahman
    "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
    # Ruwat
    "Ruwa\u0304t": "\u0631\u0648\u0627\u0629"
    "ruwa\u0304t": "\u0631\u0648\u0627\u0629"
    # Hadha
    "Ha\u0304dha\u0304": "\u0647\u0630\u0627"
    "ha\u0304dha\u0304": "\u0647\u0630\u0627"
    # Hadhihi
    "Ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
    "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
    # dhalika
    "dha\u0304lika": "\u0630\u0644\u0643"
    # Ibn when it appears in the middle of a name sequence
    "ibn": "\u0628\u0646"
    # H[dot below]aya[macron]t
    "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
    "H\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
    # "sh[dot below]" as in "Ishaq"
    "sh\u0323": "\u0633\u062D"
    # "s[prime]h" combos
    "s\u02B9h": "\u0633\u0647"
    # "th[dot below]"
    "th\u0323": "\u062A\u062D"
    # "dh[dot below]"
    "dh\u0323": "\u062F\u062D"
    # La-hu
    "la-hu": "\u0644\u0647"
    # Mi'ah (both hamza spellings, \u02BE and \u02BC, are listed)
    "Mi\u02BEah": "\u0645\u0627\u0626\u0629"
    "Mi\u02BCah": "\u0645\u0627\u0626\u0629"
    "mi\u02BEah": "\u0645\u0627\u0626\u0629"
    "mi\u02BCah": "\u0645\u0627\u0626\u0629"
    # Mi'at
    "Mi\u02BEat": "\u0645\u0627\u0626\u0629"
    "Mi\u02BCat": "\u0645\u0627\u0626\u0629"
    "mi\u02BEat": "\u0645\u0627\u0626\u0629"
    "mi\u02BCat": "\u0645\u0627\u0626\u0629"

    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
    # will technically use \u06F0-\u06F9. This needs further discussion with
    # PSD, as RLIN21 used Hindi numbers; Connexion and Voyager do not.)
    # Edition statements with Latin number: the digit is kept as a Latin
    # digit in the output instead of being converted by the digit rules below.
    "al-T\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
    "al-T\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
    "al-T\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
    "al-T\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
    "al-T\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
    "al-T\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
    "al-T\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
    "al-T\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
    "al-T\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"
    # Use Basic Arabic-Indic \u0660-\u0669
    "0": "\u0660"
    "1": "\u0661"
    "2": "\u0662"
    "3": "\u0663"
    "4": "\u0664"
    "5": "\u0665"
    "6": "\u0666"
    "7": "\u0667"
    "8": "\u0668"
    "9": "\u0669"

    # Hyphenated prefixes:
    "wa-": "\u0648"
    "bi-": "\u0628"
    "al-": "\u0627\u0644"
    "lil-": "\u0644\u0644"
    "li-": "\u0644"
    "la\u0304-": "\u0644"
    "fi\u0304-": "\u0641\u064A"
    "ka-": "\u0643"

    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
    "ah%": "\u0629"
    "at%": "\u0629"
    # tanwin at end of word
    "an%": "\u0627"
    # ayn-alif combo
    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
    "\u02BBA\u0304": "\u0639\u0627"
    "\u02BBa\u0304": "\u0639\u0627"
    "\u02BBI\u0304Y": "\u0639\u064A"
    "\u02BBi\u0304y": "\u0639\u064A"
    "\u02BBI\u0304": "\u0639\u064A"
    "\u02BBi\u0304": "\u0639\u064A"
    "\u02BBU\u0304": "\u0639\u0648"
    "\u02BBu\u0304": "\u0639\u0648"
    "\u02BBU": "\u0639"
    "\u02BBu": "\u0639"
    "%\u02BBA": "\u0639"
    # "%\u02BBa": "\u0639"

    # alif and hamzas for all occasions
    # Is the truncation mark necessary here? It seems to work fine with it.
    "i\u0304\u02BEah%": "\u064A\u0626\u0629"
    "i\u0304\u02BCah%": "\u064A\u0626\u0629"
    "i\u0304\u02BEat%": "\u064A\u0626\u0629"
    "i\u0304\u02BCat%": "\u064A\u0626\u0629"
    "i\u02BEa\u0304%": "\u0626\u0627"
    "i\u02BCa\u0304%": "\u0626\u0627"
    "i\u02BE%": "\u0626"
    "i\u02BC%": "\u0626"
    "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
    "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
    "a\u02BE": "\u0623"
    "a\u02BC": "\u0623"
    "\u02BEi": "\u0626"
    "\u02BCi": "\u0626"
    "\u02BEa\u0304": "\u0622"
    "\u02BCa\u0304": "\u0622"
    "\u02BEa": "\u0623"
    "\u02BCa": "\u0623"
    "y\u02BCah": "\u064A\u0626\u0629"
    "y\u02BEah": "\u064A\u0626\u0629"
    "y\u02BCat": "\u064A\u0626\u0629"
    "y\u02BEat": "\u064A\u0626\u0629"

    # A
    "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
    "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
    "a\u0304\u02BCi": "\u0627\u0626"
    "a\u0304\u02BEi": "\u0627\u0626"
    "a\u0304\u02BC": "\u0627\u0621"
    "a\u0304\u02BE": "\u0627\u0621"
    "%A\u0304": "\u0622"
    "%a\u0304": "\u0622"
    "A\u0304": "\u0627"
    "a\u0304": "\u0627"
    # These next two lines were intended to convert to alif-ayn when it is at
    # the beginning of a word, definite or indefinite (i.e.
    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l)
    "%A\u02BB": "\u0623\u0639"
    "%a\u02BB": "\u0623\u0639"
    "a\u02BB": "\u0639"
    "A\u0301": "\u0649"
    "a\u0301": "\u0649"
    "ayy": "\u064A"
    "%A": "\u0623"
    "%a": "\u0627"
    "A": "\u0623"
    # Plain short "a" maps to the empty string.
    "a": ""

    # I - Capital I at beginning of word is usually alif hamzah-below.
    "i\u0304%": "\u064A"
    "i\u0304y": "\u064A"
    "iy": "\u064A"
    "%I\u0304": "\u0625\u064A"
    "i\u0304": "\u064A"
    "%\u02BBI": "\u0639"
    # "i\u02BB": "\u0625\u0639"
    "I\u02BE": "\u0627\u0626"
    "I\u02BC": "\u0627\u0626"
    # NOTE(review): the next two entries differ (\u0626 vs \u0627\u0626)
    # although every other \u02BE / \u02BC pair in this table maps to the
    # same output. Confirm whether the difference is intentional.
    "i\u02BE": "\u0626"
    "i\u02BC": "\u0627\u0626"
    "%I": "\u0625"
    "%i": "\u0625"
    "I": "\u0625"
    # Plain short "i" maps to the empty string.
    "i": ""

    # U
    "u\u0304\u02BE": "\u0624"
    "u\u0304\u02BC": "\u0624"
    "%U\u0304w": "\u0623\u0648"
    "%u\u0304w": "\u0623\u0648"
    "%U\u0304": "\u0623\u0648"
    "%u\u0304": "\u0623\u0648"
    "u\u0304w": "\u0648"
    "u\u0304": "\u0648"
    "u\u02BE": "\u0624"
    "u\u02BC": "\u0624"
    "%U": "\u0623"
    "%u": "\u0623"
    "U": "\u0623"
    # Plain short "u" maps to the empty string.
    "u": ""

    # Consonants, with tashdid added: the capital, doubled and single forms
    # of each consonant all map to the same single Arabic letter.
    "B": "\u0628"
    "bb": "\u0628"
    "b": "\u0628"
    "Th": "\u062B"
    "thth": "\u062B"
    "th": "\u062B"
    "T\u0323": "\u0637"
    "t\u0323t\u0323": "\u0637"
    "t\u0323": "\u0637"
    "T": "\u062A"
    "tt": "\u062A"
    "t": "\u062A"
    "J": "\u062C"
    "jj": "\u062C"
    "j": "\u062C"
    "H\u0323": "\u062D"
    "h\u0323h\u0323": "\u062D"
    "h\u0323": "\u062D"
    "H": "\u0647"
    "hh": "\u0647"
    "h": "\u0647"
    "Kh": "\u062E"
    "khkh": "\u062E"
    "kh": "\u062E"
    "K": "\u0643"
    "kk": "\u0643"
    "k": "\u0643"
    "Dh": "\u0630"
    "dhdh": "\u0630"
    "dh": "\u0630"
    "D\u0323": "\u0636"
    "d\u0323d\u0323": "\u0636"
    "d\u0323": "\u0636"
    "D": "\u062F"
    "dd": "\u062F"
    "d": "\u062F"
    "R": "\u0631"
    "rr": "\u0631"
    "r": "\u0631"
    "Z\u0323": "\u0638"
    "z\u0323z\u0323": "\u0638"
    "z\u0323": "\u0638"
    "Z": "\u0632"
    "zz": "\u0632"
    "z": "\u0632"
    "Sh": "\u0634"
    "shsh": "\u0634"
    "sh": "\u0634"
    "S\u0323": "\u0635"
    "s\u0323s\u0323": "\u0635"
    "s\u0323": "\u0635"
    "S": "\u0633"
    "ss": "\u0633"
    "s": "\u0633"
    "Gh": "\u063A"
    "ghgh": "\u063A"
    "gh": "\u063A"
    "F": "\u0641"
    "ff": "\u0641"
    "f": "\u0641"
    "Q": "\u0642"
    "qq": "\u0642"
    "q": "\u0642"
    "L": "\u0644"
    "ll": "\u0644"
    "l": "\u0644"
    "M": "\u0645"
    "mm": "\u0645"
    "m": "\u0645"
    "N": "\u0646"
    "nn": "\u0646"
    "n": "\u0646"
    "W": "\u0648"
    "ww": "\u0648"
    "w": "\u0648"
    "Y": "\u064A"
    "yy": "\u064A"
    "y": "\u064A"

    # non-Arabic consonants:
    "P": "\u067E"
    "p": "\u067E"
    "Ch": "\u0686"
    "ch": "\u0686"
    "V": "\u06A4"
    "v": "\u06A4"
    "G": "\u06AF"
    "g": "\u06AF"

    # Diacritic characters:
    # ain (\u0639) - not transliterated alone:
    "\u02BB": "\u0639"
    # hamza - not romanized
    # "\u0621"
    # hamza (alone in final position)
    "\u02BE%": "\u0621"
    "\u02BC%": "\u0621"
    # Do not know what, if anything, is needed here:
    # tatweel:
    # "\u0640"
    # fathatan:
    # "\u064B"
    # dammatan:
    # "\u064C"
    # kasratan:
    # "\u064D"
    # fatha:
    # "\u064E"
    # damma:
    # "\u064F"
    # kasra:
    # "\u0650"
    # shadda:
    # "\u0651"
    # sukun:
    # "\u0652"
    # superscript alef:
    # "\u0670"
    # alef wasla
    # "\u0671"

# Script-to-Roman (S2R): no map is defined in the visible part of this file;
# only a hook is configured.
# NOTE(review): the nesting below (a list holding one single-item list) was
# reconstructed from a flattened copy that showed a lone "-" followed by
# "- arabic.camel_tools.s2r_post_config" — confirm against the hook schema.
script_to_roman:
  hooks:
    post_config:
      -
        - arabic.camel_tools.s2r_post_config