ArabicRomanization.cfg 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. # version 1.1.1
  2. # Original table by David Bucknum
  3. # Last updated 25 January 2019
  4. # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
  5. # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually"
  6. [General]
  7. Name=Arabic
  8. Truncation=%
  9. [RomanToScript]
  10. FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  11. SubfieldsAlwaysExcluded=uvxy0123456789
  12. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  13. Subfield6Code=(3
  14. IncludeFormattingCharactersLcPattern=True
  15. # RDA boilerplate phrases not transliterated:
  16. Place of publication not identified=Place of publication not identified
  17. publisher not identified=publisher not identified
  18. # Punctuation marks:
  19. # %=U+066A ; cannot transliterate the truncation character
  20. *=U+066D
  21. ,=U+060C
  22. ;=U+061B
  23. ?=U+061F
  24. # Exceptions for specific words
  25. # Allah
  26. AllaU+0304h=U+0627U+0644U+0644U+0647
  27. # Qur'an
  28. QurU+02BCaU+0304n=U+0642U+0631U+0622U+0646
  29. # lillah
  30. lillaU+0304h=U+0644U+0644U+0647
  31. # billah
  32. billaU+0304h=U+0628U+0644U+0644U+0647
  33. # Rahman
  34. RahU+0323maU+0304n=U+0631U+062DU+0645U+0646
  35. # Ruwat
  36. RuwaU+0304t=U+0631U+0648U+0627U+0629
  37. ruwaU+0304t=U+0631U+0648U+0627U+0629
  38. # Hadha
  39. HaU+0304dhaU+0304=U+0647U+0630U+0627
  40. haU+0304dhaU+0304=U+0647U+0630U+0627
  41. # Hadhihi
  42. HaU+0304dhiU+0304hi=U+0647U+0630U+0647
  43. haU+0304dhiU+0304hi=U+0647U+0630U+0647
  44. # dhalika
  45. dhaU+0304lika=U+0630U+0644U+0643
  46. # Ibn when it appears in the middle of a name sequence
  47. ibn=U+0628U+0646
  48. # H[dot below]aya[macron]t (code points are written with an uppercase "U+" prefix, as in every other rule of this table)
  49. hU+0323ayaU+0304t=U+062DU+064AU+0627U+0629
  50. HU+0323ayaU+0304t=U+062DU+064AU+0627U+0629
  51. # "sh[dot below]" (s followed by h[dot below]) as in "Ishaq"
  52. %shU+0323%=U+0633U+062D
  53. # "s[prime]h" combos
  54. %sU+02B9h%=U+0633U+0647
  55. # "th[dot below]"
  56. %thU+0323%=U+062AU+062D
  57. # "dh[dot below]" (d followed by h[dot below])
  58. %dhU+0323%=U+062FU+062D
  59. # La-hu
  60. la-hu=U+0644U+0647
  61. # Mi'ah
  62. MiU+02BEah=U+0645U+0627U+0626U+0629
  63. MiU+02BCah=U+0645U+0627U+0626U+0629
  64. miU+02BEah=U+0645U+0627U+0626U+0629
  65. miU+02BCah=U+0645U+0627U+0626U+0629
  66. # Mi'at
  67. MiU+02BEat=U+0645U+0627U+0626U+0629
  68. MiU+02BCat=U+0645U+0627U+0626U+0629
  69. miU+02BEat=U+0645U+0627U+0626U+0629
  70. miU+02BCat=U+0645U+0627U+0626U+0629
  71. # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use U+06F0-06F9. This needs further discussion with PSD, as RLIN21 used Hindi numbers while Connexion and Voyager do not.)
  72. # Edition statements with Latin number
  73. al-TU+0323abU+02BBah 1=U+0627U+0644U+0637U+0628U+0639U+0629 1
  74. al-TU+0323abU+02BBah 2=U+0627U+0644U+0637U+0628U+0639U+0629 2
  75. al-TU+0323abU+02BBah 3=U+0627U+0644U+0637U+0628U+0639U+0629 3
  76. al-TU+0323abU+02BBah 4=U+0627U+0644U+0637U+0628U+0639U+0629 4
  77. al-TU+0323abU+02BBah 5=U+0627U+0644U+0637U+0628U+0639U+0629 5
  78. al-TU+0323abU+02BBah 6=U+0627U+0644U+0637U+0628U+0639U+0629 6
  79. al-TU+0323abU+02BBah 7=U+0627U+0644U+0637U+0628U+0639U+0629 7
  80. al-TU+0323abU+02BBah 8=U+0627U+0644U+0637U+0628U+0639U+0629 8
  81. al-TU+0323abU+02BBah 9=U+0627U+0644U+0637U+0628U+0639U+0629 9
  82. # Use Basic Arabic-Indic U+0660-0669
  83. 0=U+0660
  84. 1=U+0661
  85. 2=U+0662
  86. 3=U+0663
  87. 4=U+0664
  88. 5=U+0665
  89. 6=U+0666
  90. 7=U+0667
  91. 8=U+0668
  92. 9=U+0669
  93. # Hyphenated prefixes:
  94. wa-=U+0648
  95. bi-=U+0628
  96. al-=U+0627U+0644
  97. lil-=U+0644U+0644
  98. li-=U+0644
  99. laU+0304-=U+0644
  100. fiU+0304-=U+0641U+064A
  101. ka-=U+0643
  102. # Vowels and vowel/consonant combinations
  103. %ah=U+0629
  104. %at=U+0629
  105. #tanwin
  106. %an=U+0627
  107. # ayn-alif combo
  108. %U+02BBaU+0304U+02BE=U+0639U+0627U+0621
  109. %U+02BBaU+0304U+02BC=U+0639U+0627U+0621
  110. U+02BBAU+0304=U+0639U+0627
  111. U+02BBaU+0304=U+0639U+0627
  112. U+02BBIU+0304=U+0639U+064A
  113. U+02BBiU+0304=U+0639U+064A
  114. U+02BBUU+0304=U+0639U+0648
  115. U+02BBuU+0304=U+0639U+0648
  116. U+02BBU=U+0639
  117. U+02BBu=U+0639
  118. U+02BBA%=U+0639
  119. #U+02BBa%=U+0639
  120. # alif and hamzas for all occasions
  121. # Is the truncation character necessary here? It seems to work fine with it.
  122. %iU+0304U+02BEah=U+064AU+0626U+0629
  123. %iU+0304U+02BCah=U+064AU+0626U+0629
  124. %iU+0304U+02BEat=U+064AU+0626U+0629
  125. %iU+0304U+02BCat=U+064AU+0626U+0629
  126. %iU+02BEaU+0304=U+0626U+0627
  127. %iU+02BCaU+0304=U+0626U+0627
  128. %iU+02BE=U+0626
  129. %iU+02BC=U+0626
  130. aU+0304U+02BEaU+0304=U+0627U+0621U+0627
  131. aU+0304U+02BCaU+0304=U+0627U+0621U+0627
  132. aU+02BE=U+0623
  133. aU+02BC=U+0623
  134. U+02BEi=U+0626
  135. U+02BCi=U+0626
  136. U+02BEaU+0304=U+0622
  137. U+02BCaU+0304=U+0622
  138. U+02BEa=U+0623
  139. U+02BCa=U+0623
  140. yU+02BCah=U+064AU+0626U+0629
  141. yU+02BEah=U+064AU+0626U+0629
  142. yU+02BCat=U+064AU+0626U+0629
  143. yU+02BEat=U+064AU+0626U+0629
  144. # A
  145. aU+0304U+02BCiU+0304=U+0627U+0626U+064A
  146. aU+0304U+02BEiU+0304=U+0627U+0626U+064A
  147. aU+0304U+02BCi=U+0627U+0626
  148. aU+0304U+02BEi=U+0627U+0626
  149. aU+0304U+02BC=U+0627U+0621
  150. aU+0304U+02BE=U+0627U+0621
  151. AU+0304%=U+0622
  152. aU+0304%=U+0622
  153. AU+0304=U+0627
  154. aU+0304=U+0627
  155. # These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinite (i.e. "al-a[ayn]ma[macron]l" or "[space]a[ayn]ma[macron]l")
  156. AU+02BB%=U+0623U+0639
  157. aU+02BB%=U+0623U+0639
  158. aU+02BB=U+0639
  159. AU+0301=U+0649
  160. aU+0301=U+0649
  161. ayy=U+064A
  162. A%=U+0623
  163. a%=U+0627
  164. A=U+0623
  165. a=
  166. # I - Capital I at beginning of word is usually alif hamzah-below. Lowercase i + hamzah maps to ya-with-hamzah (U+0626) for both hamzah spellings (U+02BE and U+02BC).
  167. %iU+0304=U+064A
  168. iU+0304y=U+064A
  169. iy=U+064A
  170. IU+0304%=U+0625U+064A
  171. iU+0304=U+064A
  172. U+02BBI%=U+0639
  173. #iU+02BB=U+0625U+0639
  174. IU+02BE=U+0627U+0626
  175. IU+02BC=U+0627U+0626
  176. iU+02BE=U+0626
  177. iU+02BC=U+0626
  178. I%=U+0625
  179. i%=U+0625
  180. I=U+0625
  181. i=
  182. # U
  183. uU+0304U+02BE=U+0624
  184. uU+0304U+02BC=U+0624
  185. UU+0304w%=U+0623U+0648
  186. uU+0304w%=U+0623U+0648
  187. UU+0304%=U+0623U+0648
  188. uU+0304%=U+0623U+0648
  189. uU+0304w=U+0648
  190. uU+0304=U+0648
  191. uU+02BE=U+0624
  192. uU+02BC=U+0624
  193. U%=U+0623
  194. u%=U+0623
  195. U=U+0623
  196. u=
  197. # Consonants, with tashdid added
  198. B=U+0628
  199. bb=U+0628
  200. b=U+0628
  201. Th=U+062B
  202. thth=U+062B
  203. th=U+062B
  204. TU+0323=U+0637
  205. tU+0323tU+0323=U+0637
  206. tU+0323=U+0637
  207. T=U+062A
  208. tt=U+062A
  209. t=U+062A
  210. J=U+062C
  211. jj=U+062C
  212. j=U+062C
  213. HU+0323=U+062D
  214. hU+0323hU+0323=U+062D
  215. hU+0323=U+062D
  216. H=U+0647
  217. hh=U+0647
  218. h=U+0647
  219. Kh=U+062E
  220. khkh=U+062E
  221. kh=U+062E
  222. K=U+0643
  223. kk=U+0643
  224. k=U+0643
  225. Dh=U+0630
  226. dhdh=U+0630
  227. dh=U+0630
  228. DU+0323=U+0636
  229. dU+0323dU+0323=U+0636
  230. dU+0323=U+0636
  231. D=U+062F
  232. dd=U+062F
  233. d=U+062F
  234. R=U+0631
  235. rr=U+0631
  236. r=U+0631
  237. ZU+0323=U+0638
  238. zU+0323zU+0323=U+0638
  239. zU+0323=U+0638
  240. Z=U+0632
  241. zz=U+0632
  242. z=U+0632
  243. Sh=U+0634
  244. shsh=U+0634
  245. sh=U+0634
  246. SU+0323=U+0635
  247. sU+0323sU+0323=U+0635
  248. sU+0323=U+0635
  249. S=U+0633
  250. ss=U+0633
  251. s=U+0633
  252. Gh=U+063A
  253. ghgh=U+063A
  254. gh=U+063A
  255. F=U+0641
  256. ff=U+0641
  257. f=U+0641
  258. Q=U+0642
  259. qq=U+0642
  260. q=U+0642
  261. L=U+0644
  262. ll=U+0644
  263. l=U+0644
  264. M=U+0645
  265. mm=U+0645
  266. m=U+0645
  267. N=U+0646
  268. nn=U+0646
  269. n=U+0646
  270. W=U+0648
  271. ww=U+0648
  272. w=U+0648
  273. Y=U+064A
  274. yy=U+064A
  275. y=U+064A
  276. # non-Arabic consonants:
  277. P=U+067E
  278. p=U+067E
  279. Ch=U+0686
  280. ch=U+0686
  281. V=U+06A4
  282. v=U+06A4
  283. G=U+06AF
  284. g=U+06AF
  285. # Diacritic characters:
  286. # ain (U+0639) - not transliterated alone:
  287. U+02BB=U+0639
  288. # hamza - not romanized
  289. # =U+0621
  290. # hamza (alone in final position)
  291. %U+02BE=U+0621
  292. %U+02BC=U+0621
  293. # Do not know what, if anything, is needed here:
  294. # tatweel:
  295. # =U+0640
  296. # fathatan:
  297. # =U+064B
  298. # dammatan:
  299. # =U+064C
  300. # kasratan:
  301. # =U+064D
  302. # fatha:
  303. # =U+064E
  304. # damma:
  305. # =U+064F
  306. # kasra:
  307. # =U+0650
  308. # shadda:
  309. # =U+0651
  310. # sukun:
  311. # =U+0652
  312. # superscript alef:
  313. # =U+0670
  314. # alef wasla
  315. # =U+0671
  316. [ScriptToRoman]