urdu.yml 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. ---
  2. general:
  3. name: Urdu
  4. case_sensitive: false
  5. roman_to_script:
  6. map:
  7. # Punctuation marks:
  8. # "%": "\u066A"; cannot transliterate the truncation character
  9. "*": "\u066D"
  10. ",": "\u060C"
  11. ";": "\u061B"
  12. "?": "\u061F"
  13. # Exceptions for specific words
  14. # Allah
  15. "Alla\u0304h": "\u0627\u0644\u0644\u0647"
  16. "alla\u0304h": "\u0627\u0644\u0644\u0647"
  17. # Qur'an
  18. "Qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
  19. "qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
  20. # aur (with spaces)
  21. " aur ": " \u0627\u0648\u0631 "
  22. "Aur ": "\u0627\u0648\u0631 "
  23. ####
  24. # Abdurrahman
  25. "\u02BBAbdurrah\u0323ma\u0301n": "\u0639\u0628\u062F\u0627\u0644\u0631\u062D\u0645\u0670\u0646"
  26. "\u02BBAbdurrah\u0323ma\u0304n": "\u0639\u0628\u062F\u0627\u0644\u0631\u062D\u0645\u0670\u0646"
  27. # Abd names
  28. "\u02BBAbdul\u02BB": "\u0639\u0628\u062F\u0627\u0644\u0639"
  29. "\u02BBAbdula": "\u0639\u0628\u062F\u0627\u0644"
  30. "\u02BBAbdulb": "\u0639\u0628\u062F\u0627\u0644\u0628"
  31. "\u02BBAbdulf": "\u0639\u0628\u062F\u0627\u0644\u0641"
  32. "\u02BBAbdulg\u0332h\u0332": "\u0639\u0628\u062F\u0627\u0644\u063A"
  33. "\u02BBAbdulh\u0323": "\u0639\u0628\u062F\u0627\u0644\u062D"
  34. "\u02BBAbdulh": "\u0639\u0628\u062F\u0627\u0644\u0647"
  35. "\u02BBAbdulj": "\u0639\u0628\u062F\u0627\u0644\u062C"
  36. "\u02BBAbdulk\u0332h\u0332": "\u0639\u0628\u062F\u0627\u0644\u062E"
  37. "\u02BBAbdulk": "\u0639\u0628\u062F\u0627\u0644\u06A9"
  38. "\u02BBAbdulm": "\u0639\u0628\u062F\u0627\u0644\u0645"
  39. "\u02BBAbdulq": "\u0639\u0628\u062F\u0627\u0644\u0642"
  40. "\u02BBAbdulv": "\u0639\u0628\u062F\u0627\u0644\u0648"
  41. "\u02BBAbdunn": "\u0639\u0628\u062F\u0627\u0644\u0646"
  42. "\u02BBAbdurr": "\u0639\u0628\u062F\u0627\u0644\u0631"
  43. "\u02BBAbdus\u0323s\u0323": "\u0639\u0628\u062F\u0627\u0644\u0635"
  44. "\u02BBAbduss": "\u0639\u0628\u062F\u0627\u0644\u0633"
  45. "\u02BBAbdushsh": "\u0639\u0628\u062F\u0627\u0644\u0634"
  46. "\u02BBAbdutt": "\u0639\u0628\u062F\u0627\u0644\u062A"
  47. "\u02BBAbduz\u0323z\u0323": "\u0639\u0628\u062F\u0627\u0644\u0636"
  48. "\u02BBAbduz\u0324z\u0324": "\u0639\u0628\u062F\u0627\u0644\u0638"
  49. # Abu names
  50. "Abu\u0304 ": "\u0627\u0628\u0648\u0020"
  51. "Abu\u0304": "\u0627\u0628\u0648\u200C\u0627\u0644"
  52. ####
  53. # lillah
  54. "lilla\u0304h": "\u0644\u0644\u0647"
  55. # billah
  56. "billa\u0304h": "\u0628\u0644\u0644\u0647"
  57. # Rahman
  58. "Rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
  59. # Nuzhat
  60. "Nuzhat": "\u0646\u0632\u0647\u062A"
  61. # Uddin names
  62. "i\u0304uddi\u0304n%": "\u0649\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
  63. "uddi\u0304n%": "\u200C\u0627\u0644\u062F\u0651\u064A\u0646"
  64. # ta'lif
  65. # Ibn when it appears in the middle of a name sequence
  66. "ibn": "\u0628\u0646"
  67. # Abbreviated name elements
  68. "# Ae": "\u0627\u06D2"
  69. # Parsing "s" followed by "h" + dot below (two separate letters, not "sh"), as in "Ishaq" [name]
  70. "sh\u0323": "\u0633\u062D"
  71. # Numbers (\u06F0-\u06F9, the Extended Arabic-Indic digits used for Persian/Urdu)
  72. # currently *not* valid MARC21 characters; the keys below are quoted with a leading "# "
  73. "# 0": "\u06F0"
  74. "# 1": "\u06F1"
  75. "# 2": "\u06F2"
  76. "# 3": "\u06F3"
  77. "# 4": "\u06F4"
  78. "# 5": "\u06F5"
  79. "# 6": "\u06F6"
  80. "# 7": "\u06F7"
  81. "# 8": "\u06F8"
  82. "# 9": "\u06F9"
  83. # Postpositions
  84. # Aspirates (consonant + do-chashmi he, U+06BE) vs. consonant + hah (U+062D) combinations
  85. "bh\u0323": "\u0628\u062D"
  86. "Bh": "\u0628\u06BE"
  87. "bh": "\u0628\u06BE"
  88. "ph\u0323": "\u067E\u062D"
  89. "Ph": "\u067E\u06BE"
  90. "ph": "\u067E\u06BE"
  91. "th\u0323": "\u062A\u062D"
  92. "Th": "\u062A\u06BE"
  93. "th": "\u062A\u06BE"
  94. "t\u0323h\u0323": "\u0679\u062D"
  95. "T\u0323h": "\u0679\u06BE"
  96. "t\u0323h": "\u0679\u06BE"
  97. "jh\u0323": "\u062C\u062D"
  98. "Jh": "\u062C\u06BE"
  99. "jh": "\u062C\u06BE"
  100. "ch\u0323": "\u0686\u062D"
  101. "Ch": "\u0686\u06BE"
  102. "ch": "\u0686\u06BE"
  103. "dh\u0323": "\u062F\u062D"
  104. "Dh": "\u062F\u06BE"
  105. "dh": "\u062F\u06BE"
  106. "d\u0323h\u0323": "\u0688\u062D"
  107. "D\u0323h": "\u0688\u06BE"
  108. "d\u0323h": "\u0688\u06BE"
  109. "r\u0323h\u0323": "\u0691\u062D"
  110. "R\u0323h": "\u0691\u06BE"
  111. "r\u0323h": "\u0691\u06BE"
  112. "kh\u0323": "\u06A9\u062D"
  113. "Kh": "\u06A9\u06BE"
  114. "kh": "\u06A9\u06BE"
  115. "gh\u0323": "\u06AF\u062D"
  116. "Gh": "\u06AF\u06BE"
  117. "gh": "\u06AF\u06BE"
  118. # prime (U+02B9) = ZWNJ (U+200C)
  119. "\u02B9A\u0304": "\u200C\u0622"
  120. "\u02B9a\u0304": "\u200C\u0622"
  121. "a\u0304\u02BC\u02B9": "\u0627\u0621\u200C"
  122. "i\u0304\u02B9": "\u0649\u200C"
  123. "\u02B9": "\u200C"
  124. # Izafah here
  125. "a\u0304-yi%": "\u0627\u0626\u06D2"
  126. "u\u0304-yi%": "\u0648\u0626\u06D2"
  127. "o-yi%": "\u0648\u0626\u06D2"
  128. "e-yi%": "\u06D2"
  129. "i\u0304-yi%": "\u0649"
  130. "h-yi%": "\u06C0"
  131. "-yi%": "\u06C0"
  132. "al-i%": "\u0644"
  133. "ul-i%": "\u0644"
  134. "-i%": ""
  135. # Hyphenated prefixes:
  136. "bi-": "\u0628"
  137. "%al-a\u0304": "\u0627\u0644\u0627"
  138. "%ul-a\u0304": "\u0627\u0644\u0627"
  139. "al-": "\u0627\u0644"
  140. "ul-": "\u0627\u0644"
  141. "lil-i": "\u0644\u0644"
  142. "lil-": "\u0644\u0644"
  143. # al-/ul- plus sun letters
  144. "ar-r": "\u0627\u0644\u0631"
  145. "ur-r": "\u0627\u0644\u0631"
  146. "ar-R": "\u0627\u0644\u0631"
  147. "ur-R": "\u0627\u0644\u0631"
  148. "az\u0332-z\u0332": "\u0627\u0644\u0630"
  149. "uz\u0332-z\u0332": "\u0627\u0644\u0630"
  150. "az\u0332-Z\u0332": "\u0627\u0644\u0630"
  151. "uz\u0332-Z\u0332": "\u0627\u0644\u0630"
  152. "ad-d": "\u0627\u0644\u062F"
  153. "ud-d": "\u0627\u0644\u062F"
  154. "ad-D": "\u0627\u0644\u062F"
  155. "ud-D": "\u0627\u0644\u062F"
  156. "as\u0332-s\u0332": "\u0627\u0644\u062B"
  157. "us\u0332-s\u0332": "\u0627\u0644\u062B"
  158. "as\u0332-S\u0332": "\u0627\u0644\u062B"
  159. "us\u0332-S\u0332": "\u0627\u0644\u062B"
  160. "at-t": "\u0627\u0644\u062A"
  161. "ut-t": "\u0627\u0644\u062A"
  162. "at-T": "\u0627\u0644\u062A"
  163. "ut-T": "\u0627\u0644\u062A"
  164. "an-n": "\u0627\u0644\u0646"
  165. "un-n": "\u0627\u0644\u0646"
  166. "an-N": "\u0627\u0644\u0646"
  167. "un-N": "\u0627\u0644\u0646"
  168. "al-l": "\u0627\u0644\u0644"
  169. "ul-l": "\u0627\u0644\u0644"
  170. "al-L": "\u0627\u0644\u0644"
  171. "ul-L": "\u0627\u0644\u0644"
  172. "az\u0324-z\u0324": "\u0627\u0644\u0638"
  173. "uz\u0324-z\u0324": "\u0627\u0644\u0638"
  174. "az\u0324-Z\u0324": "\u0627\u0644\u0638"
  175. "uz\u0324-Z\u0324": "\u0627\u0644\u0638"
  176. "at\u0324-t\u0324": "\u0627\u0644\u0637"
  177. "ut\u0324-t\u0324": "\u0627\u0644\u0637"
  178. "at\u0324-T\u0324": "\u0627\u0644\u0637"
  179. "ut\u0324-T\u0324": "\u0627\u0644\u0637"
  180. "az\u0323-z\u0323": "\u0627\u0644\u0636"
  181. "uz\u0323-z\u0323": "\u0627\u0644\u0636"
  182. "az\u0323-Z\u0323": "\u0627\u0644\u0636"
  183. "uz\u0323-Z\u0323": "\u0627\u0644\u0636"
  184. "as\u0323-s\u0323": "\u0627\u0644\u0635"
  185. "us\u0323-s\u0323": "\u0627\u0644\u0635"
  186. "as\u0323-S\u0323": "\u0627\u0644\u0635"
  187. "us\u0323-S\u0323": "\u0627\u0644\u0635"
  188. "ash-sh": "\u0627\u0644\u0634"
  189. "ush-sh": "\u0627\u0644\u0634"
  190. "ash-Sh": "\u0627\u0644\u0634"
  191. "ush-Sh": "\u0627\u0644\u0634"
  192. "as-s": "\u0627\u0644\u0633"
  193. "us-s": "\u0627\u0644\u0633"
  194. "as-S": "\u0627\u0644\u0633"
  195. "us-S": "\u0627\u0644\u0633"
  196. "az-z": "\u0627\u0644\u0632"
  197. "uz-z": "\u0627\u0644\u0632"
  198. "az-Z": "\u0627\u0644\u0632"
  199. "uz-Z": "\u0627\u0644\u0632"
  200. # Diphthongs here
  201. "Ae": "\u0627\u06D2"
  202. "ai%": "\u06D2"
  203. "Ai": "\u0627\u064A"
  204. "%ai": "\u0627\u064A"
  205. "ai": "\u064A"
  206. "\u02BBAu": "\u0639\u0648"
  207. "\u02BBau": "\u0639\u0648"
  208. "Au": "\u0627\u0648"
  209. "au": "\u0648"
  210. # ayn-alif combo
  211. "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
  212. "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
  213. "\u02BBa\u0304": "\u0639\u0627"
  214. # hamza and vowel combo
  215. # [in final position]
  216. "u\u0304\u02BEi\u0304%": "\u0648\u0626\u0649"
  217. "u\u0304\u02BCi\u0304%": "\u0648\u0626\u0649"
  218. "\u02BEi\u0304%": "\u0626\u0649"
  219. "\u02BCi\u0304%": "\u0626\u0649"
  220. "\u02BEe%": "\u0626\u06D2"
  221. "\u02BCe%": "\u0626\u06D2"
  222. "\u02BEu\u0304%": "\u0624"
  223. "\u02BCu\u0304%": "\u0624"
  224. "\u02BEo%": "\u0624"
  225. "\u02BCo%": "\u0624"
  226. # [in medial position]
  227. "a\u02BEa": "\u0623"
  228. "a\u02BCa": "\u0623"
  229. "a\u0304\u02BEa": "\u0627\u0621"
  230. "a\u0304\u02BCa": "\u0627\u0621"
  231. "a\u02BEa\u0304": "\u0622"
  232. "a\u02BCa\u0304": "\u0622"
  233. "o\u02BEi\u0304": "\u0648\u0626\u064A"
  234. "o\u02BCi\u0304": "\u0648\u0626\u064A"
  235. "o\u02BEi": "\u0648\u0626"
  236. "o\u02BCi": "\u0648\u0626"
  237. "\u02BEi\u0304": "\u0626\u064A"
  238. "\u02BCi\u0304": "\u0626\u064A"
  239. "\u02BEi": "\u0626"
  240. "\u02BCi": "\u0626"
  241. "\u02BEe": "\u0626\u064A"
  242. "\u02BCe": "\u0626\u064A"
  243. "\u02BEu\u0304": "\u0624"
  244. "\u02BCu\u0304": "\u0624"
  245. "u\u0304\u02BE": "\u0624"
  246. "u\u0304\u02BC": "\u0624"
  247. "\u02BEo": "\u0624"
  248. "\u02BCo": "\u0624"
  249. "o\u02BE": "\u0624"
  250. "o\u02BC": "\u0624"
  251. "au\u02BE": "\u0624"
  252. "au\u02BC": "\u0624"
  253. "\u02BEa": "\u0626"
  254. "\u02BCa": "\u0626"
  255. "i\u0304%": "\u0649"
  256. "a\u0301%": "\u0649\u0670"
  257. # A
  258. "\u02BBA\u0304": "\u0639\u0627"
  259. "\u02BBa\u0304": "\u0639\u0627"
  260. "%\u02BBA": "\u0639"
  261. "\u02BBa": "\u0639"
  262. "A\u02BB": "\u0627\u0639"
  263. "%a\u02BB": "\u0627\u0639"
  264. "a\u02BB": "\u0639"
  265. "%A\u0304": "\u0622"
  266. "%a\u0304": "\u0622"
  267. "a\u0304": "\u0627"
  268. "a\u0301": "\u0649"
  269. "ayy": "\u064A\u0651"
  270. "%A": "\u0627"
  271. "%a": "\u0627"
  272. "A": ""
  273. "a": ""
  274. # E
  275. "e%": "\u06D2"
  276. "%E": "\u0627\u064A"
  277. "%e": "\u0627\u064A"
  278. "e": "\u064A"
  279. # I
  280. "\u02BBI\u0304": "\u0639\u064A"
  281. "\u02BBi\u0304": "\u0639\u064A"
  282. "I\u02BB": "\u0627\u0639"
  283. "i\u02BB": "\u0639"
  284. "\u02BBI": "\u0639"
  285. "%I\u0304": "\u0627\u064A"
  286. "%i\u0304": "\u0627\u064A"
  287. "i\u0304y": "\u064A"
  288. "i\u0304": "\u064A"
  289. "iyy": "\u064A\u0651"
  290. "%I": "\u0627"
  291. "%i": "\u0627"
  292. "I": "\u0627"
  293. "i": ""
  294. # O
  295. "%O": "\u0627\u0648"
  296. "o": "\u0648"
  297. # U
  298. "\u02BBu\u0304": "\u0639\u0648"
  299. "\u02BBU": "\u0639"
  300. "\u02BBu": "\u0639"
  301. "%U\u0304": "\u0627\u0648"
  302. "%u\u0304": "\u0627\u0648"
  303. "u\u0304": "\u0648"
  304. "%U": "\u0627"
  305. "%u": "\u0627"
  306. "U": ""
  307. "u": ""
  308. # Consonants:
  309. "B": "\u0628"
  310. "bb": "\u0628\u0651"
  311. "b": "\u0628"
  312. "P": "\u067E"
  313. "pp": "\u067E\u0651"
  314. "p": "\u067E"
  315. "T\u0323": "\u0679"
  316. "t\u0323t\u0323": "\u0679\u0651"
  317. "t\u0323": "\u0679"
  318. "T\u0324": "\u0637"
  319. "t\u0324t\u0324": "\u0637\u0651"
  320. "t\u0324": "\u0637"
  321. "T": "\u062A"
  322. "tt": "\u062A\u0651"
  323. "t": "\u062A"
  324. "Sh": "\u0634"
  325. "shsh": "\u0634\u0651"
  326. "sh": "\u0634"
  327. "S\u0323": "\u0635"
  328. "s\u0323s\u0323": "\u0635\u0651"
  329. "s\u0323": "\u0635"
  330. "S\u0332": "\u062B"
  331. "s\u0332s\u0332": "\u062B\u0651"
  332. "s\u0332": "\u062B"
  333. "S": "\u0633"
  334. "ss": "\u0633\u0651"
  335. "s": "\u0633"
  336. "J": "\u062C"
  337. "jj": "\u062C\u0651"
  338. "j": "\u062C"
  339. "C": "\u0686"
  340. "cc": "\u0686\u0651"
  341. "c": "\u0686"
  342. "H\u0323": "\u062D"
  343. "h\u0323h\u0323": "\u062D\u0651"
  344. "h\u0323": "\u062D"
  345. "H": "\u0647"
  346. "hh": "\u0647\u0651"
  347. "h": "\u0647"
  348. "K\u0332h\u0332": "\u062E"
  349. "k\u0332h\u0332k\u0332h\u0332": "\u062E\u0651"
  350. "k\u0332h\u0332": "\u062E"
  351. "K": "\u06A9"
  352. "kk": "\u06A9\u0651"
  353. "k": "\u06A9"
  354. "D\u0323": "\u0688"
  355. "d\u0323d\u0323": "\u0688\u0651"
  356. "d\u0323": "\u0688"
  357. "D": "\u062F"
  358. "dd": "\u062F\u0651"
  359. "d": "\u062F"
  360. "Z\u0324": "\u0638"
  361. "z\u0324z\u0324": "\u0638\u0651"
  362. "z\u0324": "\u0638"
  363. "Z\u0323": "\u0636"
  364. "z\u0323z\u0323": "\u0636\u0651"
  365. "z\u0323": "\u0636"
  366. "Z\u0332": "\u0630"
  367. "z\u0332z\u0332": "\u0630\u0651"
  368. "z\u0332": "\u0630"
  369. "zz": "\u0632\u0651"
  370. "Zh": "\u0698"
  371. "zhzh": "\u0698\u0651"
  372. "zh": "\u0698"
  373. "Z": "\u0632"
  374. "z": "\u0632"
  375. "R\u0323": "\u0691"
  376. "r\u0323r\u0323": "\u0691\u0651"
  377. "r\u0323": "\u0691"
  378. "R": "\u0631"
  379. "rr": "\u0631\u0651"
  380. "r": "\u0631"
  381. "G\u0332h\u0332": "\u063A"
  382. "g\u0332h\u0332g\u0332h\u0332": "\u063A\u0651"
  383. "g\u0332h\u0332": "\u063A"
  384. "G": "\u06AF"
  385. "gg": "\u06AF\u0651"
  386. "g": "\u06AF"
  387. "F": "\u0641"
  388. "ff": "\u0641\u0651"
  389. "f": "\u0641"
  390. "Q": "\u0642"
  391. "qq": "\u0642\u0651"
  392. "q": "\u0642"
  393. "L": "\u0644"
  394. "ll": "\u0644\u0651"
  395. "l": "\u0644"
  396. "M": "\u0645"
  397. "mm": "\u0645\u0651"
  398. "m": "\u0645"
  399. "N\u0332": "\u06BA"
  400. "n\u0332n\u0332": "\u06BA\u0651"
  401. "n\u0332": "\u06BA"
  402. "N": "\u0646"
  403. "nn": "\u0646\u0651"
  404. "n": "\u0646"
  405. "V": "\u0648"
  406. "vv": "\u0648\u0651"
  407. "v": "\u0648"
  408. "Y": "\u064A"
  409. "yy": "\u064A\u0651"
  410. "y": "\u064A"
  411. # ain (alone)
  412. "\u02BB": "\u0639"
  413. # hamza (alone in final position)
  414. "\u02BE%": "\u0621"
  415. "\u02BC%": "\u0621"