divehi_thaana.yml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. ---
  2. general:
  3. name: Divehi (Thaana)
  4. case_sensitive: false
  5. roman_to_script:
  6. map:
  7. # COMMON COMBINING CHARACTERS (always follow a base letter)
  8. # combining acute \u0301
  9. # combining tilde \u0303
  10. # combining macron \u0304
  11. # combining dot above \u0307
  12. # combining dot below \u0323
  13. # combining diaeresis below \u0324
  14. # combining low line \u0332
  15. # combining breve below \u032E
  16. # ayn (spacing) \u02BB
  17. # apostrophe (spacing) \u0027
  18. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  19. # THAANA LETTER ALIF APPEARING MEDIALLY WITH ANY VOWEL IS ROMANIZED WITH APOSTROPHE
  20. # ORIGINAL VOWEL ASSOCIATED WITH THE ALIF CANNOT BE REGENERATED DURING CONVERSION
  21. "\u0027": "\u0787"
  22. # THAANA LETTER SHAVIYANI WITH SUKUN(BREVE 032E)
  23. "H\u032E": "\u0781\u07B0"
  24. "hh": "\u0787\u07B0\u0780"
  25. "h\u032E": "\u0781\u07B0"
  26. # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
  27. "H\u0332%": "\u0787\u07B0"
  28. "h\u0332%": "\u0787\u07B0"
  29. "H": "\u0780"
  30. "h": "\u0780"
  31. "S\u0301": "\u0781"
  32. "s\u0301s\u0301": "\u0787\u07B0\u0781"
  33. "s\u0301": "\u0781"
  34. # THAANA LETTER MEDIAL NOONU WITHOUT SUKUN (DOT ABOVE 0307)
  35. "M\u0307": "\u0782"
  36. "m\u0307": "\u0782"
  37. "mm": "\u0787\u07B0\u0789"
  38. "N": "\u0782"
  39. "nn": "\u0787\u07B0\u0782"
  40. "n": "\u0782"
  41. "R": "\u0783"
  42. "rr": "\u0787\u07B0\u0783"
  43. "r": "\u0783"
  44. "B": "\u0784"
  45. "bb": "\u0787\u07B0\u0784"
  46. "b": "\u0784"
  47. # THAANA LETTER "L/l" WITH DOT BELOW (0323)
  48. "L\u0323": "\u0785"
  49. "l\u0323": "\u0785"
  50. "K": "\u0786"
  51. "kk": "\u0787\u07B0\u0786"
  52. "k": "\u0786"
  53. # THAANA LETTER ALIF--OFTEN OMITTED IN ROMANIZATION (SEE VOWEL CONVERSION BELOW)
  54. # "\u0027": "\u0787"  (duplicate key; already mapped at the top of this map)
  55. "V": "\u0788"
  56. "vv": "\u0787\u07B0\u0788"
  57. "v": "\u0788"
  58. "M": "\u0789"
  59. "m": "\u0789"
  60. "F": "\u078A"
  61. "ff": "\u0787\u07B0\u078A"
  62. "f": "\u078A"
  63. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  64. "D\u0323": "\u0791"
  65. "d\u0323": "\u0791"
  66. "D": "\u078B"
  67. "dd": "\u0787\u07B0\u078B"
  68. "d": "\u078B"
  69. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  70. "T\u0323": "\u0793"
  71. "t\u0323": "\u0793"
  72. "T\u0324T": "\u078C\u07B0\u078C"
  73. "T\u0324t": "\u078C\u07B0\u078C"
  74. "t\u0324t": "\u078C\u07B0\u078C"
  75. "t\u0324T": "\u078C\u07B0\u078C"
  76. "T": "\u078C"
  77. "tt": "\u0787\u07B0\u078C"
  78. "t": "\u078C"
  79. "L": "\u078D"
  80. "ll": "\u0787\u07B0\u078D"
  81. "l": "\u078D"
  82. "G": "\u078E"
  83. "gg": "\u0787\u07B0\u078E"
  84. "g": "\u078E"
  85. # THAANA LETTER "N/n" WITH TILDE (0303)
  86. "N\u0303": "\u078F"
  87. "n\u0303": "\u078F"
  88. "S": "\u0790"
  89. "ss": "\u0787\u07B0\u0790"
  90. "s": "\u0790"
  91. "Z": "\u0792"
  92. "zz": "\u0787\u07B0\u0792"
  93. "z": "\u0792"
  94. "Y": "\u0794"
  95. "yy": "\u0787\u07B0\u0794"
  96. "y": "\u0794"
  97. "P": "\u0795"
  98. "pp": "\u0787\u07B0\u0795"
  99. "p": "\u0795"
  100. "J": "\u0796"
  101. "jj": "\u0787\u07B0\u0796"
  102. "j": "\u0796"
  103. "C": "\u0797"
  104. "cc": "\u0787\u07B0\u0797"
  105. "c": "\u0797"
  106. # THAANA EXTENSION FOR ARABIC LOAN WORDS AND NAMES
  107. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  108. "TH": "\u0798"
  109. "Th": "\u0798"
  110. "thth": "\u0787\u07B0\u0798"
  111. "th": "\u0798"
  112. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  113. "H\u0323": "\u0799"
  114. "h\u0323": "\u0799"
  115. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  116. "KH": "\u079A"
  117. "Kh": "\u079A"
  118. "khkh": "\u0787\u07B0\u079A"
  119. "kh": "\u079A"
  120. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  121. "DH": "\u079B"
  122. "Dh": "\u079B"
  123. "dhdh": "\u0787\u07B0\u079B"
  124. "dh": "\u079B"
  125. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  126. "Z\u0332": "\u079C"
  127. "z\u0332": "\u079C"
  128. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  129. "SH": "\u079D"
  130. "Sh": "\u079D"
  131. "shsh": "\u0787\u07B0\u079D"
  132. "sh": "\u079D"
  133. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  134. "S\u0323": "\u079E"
  135. "s\u0323": "\u079E"
  136. # THAANA EXTENSION FOR ARABIC LETTER TO
  137. "T\u0324": "\u07A0"
  138. "t\u0324": "\u07A0"
  139. # THAANA EXTENSION FOR ARABIC LETTER ZO
  140. "D\u0332": "\u07A1"
  141. "d\u0332": "\u07A1"
  142. # THAANA EXTENSION FOR ARABIC LETTER AINU
  143. "\u02BB": "\u07A2"
  144. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  145. "GH": "\u07A3"
  146. "Gh": "\u07A3"
  147. "ghgh": "\u0787\u07B0\u07A3"
  148. "gh": "\u07A3"
  149. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  150. "Q": "\u07A4"
  151. "qq": "\u0787\u07B0\u07A4"
  152. "q": "\u07A4"
  153. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  154. "W": "\u07A5"
  155. "ww": "\u0787\u07B0\u07A5"
  156. "w": "\u07A5"
  157. # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
  158. # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION)
  159. "%A\u0304": "\u0787\u07A7"
  160. "%A": "\u0787\u07A6"
  161. "\u0020a\u0304": "\u0020\u0787\u07A7"
  162. "\u0020a": "\u0020\u0787\u07A6"
  163. "%E\u0304": "\u0787\u07AD"
  164. "%E": "\u0787\u07AC"
  165. "\u0020e\u0304": "\u0020\u0787\u07AD"
  166. "\u0020e": "\u0020\u0787\u07AC"
  167. "%I\u0304": "\u0787\u07A9"
  168. "%I": "\u0787\u07A8"
  169. "\u0020i\u0304": "\u0020\u0787\u07A9"
  170. "\u0020i": "\u0020\u0787\u07A8"
  171. "%O\u0304": "\u0787\u07AF"
  172. "%O": "\u0787\u07AE"
  173. "\u0020o\u0304": "\u0020\u0787\u07AF"
  174. "\u0020o": "\u0020\u0787\u07AE"
  175. "%U\u0304": "\u0787\u07AB"
  176. "%U": "\u0787\u07AA"
  177. "\u0020u\u0304": "\u0020\u0787\u07AB"
  178. "\u0020u": "\u0020\u0787\u07AA"
  179. # THAANA MEDIAL OR FINAL VOWELS OVER ANY CONSONANT
  180. # (THIS ASSUMES NO UPPERCASE VOWELS REMAIN)
  181. "a\u0304": "\u07A7"
  182. "a": "\u07A6"
  183. "e\u0304": "\u07AD"
  184. "e": "\u07AC"
  185. "i\u0304": "\u07A9"
  186. "i": "\u07A8"
  187. "o\u0304": "\u07AF"
  188. "o": "\u07AE"
  189. "u\u0304": "\u07AB"
  190. "u": "\u07AA"
  191. # THAANA SUKUN (SILENCE) MARK; ONLY GENERATED IN OTHER COMBINATIONS
  192. # "": "\u07B0"
  193. script_to_roman:
  194. map:
  195. # THAANA LETTER NOONU WITHOUT SUKUN FOLLOWED BY A CONSONANT
  196. # IS ROMANIZED AS "m"+ DOT ABOVE (0307) THEN THE CONSONANT
  197. # OTHERWISE LETTER NOONU MAPS TO "n"
  198. "\u0782\u0780": "m\u0307\u0780"
  199. "\u0782\u0781": "m\u0307\u0781"
  200. "\u0782\u0783": "m\u0307\u0783"
  201. "\u0782\u0784": "m\u0307\u0784"
  202. "\u0782\u0785": "m\u0307\u0785"
  203. "\u0782\u0786": "m\u0307\u0786"
  204. "\u0782\u0788": "m\u0307\u0788"
  205. "\u0782\u0789": "m\u0307\u0789"
  206. "\u0782\u078A": "m\u0307\u078A"
  207. "\u0782\u0791": "m\u0307\u0791"
  208. "\u0782\u078B": "m\u0307\u078B"
  209. "\u0782\u078C": "m\u0307\u078C"
  210. "\u0782\u078D": "m\u0307\u078D"
  211. "\u0782\u078E": "m\u0307\u078E"
  212. "\u0782\u078F": "m\u0307\u078F"
  213. "\u0782\u0790": "m\u0307\u0790"
  214. "\u0782\u0792": "m\u0307\u0792"
  215. "\u0782\u0794": "m\u0307\u0794"
  216. "\u0782\u0795": "m\u0307\u0795"
  217. "\u0782\u0796": "m\u0307\u0796"
  218. "\u0782\u0797": "m\u0307\u0797"
  219. "\u0782\u0798": "m\u0307\u0798"
  220. "\u0782\u0799": "m\u0307\u0799"
  221. "\u0782\u079A": "m\u0307\u079A"
  222. "\u0782\u079B": "m\u0307\u079B"
  223. "\u0782\u079C": "m\u0307\u079C"
  224. "\u0782\u079D": "m\u0307\u079D"
  225. "\u0782\u079E": "m\u0307\u079E"
  226. "\u0782\u07A0": "m\u0307\u07A0"
  227. "\u0782\u07A1": "m\u0307\u07A1"
  228. "\u0782\u07A2": "m\u0307\u07A2"
  229. "\u0782\u07A3": "m\u0307\u07A3"
  230. "\u0782\u07A4": "m\u0307\u07A4"
  231. "\u0782\u07A5": "m\u0307\u07A5"
  232. # THAANA FINAL ALIFU WITH SUKUN (SILENCE) MARK
  233. # IS ROMANIZED WITH "h"+LOW LINE (0332)
  234. "\u0787\u07B0\u0020": "h\u0332\u0020"
  235. # THAANA SHAVIYANI WITH SUKUN (SILENCE) MARK
  236. # IS ROMANIZED WITH "h"+BREVE BELOW
  237. "\u0781\u07B0": "h\u032E"
  238. "\u0787\u07B0\u0780": "hh"
  239. "\u0780": "h"
  240. # THAANA ALIF WITH SUKUN AND SHAVIYANI
  241. "\u0787\u07B0\u0781": "s\u0301s\u0301"
  242. "\u0787\u07B0": "h\u0332"
  243. "\u0781": "s\u0301"
  244. "\u0787\u07B0\u0782": "nn"
  245. "\u0782": "n"
  246. "\u0787\u07B0\u0783": "rr"
  247. "\u0783": "r"
  248. "\u0787\u07B0\u0784": "bb"
  249. "\u0784": "b"
  250. "\u0787\u07B0\u0785": "l\u0323l\u0323"
  251. "\u0785": "l\u0323"
  252. "\u0787\u07B0\u0786": "kk"
  253. "\u0786": "k"
  254. "\u0787\u07B0\u0788": "vv"
  255. "\u0788": "v"
  256. "\u0787\u07B0\u0789": "mm"
  257. "\u0789": "m"
  258. "\u0787\u07B0\u078A": "ff"
  259. "\u078A": "f"
  260. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  261. "\u0787\u07B0\u0791": "d\u0323d\u0323"
  262. "\u0791": "d\u0323"
  263. "\u0787\u07B0\u078B": "dd"
  264. "\u078B": "d"
  265. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  266. "\u078C\u07B0\u078C": "t\u0324t"
  267. "\u0787\u07B0\u078C": "tt"
  268. "\u0793": "t\u0323"
  269. "\u078C": "t"
  270. "\u0787\u07B0\u078D": "ll"
  271. "\u078D": "l"
  272. "\u0787\u07B0\u078E": "gg"
  273. "\u078E": "g"
  274. # THAANA LETTER "N/n" WITH TILDE (0303)
  275. "\u0787\u07B0\u078F": "n\u0303n\u0303"
  276. "\u078F": "n\u0303"
  277. "\u0787\u07B0\u0790": "ss"
  278. "\u0790": "s"
  279. "\u0787\u07B0\u0792": "zz"
  280. "\u0792": "z"
  281. "\u0787\u07B0\u0794": "yy"
  282. "\u0794": "y"
  283. "\u0787\u07B0\u0795": "pp"
  284. "\u0795": "p"
  285. "\u0787\u07B0\u0796": "jj"
  286. "\u0796": "j"
  287. "\u0787\u07B0\u0797": "cc"
  288. "\u0797": "c"
  289. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  290. "\u0787\u07B0\u0798": "thth"
  291. "\u0798": "th"
  292. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  293. "\u0787\u07B0\u0799": "h\u0323h\u0323"
  294. "\u0799": "h\u0323"
  295. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  296. "\u0787\u07B0\u079A": "khkh"
  297. "\u079A": "kh"
  298. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  299. "\u0787\u07B0\u079B": "dhdh"
  300. "\u079B": "dh"
  301. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  302. "\u0787\u07B0\u079C": "z\u0332z\u0332"
  303. "\u079C": "z\u0332"
  304. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  305. "\u0787\u07B0\u079D": "shsh"
  306. "\u079D": "sh"
  307. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  308. "\u0787\u07B0\u079E": "s\u0323s\u0323"
  309. "\u079E": "s\u0323"
  310. # THAANA EXTENSION FOR ARABIC LETTER TO
  311. "\u0787\u07B0\u07A0": "t\u0324t\u0324"
  312. "\u07A0": "t\u0324"
  313. # THAANA EXTENSION FOR ARABIC LETTER ZO
  314. "\u0787\u07B0\u07A1": "d\u0332d\u0332"
  315. "\u07A1": "d\u0332"
  316. # THAANA EXTENSION FOR ARABIC LETTER AINU
  317. "\u0787\u07B0\u07A2": "\u02BB\u02BB"
  318. "\u07A2": "\u02BB"
  319. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  320. "\u0787\u07B0\u07A3": "ghgh"
  321. "\u07A3": "gh"
  322. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  323. "\u0787\u07B0\u07A4": "qq"
  324. "\u07A4": "q"
  325. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  326. "\u0787\u07B0\u07A5": "ww"
  327. "\u07A5": "w"
  328. # INITIAL VOWELS FOLLOWING ALIF (ALIF OMITTED IN ROMANIZATION)
  329. "\u0020\u0787\u07A7": "\u0020a\u0304"
  330. "\u0020\u0787\u07A6": "\u0020a"
  331. "\u0020\u0787\u07AD": "\u0020e\u0304"
  332. "\u0020\u0787\u07AC": "\u0020e"
  333. "\u0020\u0787\u07A9": "\u0020i\u0304"
  334. "\u0020\u0787\u07A8": "\u0020i"
  335. "\u0020\u0787\u07AF": "\u0020o\u0304"
  336. "\u0020\u0787\u07AE": "\u0020o"
  337. "\u0020\u0787\u07AB": "\u0020u\u0304"
  338. "\u0020\u0787\u07AA": "\u0020u"
  339. # THAANA ALIF APPEARING MEDIALLY WITH ANY VOWEL
  340. # IS ROMANIZED WITH APOSTROPHE FOLLOWED BY THE SAME VOWEL
  341. "\u0787\u07A7": "\u0027a\u0304"
  342. "\u0787\u07A6": "\u0027a"
  343. "\u0787\u07AD": "\u0027e\u0304"
  344. "\u0787\u07AC": "\u0027e"
  345. "\u0787\u07A9": "\u0027i\u0304"
  346. "\u0787\u07A8": "\u0027i"
  347. "\u0787\u07AF": "\u0027o\u0304"
  348. "\u0787\u07AE": "\u0027o"
  349. "\u0787\u07AB": "\u0027u\u0304"
  350. "\u0787\u07AA": "\u0027u"
  351. # THAANA MEDIAL OR FINAL VOWELS OVER CONSONANTS EXCEPT ALIF
  352. # THIS PRODUCES NO UPPERCASE VOWELS
  353. "\u07A7": "a\u0304"
  354. "\u07A6": "a"
  355. "\u07AD": "e\u0304"
  356. "\u07AC": "e"
  357. "\u07A9": "i\u0304"
  358. "\u07A8": "i"
  359. "\u07AF": "o\u0304"
  360. "\u07AE": "o"
  361. "\u07AB": "u\u0304"
  362. "\u07AA": "u"
  363. # THAANA LETTER ALIF--ANY REMAINING AFTER CONVERSION MAP TO APOSTROPHE
  364. "\u0787": "\u0027"