divehi_thaana.yml 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. general:
  2. name: Divehi (Thaana)
  3. roman_to_script:
  4. map:
  5. # COMMON COMBINING CHARACTERS (always follow a base letter)
  6. # combining acute \u0301
  7. # combining tilde \u0303
  8. # combining macron \u0304
  9. # combining dot above \u0307
  10. # combining dot below \u0323
  11. # combining diaeresis below \u0324
  12. # combining low line \u0332
  13. # combining breve below \u032E
  14. # ayn (spacing) \u02BB
  15. # apostrophe (spacing) \u0027
  16. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  17. # THAANA LETTER ALIF APPEARING MEDIALLY WITH ANY VOWEL IS ROMANIZED WITH APOSTROPHE
  18. # ORIGINAL VOWEL ASSOCIATED WITH THE ALIF CANNOT BE REGENERATED DURING CONVERSION
  19. "\u0027": "\u0787"
  20. # THAANA LETTER SHAVIYANI WITH SUKUN (BREVE BELOW 032E)
  21. "H\u032E": "\u0781\u07B0"
  22. "hh": "\u0787\u07B0\u0780"
  23. "h\u032E": "\u0781\u07B0"
  24. # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
  25. "%H\u0332": "\u0787\u07B0"
  26. "%h\u0332": "\u0787\u07B0"
  27. "H": "\u0780"
  28. "h": "\u0780"
  29. "S\u0301": "\u0781"
  30. "s\u0301s\u0301": "\u0787\u07B0\u0781"
  31. "s\u0301": "\u0781"
  32. # THAANA LETTER MEDIAL NOONU WITHOUT SUKUN (DOT ABOVE 0307)
  33. "M\u0307": "\u0782"
  34. "m\u0307": "\u0782"
  35. "mm": "\u0787\u07B0\u0789"
  36. "N": "\u0782"
  37. "nn": "\u0787\u07B0\u0782"
  38. "n": "\u0782"
  39. "R": "\u0783"
  40. "rr": "\u0787\u07B0\u0783"
  41. "r": "\u0783"
  42. "B": "\u0784"
  43. "bb": "\u0787\u07B0\u0784"
  44. "b": "\u0784"
  45. # THAANA LETTER "L/l" WITH DOT BELOW (0323)
  46. "L\u0323": "\u0785"
  47. "l\u0323": "\u0785"
  48. "K": "\u0786"
  49. "kk": "\u0787\u07B0\u0786"
  50. "k": "\u0786"
  51. # THAANA LETTER ALIF--OFTEN OMITTED IN ROMANIZATION (SEE VOWEL CONVERSION BELOW)
  52. "\u0027": "\u0787"
  53. "V": "\u0788"
  54. "vv": "\u0787\u07B0\u0788"
  55. "v": "\u0788"
  56. "M": "\u0789"
  57. "m": "\u0789"
  58. "F": "\u078A"
  59. "ff": "\u0787\u07B0\u078A"
  60. "f": "\u078A"
  61. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  62. "D\u0323": "\u0791"
  63. "d\u0323": "\u0791"
  64. "D": "\u078B"
  65. "dd": "\u0787\u07B0\u078B"
  66. "d": "\u078B"
  67. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  68. "T\u0323": "\u0793"
  69. "t\u0323": "\u0793"
  70. "T\u0324T": "\u078C\u07B0\u078C"
  71. "T\u0324t": "\u078C\u07B0\u078C"
  72. "t\u0324t": "\u078C\u07B0\u078C"
  73. "t\u0324T": "\u078C\u07B0\u078C"
  74. "T": "\u078C"
  75. "tt": "\u0787\u07B0\u078C"
  76. "t": "\u078C"
  77. "L": "\u078D"
  78. "ll": "\u0787\u07B0\u078D"
  79. "l": "\u078D"
  80. "G": "\u078E"
  81. "gg": "\u0787\u07B0\u078E"
  82. "g": "\u078E"
  83. # THAANA LETTER "N/n" WITH TILDE (0303)
  84. "N\u0303": "\u078F"
  85. "n\u0303": "\u078F"
  86. "S": "\u0790"
  87. "ss": "\u0787\u07B0\u0790"
  88. "s": "\u0790"
  89. "Z": "\u0792"
  90. "zz": "\u0787\u07B0\u0792"
  91. "z": "\u0792"
  92. "Y": "\u0794"
  93. "yy": "\u0787\u07B0\u0794"
  94. "y": "\u0794"
  95. "P": "\u0795"
  96. "pp": "\u0787\u07B0\u0795"
  97. "p": "\u0795"
  98. "J": "\u0796"
  99. "jj": "\u0787\u07B0\u0796"
  100. "j": "\u0796"
  101. "C": "\u0797"
  102. "cc": "\u0787\u07B0\u0797"
  103. "c": "\u0797"
  104. # THAANA EXTENSION FOR ARABIC LOAN WORDS AND NAMES
  105. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  106. "TH": "\u0798"
  107. "Th": "\u0798"
  108. "thth": "\u0787\u07B0\u0798"
  109. "th": "\u0798"
  110. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  111. "H\u0323": "\u0799"
  112. "h\u0323": "\u0799"
  113. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  114. "KH": "\u079A"
  115. "Kh": "\u079A"
  116. "khkh": "\u0787\u07B0\u079A"
  117. "kh": "\u079A"
  118. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  119. "DH": "\u079B"
  120. "Dh": "\u079B"
  121. "dhdh": "\u0787\u07B0\u079B"
  122. "dh": "\u079B"
  123. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  124. "Z\u0332": "\u079C"
  125. "z\u0332": "\u079C"
  126. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  127. "SH": "\u079D"
  128. "Sh": "\u079D"
  129. "shsh": "\u0787\u07B0\u079D"
  130. "sh": "\u079D"
  131. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  132. "S\u0323": "\u079E"
  133. "s\u0323": "\u079E"
  134. # THAANA EXTENSION FOR ARABIC LETTER TO
  135. "T\u0324": "\u07A0"
  136. "t\u0324": "\u07A0"
  137. # THAANA EXTENSION FOR ARABIC LETTER ZO
  138. "D\u0332": "\u07A1"
  139. "d\u0332": "\u07A1"
  140. # THAANA EXTENSION FOR ARABIC LETTER AINU
  141. "\u02BB": "\u07A2"
  142. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  143. "GH": "\u07A3"
  144. "Gh": "\u07A3"
  145. "ghgh": "\u0787\u07B0\u07A3"
  146. "gh": "\u07A3"
  147. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  148. "Q": "\u07A4"
  149. "qq": "\u0787\u07B0\u07A4"
  150. "q": "\u07A4"
  151. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  152. "W": "\u07A5"
  153. "ww": "\u0787\u07B0\u07A5"
  154. "w": "\u07A5"
  155. # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
  156. # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION)
  157. "A\u0304%": "\u0787\u07A7"
  158. "A%": "\u0787\u07A6"
  159. "\u0020a\u0304": "\u0020\u0787\u07A7"
  160. "\u0020a": "\u0020\u0787\u07A6"
  161. "E\u0304%": "\u0787\u07AD"
  162. "E%": "\u0787\u07AC"
  163. "\u0020e\u0304": "\u0020\u0787\u07AD"
  164. "\u0020e": "\u0020\u0787\u07AC"
  165. "I\u0304%": "\u0787\u07A9"
  166. "I%": "\u0787\u07A8"
  167. "\u0020i\u0304": "\u0020\u0787\u07A9"
  168. "\u0020i": "\u0020\u0787\u07A8"
  169. "O\u0304%": "\u0787\u07AF"
  170. "O%": "\u0787\u07AE"
  171. "\u0020o\u0304": "\u0020\u0787\u07AF"
  172. "\u0020o": "\u0020\u0787\u07AE"
  173. "U\u0304%": "\u0787\u07AB"
  174. "U%": "\u0787\u07AA"
  175. "\u0020u\u0304": "\u0020\u0787\u07AB"
  176. "\u0020u": "\u0020\u0787\u07AA"
  177. # THAANA MEDIAL OR FINAL VOWELS OVER ANY CONSONANT
  178. # (THIS ASSUMES NO UPPERCASE VOWELS REMAIN)
  179. "a\u0304": "\u07A7"
  180. "a": "\u07A6"
  181. "e\u0304": "\u07AD"
  182. "e": "\u07AC"
  183. "i\u0304": "\u07A9"
  184. "i": "\u07A8"
  185. "o\u0304": "\u07AF"
  186. "o": "\u07AE"
  187. "u\u0304": "\u07AB"
  188. "u": "\u07AA"
  189. # THAANA SUKUN (SILENCE) MARK; ONLY GENERATED IN OTHER COMBINATIONS
  190. # "": "\u07B0"
  191. script_to_roman:
  192. map:
  193. # THAANA LETTER NOONU WITHOUT SUKUN FOLLOWED BY A CONSONANT
  194. # IS ROMANIZED AS "m"+ DOT ABOVE (0307) THEN THE CONSONANT
  195. # OTHERWISE LETTER NOONU MAPS TO "n"
  196. "\u0782\u0780": "m\u0307\u0780"
  197. "\u0782\u0781": "m\u0307\u0781"
  198. "\u0782\u0783": "m\u0307\u0783"
  199. "\u0782\u0784": "m\u0307\u0784"
  200. "\u0782\u0785": "m\u0307\u0785"
  201. "\u0782\u0786": "m\u0307\u0786"
  202. "\u0782\u0788": "m\u0307\u0788"
  203. "\u0782\u0789": "m\u0307\u0789"
  204. "\u0782\u078A": "m\u0307\u078A"
  205. "\u0782\u0791": "m\u0307\u0791"
  206. "\u0782\u078B": "m\u0307\u078B"
  207. "\u0782\u078C": "m\u0307\u078C"
  208. "\u0782\u078D": "m\u0307\u078D"
  209. "\u0782\u078E": "m\u0307\u078E"
  210. "\u0782\u078F": "m\u0307\u078F"
  211. "\u0782\u0790": "m\u0307\u0790"
  212. "\u0782\u0792": "m\u0307\u0792"
  213. "\u0782\u0794": "m\u0307\u0794"
  214. "\u0782\u0795": "m\u0307\u0795"
  215. "\u0782\u0796": "m\u0307\u0796"
  216. "\u0782\u0797": "m\u0307\u0797"
  217. "\u0782\u0798": "m\u0307\u0798"
  218. "\u0782\u0799": "m\u0307\u0799"
  219. "\u0782\u079A": "m\u0307\u079A"
  220. "\u0782\u079B": "m\u0307\u079B"
  221. "\u0782\u079C": "m\u0307\u079C"
  222. "\u0782\u079D": "m\u0307\u079D"
  223. "\u0782\u079E": "m\u0307\u079E"
  224. "\u0782\u07A0": "m\u0307\u07A0"
  225. "\u0782\u07A1": "m\u0307\u07A1"
  226. "\u0782\u07A2": "m\u0307\u07A2"
  227. "\u0782\u07A3": "m\u0307\u07A3"
  228. "\u0782\u07A4": "m\u0307\u07A4"
  229. "\u0782\u07A5": "m\u0307\u07A5"
  230. # THAANA FINAL ALIFU WITH SUKUN (SILENCE) MARK
  231. # IS ROMANIZED WITH "h"+LOW LINE (0332)
  232. "\u0787\u07B0\u0020": "h\u0332\u0020"
  233. # THAANA SHAVIYANI WITH SUKUN (SILENCE) MARK
  234. # IS ROMANIZED WITH "h"+BREVE BELOW
  235. "\u0781\u07B0": "h\u032E"
  236. "\u0787\u07B0\u0780": "hh"
  237. "\u0780": "h"
  238. # THAANA ALIF WITH SUKUN AND SHAVIYANI
  239. "\u0787\u07B0\u0781": "s\u0301s\u0301"
  240. "\u0787\u07B0": "h\u0332"
  241. "\u0781": "s\u0301"
  242. "\u0787\u07B0\u0782": "nn"
  243. "\u0782": "n"
  244. "\u0787\u07B0\u0783": "rr"
  245. "\u0783": "r"
  246. "\u0787\u07B0\u0784": "bb"
  247. "\u0784": "b"
  248. "\u0787\u07B0\u0785": "l\u0323l\u0323"
  249. "\u0785": "l\u0323"
  250. "\u0787\u07B0\u0786": "kk"
  251. "\u0786": "k"
  252. "\u0787\u07B0\u0788": "vv"
  253. "\u0788": "v"
  254. "\u0787\u07B0\u0789": "mm"
  255. "\u0789": "m"
  256. "\u0787\u07B0\u078A": "ff"
  257. "\u078A": "f"
  258. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  259. "\u0787\u07B0\u0791": "d\u0323d\u0323"
  260. "\u0791": "d\u0323"
  261. "\u0787\u07B0\u078B": "dd"
  262. "\u078B": "d"
  263. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  264. "\u078C\u07B0\u078C": "t\u0324t"
  265. "\u0787\u07B0\u078C": "tt"
  266. "\u0793": "t\u0323"
  267. "\u078C": "t"
  268. "\u0787\u07B0\u078D": "ll"
  269. "\u078D": "l"
  270. "\u0787\u07B0\u078E": "gg"
  271. "\u078E": "g"
  272. # THAANA LETTER "N/n" WITH TILDE (0303)
  273. "\u0787\u07B0\u078F": "n\u0303n\u0303"
  274. "\u078F": "n\u0303"
  275. "\u0787\u07B0\u0790": "ss"
  276. "\u0790": "s"
  277. "\u0787\u07B0\u0792": "zz"
  278. "\u0792": "z"
  279. "\u0787\u07B0\u0794": "yy"
  280. "\u0794": "y"
  281. "\u0787\u07B0\u0795": "pp"
  282. "\u0795": "p"
  283. "\u0787\u07B0\u0796": "jj"
  284. "\u0796": "j"
  285. "\u0787\u07B0\u0797": "cc"
  286. "\u0797": "c"
  287. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  288. "\u0787\u07B0\u0798": "thth"
  289. "\u0798": "th"
  290. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  291. "\u0787\u07B0\u0799": "h\u0323h\u0323"
  292. "\u0799": "h\u0323"
  293. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  294. "\u0787\u07B0\u079A": "khkh"
  295. "\u079A": "kh"
  296. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  297. "\u0787\u07B0\u079B": "dhdh"
  298. "\u079B": "dh"
  299. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  300. "\u0787\u07B0\u079C": "z\u0332z\u0332"
  301. "\u079C": "z\u0332"
  302. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  303. "\u0787\u07B0\u079D": "shsh"
  304. "\u079D": "sh"
  305. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  306. "\u0787\u07B0\u079E": "s\u0323s\u0323"
  307. "\u079E": "s\u0323"
  308. # THAANA EXTENSION FOR ARABIC LETTER TO
  309. "\u0787\u07B0\u07A0": "t\u0324t\u0324"
  310. "\u07A0": "t\u0324"
  311. # THAANA EXTENSION FOR ARABIC LETTER ZO
  312. "\u0787\u07B0\u07A1": "d\u0332d\u0332"
  313. "\u07A1": "d\u0332"
  314. # THAANA EXTENSION FOR ARABIC LETTER AINU
  315. "\u0787\u07B0\u07A2": "\u02BB\u02BB"
  316. "\u07A2": "\u02BB"
  317. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  318. "\u0787\u07B0\u07A3": "ghgh"
  319. "\u07A3": "gh"
  320. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  321. "\u0787\u07B0\u07A4": "qq"
  322. "\u07A4": "q"
  323. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  324. "\u0787\u07B0\u07A5": "ww"
  325. "\u07A5": "w"
  326. # INITIAL VOWELS FOLLOWING ALIF (ALIF OMITTED IN ROMANIZATION)
  327. "\u0020\u0787\u07A7": "\u0020a\u0304"
  328. "\u0020\u0787\u07A6": "\u0020a"
  329. "\u0020\u0787\u07AD": "\u0020e\u0304"
  330. "\u0020\u0787\u07AC": "\u0020e"
  331. "\u0020\u0787\u07A9": "\u0020i\u0304"
  332. "\u0020\u0787\u07A8": "\u0020i"
  333. "\u0020\u0787\u07AF": "\u0020o\u0304"
  334. "\u0020\u0787\u07AE": "\u0020o"
  335. "\u0020\u0787\u07AB": "\u0020u\u0304"
  336. "\u0020\u0787\u07AA": "\u0020u"
  337. # THAANA ALIF APPEARING MEDIALLY WITH ANY VOWEL
  338. # IS ROMANIZED WITH APOSTROPHE FOLLOWED BY THE SAME VOWEL
  339. "\u0787\u07A7": "\u0027a\u0304"
  340. "\u0787\u07A6": "\u0027a"
  341. "\u0787\u07AD": "\u0027e\u0304"
  342. "\u0787\u07AC": "\u0027e"
  343. "\u0787\u07A9": "\u0027i\u0304"
  344. "\u0787\u07A8": "\u0027i"
  345. "\u0787\u07AF": "\u0027o\u0304"
  346. "\u0787\u07AE": "\u0027o"
  347. "\u0787\u07AB": "\u0027u\u0304"
  348. "\u0787\u07AA": "\u0027u"
  349. # THAANA MEDIAL OR FINAL VOWELS OVER CONSONANTS EXCEPT ALIF
  350. # THIS PRODUCES NO UPPERCASE VOWELS
  351. "\u07A7": "a\u0304"
  352. "\u07A6": "a"
  353. "\u07AD": "e\u0304"
  354. "\u07AC": "e"
  355. "\u07A9": "i\u0304"
  356. "\u07A8": "i"
  357. "\u07AF": "o\u0304"
  358. "\u07AE": "o"
  359. "\u07AB": "u\u0304"
  360. "\u07AA": "u"
  361. # THAANA LETTER ALIF--ANY REMAINING AFTER CONVERSION MAP TO APOSTROPHE
  362. "\u0787": "\u0027"