ThaanaRomanization.cfg 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. # Original table by Randall K. Barry, 23 February 2021
  2. [General]
  3. Name=Thaana
  4. # Script used by the Divehi language of the Maldives
  5. # UNICODE THAANA CHARACTER RANGE: U+0780-U+07BF
  6. Truncation=%
  7. [RomanToScript]
  8. FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  9. SubfieldsAlwaysExcluded=uvxy0123456789
  10. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  11. Subfield6Code=(4
  12. # RDA boilerplate phrases not transliterated:
  13. Place of publication not identified=Place of publication not identified
  14. publisher not identified=publisher not identified
  15. date of publication not identified=date of publication not identified
  16. At head of title=At head of title
  17. Colophon=Colophon
  18. # COMMON COMBINING CHARACTERS (always follow a base letter)
  19. # combining acute U+0301
  20. # combining tilde U+0303
  21. # combining macron U+0304
  22. # combining dot above U+0307
  23. # combining dot below U+0323
  24. # combining diaeresis below U+0324
  25. # combining low line U+0332
  26. # combining breve below U+032E
  27. # ayn (spacing) U+02BB
  28. # apostrophe (spacing) U+0027
  29. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  30. # THAANA LETTER ALIF APPEARING MEDIALLY WITH ANY VOWEL IS ROMANIZED WITH APOSTROPHE
  31. # ORIGINAL VOWEL ASSOCIATED WITH THE ALIF CANNOT BE REGENERATED DURING CONVERSION
  32. U+0027=U+0787
  33. # THAANA LETTER SHAVIYANI WITH SUKUN(BREVE 032E)
  34. HU+032E=U+0781U+07B0
  35. hh=U+0787U+07B0U+0780
  36. hU+032E=U+0781U+07B0
  37. # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
  38. %HU+0332=U+0787U+07B0
  39. %hU+0332=U+0787U+07B0
  40. H=U+0780
  41. h=U+0780
  42. SU+0301=U+0781
  43. sU+0301sU+0301=U+0787U+07B0U+0781
  44. sU+0301=U+0781
  45. # THAANA LETTER MEDIAL NOONU WITHOUT SUKUN (DOT ABOVE 0307)
  46. MU+0307=U+0782
  47. mU+0307=U+0782
  48. mm=U+0787U+07B0U+0789
  49. N=U+0782
  50. nn=U+0787U+07B0U+0782
  51. n=U+0782
  52. R=U+0783
  53. rr=U+0787U+07B0U+0783
  54. r=U+0783
  55. B=U+0784
  56. bb=U+0787U+07B0U+0784
  57. b=U+0784
  58. # THAANA LETTER "L/l" WITH DOT BELOW (0323)
  59. LU+0323=U+0785
  60. lU+0323=U+0785
  61. K=U+0786
  62. kk=U+0787U+07B0U+0786
  63. k=U+0786
  64. # THAANA LETTER ALIF--OFTEN OMITTED IN ROMANIZATION (SEE VOWEL CONVERSION BELOW)
  65. # U+0027=U+0787 IS ALREADY DEFINED AT THE START OF THE LETTER TABLE IN THIS SECTION; DUPLICATE KEY DISABLED
  66. V=U+0788
  67. vv=U+0787U+07B0U+0788
  68. v=U+0788
  69. M=U+0789
  70. m=U+0789
  71. F=U+078A
  72. ff=U+0787U+07B0U+078A
  73. f=U+078A
  74. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  75. DU+0323=U+0791
  76. dU+0323=U+0791
  77. D=U+078B
  78. dd=U+0787U+07B0U+078B
  79. d=U+078B
  80. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  81. TU+0323=U+0793
  82. tU+0323=U+0793
  83. TU+0324T=U+078CU+07B0U+078C
  84. TU+0324t=U+078CU+07B0U+078C
  85. tU+0324t=U+078CU+07B0U+078C
  86. tU+0324T=U+078CU+07B0U+078C
  87. T=U+078C
  88. tt=U+0787U+07B0U+078C
  89. t=U+078C
  90. L=U+078D
  91. ll=U+0787U+07B0U+078D
  92. l=U+078D
  93. G=U+078E
  94. gg=U+0787U+07B0U+078E
  95. g=U+078E
  96. # THAANA LETTER "N/n" WITH TILDE (0303)
  97. NU+0303=U+078F
  98. nU+0303=U+078F
  99. S=U+0790
  100. ss=U+0787U+07B0U+0790
  101. s=U+0790
  102. Z=U+0792
  103. zz=U+0787U+07B0U+0792
  104. z=U+0792
  105. Y=U+0794
  106. yy=U+0787U+07B0U+0794
  107. y=U+0794
  108. P=U+0795
  109. pp=U+0787U+07B0U+0795
  110. p=U+0795
  111. J=U+0796
  112. jj=U+0787U+07B0U+0796
  113. j=U+0796
  114. C=U+0797
  115. cc=U+0787U+07B0U+0797
  116. c=U+0797
  117. # THAANA EXTENSION FOR ARABIC LOAN WORDS AND NAMES
  118. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  119. TH=U+0798
  120. Th=U+0798
  121. thth=U+0787U+07B0U+0798
  122. th=U+0798
  123. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  124. HU+0323=U+0799
  125. hU+0323=U+0799
  126. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  127. KH=U+079A
  128. Kh=U+079A
  129. khkh=U+0787U+07B0U+079A
  130. kh=U+079A
  131. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  132. DH=U+079B
  133. Dh=U+079B
  134. dhdh=U+0787U+07B0U+079B
  135. dh=U+079B
  136. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  137. ZU+0332=U+079C
  138. zU+0332=U+079C
  139. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  140. SH=U+079D
  141. Sh=U+079D
  142. shsh=U+0787U+07B0U+079D
  143. sh=U+079D
  144. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  145. SU+0323=U+079E
  146. sU+0323=U+079E
  147. # THAANA EXTENSION FOR ARABIC LETTER TO
  148. TU+0324=U+07A0
  149. tU+0324=U+07A0
  150. # THAANA EXTENSION FOR ARABIC LETTER ZO
  151. DU+0332=U+07A1
  152. dU+0332=U+07A1
  153. # THAANA EXTENSION FOR ARABIC LETTER AINU
  154. U+02BB=U+07A2
  155. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  156. GH=U+07A3
  157. Gh=U+07A3
  158. ghgh=U+0787U+07B0U+07A3
  159. gh=U+07A3
  160. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  161. Q=U+07A4
  162. qq=U+0787U+07B0U+07A4
  163. q=U+07A4
  164. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  165. W=U+07A5
  166. ww=U+0787U+07B0U+07A5
  167. w=U+07A5
  168. # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
  169. # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION); SHORT u IS UBUFILI U+07AA, u WITH MACRON IS OOBOOFILI U+07AB
  170. AU+0304%=U+0787U+07A7
  171. A%=U+0787U+07A6
  172. U+0020aU+0304=U+0020U+0787U+07A7
  173. U+0020a=U+0020U+0787U+07A6
  174. EU+0304%=U+0787U+07AD
  175. E%=U+0787U+07AC
  176. U+0020eU+0304=U+0020U+0787U+07AD
  177. U+0020e=U+0020U+0787U+07AC
  178. IU+0304%=U+0787U+07A9
  179. I%=U+0787U+07A8
  180. U+0020iU+0304=U+0020U+0787U+07A9
  181. U+0020i=U+0020U+0787U+07A8
  182. OU+0304%=U+0787U+07AF
  183. O%=U+0787U+07AE
  184. U+0020oU+0304=U+0020U+0787U+07AF
  185. U+0020o=U+0020U+0787U+07AE
  186. UU+0304%=U+0787U+07AB
  187. U%=U+0787U+07AA
  188. U+0020uU+0304=U+0020U+0787U+07AB
  189. U+0020u=U+0020U+0787U+07AA
  190. # THAANA MEDIAL OR FINAL VOWELS OVER ANY CONSONANT
  191. # (THIS ASSUMES NO UPPERCASE VOWELS REMAIN)
  192. aU+0304=U+07A7
  193. a=U+07A6
  194. eU+0304=U+07AD
  195. e=U+07AC
  196. iU+0304=U+07A9
  197. i=U+07A8
  198. oU+0304=U+07AF
  199. o=U+07AE
  200. uU+0304=U+07AB
  201. u=U+07AA
  202. # THAANA SUKUN (SILENCE) MARK; ONLY GENERATED IN OTHER COMBINATIONS
  203. # =U+07B0
  204. [ScriptToRoman]
  205. FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  206. SubfieldsAlwaysExcluded=uvxy0123456789
  207. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  208. # RDA boilerplate phrases not transliterated:
  209. Place of publication not identified=Place of publication not identified
  210. publisher not identified=publisher not identified
  211. date of publication not identified=date of publication not identified
  212. At head of title=At head of title
  213. # THAANA LETTER NOONU WITHOUT SUKUN FOLLOWED BY A CONSONANT
  214. # IS ROMANIZED AS "m"+ DOT ABOVE (0307) THEN THE CONSONANT
  215. # OTHERWISE LETTER NOONU MAPS TO "n"
  216. U+0782U+0780=mU+0307U+0780
  217. U+0782U+0781=mU+0307U+0781
  218. U+0782U+0783=mU+0307U+0783
  219. U+0782U+0784=mU+0307U+0784
  220. U+0782U+0785=mU+0307U+0785
  221. U+0782U+0786=mU+0307U+0786
  222. U+0782U+0788=mU+0307U+0788
  223. U+0782U+0789=mU+0307U+0789
  224. U+0782U+078A=mU+0307U+078A
  225. U+0782U+0791=mU+0307U+0791
  226. U+0782U+078B=mU+0307U+078B
  227. U+0782U+078C=mU+0307U+078C
  228. U+0782U+078D=mU+0307U+078D
  229. U+0782U+078E=mU+0307U+078E
  230. U+0782U+078F=mU+0307U+078F
  231. U+0782U+0790=mU+0307U+0790
  232. U+0782U+0792=mU+0307U+0792
  233. U+0782U+0794=mU+0307U+0794
  234. U+0782U+0795=mU+0307U+0795
  235. U+0782U+0796=mU+0307U+0796
  236. U+0782U+0797=mU+0307U+0797
  237. U+0782U+0798=mU+0307U+0798
  238. U+0782U+0799=mU+0307U+0799
  239. U+0782U+079A=mU+0307U+079A
  240. U+0782U+079B=mU+0307U+079B
  241. U+0782U+079C=mU+0307U+079C
  242. U+0782U+079D=mU+0307U+079D
  243. U+0782U+079E=mU+0307U+079E
  244. U+0782U+07A0=mU+0307U+07A0
  245. U+0782U+07A1=mU+0307U+07A1
  246. U+0782U+07A2=mU+0307U+07A2
  247. U+0782U+07A3=mU+0307U+07A3
  248. U+0782U+07A4=mU+0307U+07A4
  249. U+0782U+07A5=mU+0307U+07A5
  250. # THAANA FINAL ALIFU WITH SUKUN (SILENCE) MARK
  251. # IS ROMANIZED WITH "h"+LOW LINE (0332)
  252. U+0787U+07B0U+0020=hU+0332U+0020
  253. # THAANA SHAVIYANI WITH SUKUN (SILENCE) MARK
  254. # IS ROMANIZED WITH "h"+BREVE BELOW
  255. U+0781U+07B0=hU+032E
  256. U+0787U+07B0U+0780=hh
  257. U+0780=h
  258. # THAANA ALIF WITH SUKUN AND SHAVIYANI
  259. U+0787U+07B0U+0781=sU+0301sU+0301
  260. U+0787U+07B0=hU+0332
  261. U+0781=sU+0301
  262. U+0787U+07B0U+0782=nn
  263. U+0782=n
  264. U+0787U+07B0U+0783=rr
  265. U+0783=r
  266. U+0787U+07B0U+0784=bb
  267. U+0784=b
  268. U+0787U+07B0U+0785=lU+0323lU+0323
  269. U+0785=lU+0323
  270. U+0787U+07B0U+0786=kk
  271. U+0786=k
  272. U+0787U+07B0U+0788=vv
  273. U+0788=v
  274. U+0787U+07B0U+0789=mm
  275. U+0789=m
  276. U+0787U+07B0U+078A=ff
  277. U+078A=f
  278. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  279. U+0787U+07B0U+0791=dU+0323dU+0323
  280. U+0791=dU+0323
  281. U+0787U+07B0U+078B=dd
  282. U+078B=d
  283. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  284. U+078CU+07B0U+078C=tU+0324t
  285. U+0787U+07B0U+078C=tt
  286. U+0793=tU+0323
  287. U+078C=t
  288. U+0787U+07B0U+078D=ll
  289. U+078D=l
  290. U+0787U+07B0U+078E=gg
  291. U+078E=g
  292. # THAANA LETTER "N/n" WITH TILDE (0303)
  293. U+0787U+07B0U+078F=nU+0303nU+0303
  294. U+078F=nU+0303
  295. U+0787U+07B0U+0790=ss
  296. U+0790=s
  297. U+0787U+07B0U+0792=zz
  298. U+0792=z
  299. U+0787U+07B0U+0794=yy
  300. U+0794=y
  301. U+0787U+07B0U+0795=pp
  302. U+0795=p
  303. U+0787U+07B0U+0796=jj
  304. U+0796=j
  305. U+0787U+07B0U+0797=cc
  306. U+0797=c
  307. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  308. U+0787U+07B0U+0798=thth
  309. U+0798=th
  310. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  311. U+0787U+07B0U+0799=hU+0323hU+0323
  312. U+0799=hU+0323
  313. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  314. U+0787U+07B0U+079A=khkh
  315. U+079A=kh
  316. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  317. U+0787U+07B0U+079B=dhdh
  318. U+079B=dh
  319. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  320. U+0787U+07B0U+079C=zU+0332zU+0332
  321. U+079C=zU+0332
  322. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  323. U+0787U+07B0U+079D=shsh
  324. U+079D=sh
  325. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  326. U+0787U+07B0U+079E=sU+0323sU+0323
  327. U+079E=sU+0323
  328. # THAANA EXTENSION FOR ARABIC LETTER TO
  329. U+0787U+07B0U+07A0=tU+0324tU+0324
  330. U+07A0=tU+0324
  331. # THAANA EXTENSION FOR ARABIC LETTER ZO
  332. U+0787U+07B0U+07A1=dU+0332dU+0332
  333. U+07A1=dU+0332
  334. # THAANA EXTENSION FOR ARABIC LETTER AINU
  335. U+0787U+07B0U+07A2=U+02BBU+02BB
  336. U+07A2=U+02BB
  337. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  338. U+0787U+07B0U+07A3=ghgh
  339. U+07A3=gh
  340. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  341. U+0787U+07B0U+07A4=qq
  342. U+07A4=q
  343. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  344. U+0787U+07B0U+07A5=ww
  345. U+07A5=w
  346. # INITIAL VOWELS FOLLOWING ALIF (ALIF OMITTED IN ROMANIZATION); UBUFILI U+07AA IS SHORT u, OOBOOFILI U+07AB IS u WITH MACRON
  347. U+0020U+0787U+07A7=U+0020aU+0304
  348. U+0020U+0787U+07A6=U+0020a
  349. U+0020U+0787U+07AD=U+0020eU+0304
  350. U+0020U+0787U+07AC=U+0020e
  351. U+0020U+0787U+07A9=U+0020iU+0304
  352. U+0020U+0787U+07A8=U+0020i
  353. U+0020U+0787U+07AF=U+0020oU+0304
  354. U+0020U+0787U+07AE=U+0020o
  355. U+0020U+0787U+07AB=U+0020uU+0304
  356. U+0020U+0787U+07AA=U+0020u
  357. # THAANA ALIF APPEARING MEDIALLY WITH ANY VOWEL
  358. # IS ROMANIZED WITH APOSTROPHE FOLLOWED BY THE SAME VOWEL
  359. U+0787U+07A7=U+0027aU+0304
  360. U+0787U+07A6=U+0027a
  361. U+0787U+07AD=U+0027eU+0304
  362. U+0787U+07AC=U+0027e
  363. U+0787U+07A9=U+0027iU+0304
  364. U+0787U+07A8=U+0027i
  365. U+0787U+07AF=U+0027oU+0304
  366. U+0787U+07AE=U+0027o
  367. U+0787U+07AB=U+0027uU+0304
  368. U+0787U+07AA=U+0027u
  369. # THAANA MEDIAL OR FINAL VOWELS OVER CONSONANTS EXCEPT ALIF
  370. # THIS PRODUCES NO UPPERCASE VOWELS
  371. U+07A7=aU+0304
  372. U+07A6=a
  373. U+07AD=eU+0304
  374. U+07AC=e
  375. U+07A9=iU+0304
  376. U+07A8=i
  377. U+07AF=oU+0304
  378. U+07AE=o
  379. U+07AB=uU+0304
  380. U+07AA=u
  381. # THAANA LETTER ALIF--ANY REMAINING AFTER CONVERSION MAP TO APOSTROPHE
  382. U+0787=U+0027
  383.