DivehiThaanaRomanization.cfg 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. # Original table by Randall K. Barry, 28 February 2021
  2. # Updated by Randall K. Barry, 11 May 2023 to add script code
  3. [General]
  4. Name=Divehi (Thaana script)
  5. ScriptCode=Thaa
  6. # Script used by the Divehi language of the Maldives
  7. # UNICODE THAANA CHARACTER RANGE: U+0780-U+07BF
  8. # ISO 15924 4-alpha script code: Thaa
  9. Truncation=%
  10. [RomanToScript]
  11. FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  12. SubfieldsAlwaysExcluded=uvxy0123456789
  13. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  14. # RDA boilerplate phrases not transliterated:
  15. Place of publication not identified=Place of publication not identified
  16. publisher not identified=publisher not identified
  17. date of publication not identified=date of publication not identified
  18. At head of title=At head of title
  19. Colophon=Colophon
  20. and others=and others
  21. and one other=and one other
  22. and two others=and two others
  23. and three others=and three others
  24. and four others=and four others
  25. and five others=and five others
  26. and six others=and six others
  27. and seven others=and seven others
  28. and eight others=and eight others
  29. and nine others=and nine others
  30. and ten others=and ten others
  31. and eleven others=and eleven others
  32. and twelve others=and twelve others
  33. and thirteen others=and thirteen others
  34. and fourteen others=and fourteen others
  35. and fifteen others=and fifteen others
  36. and sixteen others=and sixteen others
  37. and seventeen others=and seventeen others
  38. and eighteen others=and eighteen others
  39. and nineteen others=and nineteen others
  40. et al.=et al.
  41. i.e.=i.e.
  42. S.l.=S.l.
  43. s.n.=s.n.
  44. XLIX=XLIX
  45. XLIV=XLIV
  46. XLIII=XLIII
  47. XLII=XLII
  48. XLI=XLI
  49. XLVIII=XLVIII
  50. XLVII=XLVII
  51. XLVI=XLVI
  52. XLV=XLV
  53. XL=XL
  54. LXXXIX=LXXXIX
  55. LXXXIV=LXXXIV
  56. LXXXIII=LXXXIII
  57. LXXXII=LXXXII
  58. LXXXI=LXXXI
  59. LXXXVIII=LXXXVIII
  60. LXXXVII=LXXXVII
  61. LXXXVI=LXXXVI
  62. LXXXV=LXXXV
  63. LXXX=LXXX
  64. LXXIX=LXXIX
  65. LXXIV=LXXIV
  66. LXXIII=LXXIII
  67. LXXII=LXXII
  68. LXXI=LXXI
  69. LXXVIII=LXXVIII
  70. LXXVII=LXXVII
  71. LXXVI=LXXVI
  72. LXXV=LXXV
  73. LXX=LXX
  74. LXIX=LXIX
  75. LXIV=LXIV
  76. LXIII=LXIII
  77. LXII=LXII
  78. LXI=LXI
  79. LXVIII=LXVIII
  80. LXVII=LXVII
  81. LXVI=LXVI
  82. LXV=LXV
  83. LX=LX
  84. LIX=LIX
  85. LIV=LIV
  86. LIII=LIII
  87. LII=LII
  88. LI=LI
  89. LVIII=LVIII
  90. LVII=LVII
  91. LVI=LVI
  92. LV=LV
  93. XXXIX=XXXIX
  94. XXXIV=XXXIV
  95. XXXIII=XXXIII
  96. XXXII=XXXII
  97. XXXI=XXXI
  98. XXXVIII=XXXVIII
  99. XXXVII=XXXVII
  100. XXXVI=XXXVI
  101. XXXV=XXXV
  102. XXX=XXX
  103. XXIX=XXIX
  104. XXIV=XXIV
  105. XXIII=XXIII
  106. XXII=XXII
  107. XXI=XXI
  108. XXVIII=XXVIII
  109. XXVII=XXVII
  110. XXVI=XXVI
  111. XXV=XXV
  112. XX=XX
  113. XIX=XIX
  114. XIV=XIV
  115. XIII=XIII
  116. XII=XII
  117. XI=XI
  118. XVIII=XVIII
  119. XVII=XVII
  120. XVI=XVI
  121. XV=XV
  122. IV=IV
  123. VIII=VIII
  124. VII=VII
  125. VI=VI
  126. IX=IX
  127. III=III
  128. II=II
  129. # COMMON COMBINING CHARACTERS (always follow a base letter)
  130. # combining acute U+0301
  131. # combining tilde U+0303
  132. # combining macron U+0304
  133. # combining dot above U+0307
  134. # combining dot below U+0323
  135. # combining diaeresis below U+0324
  136. # combining low line U+0332
  137. # combining breve below U+032E
  138. # ayn (spacing) U+02BB
  139. # apostrophe (spacing) U+0027
  140. # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
  141. # THAANA LETTER ALIF APPEARING MEDIALLY WITH ANY VOWEL IS ROMANIZED WITH APOSTROPHE
  142. # ORIGINAL VOWEL ASSOCIATED WITH THE ALIF CANNOT BE REGENERATED DURING CONVERSION
  143. U+0027=U+0787
  144. # THAANA LETTER SHAVIYANI WITH SUKUN(BREVE 032E)
  145. HU+032E=U+0781U+07B0
  146. hh=U+0787U+07B0U+0780
  147. hU+032E=U+0781U+07B0
  148. # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
  149. %HU+0332=U+0787U+07B0
  150. %hU+0332=U+0787U+07B0
  151. H=U+0780
  152. h=U+0780
  153. SU+0301=U+0781
  154. sU+0301sU+0301=U+0787U+07B0U+0781
  155. sU+0301=U+0781
  156. # THAANA LETTER MEDIAL NOONU WITHOUT SUKUN (DOT ABOVE 0307)
  157. MU+0307=U+0782
  158. mU+0307=U+0782
  159. mm=U+0787U+07B0U+0789
  160. N=U+0782
  161. nn=U+0787U+07B0U+0782
  162. n=U+0782
  163. R=U+0783
  164. rr=U+0787U+07B0U+0783
  165. r=U+0783
  166. B=U+0784
  167. bb=U+0787U+07B0U+0784
  168. b=U+0784
  169. # THAANA LETTER "L/l" WITH DOT BELOW (0323)
  170. LU+0323=U+0785
  171. lU+0323=U+0785
  172. K=U+0786
  173. kk=U+0787U+07B0U+0786
  174. k=U+0786
  175. # THAANA LETTER ALIF--OFTEN OMITTED IN ROMANIZATION (SEE VOWEL CONVERSION BELOW)
  176. U+0027=U+0787
  177. V=U+0788
  178. vv=U+0787U+07B0U+0788
  179. v=U+0788
  180. M=U+0789
  181. m=U+0789
  182. F=U+078A
  183. ff=U+0787U+07B0U+078A
  184. f=U+078A
  185. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  186. DU+0323=U+0791
  187. dU+0323=U+0791
  188. D=U+078B
  189. dd=U+0787U+07B0U+078B
  190. d=U+078B
  191. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  192. TU+0323=U+0793
  193. tU+0323=U+0793
  194. TU+0324T=U+078CU+07B0U+078C
  195. TU+0324t=U+078CU+07B0U+078C
  196. tU+0324t=U+078CU+07B0U+078C
  197. tU+0324T=U+078CU+07B0U+078C
  198. T=U+078C
  199. tt=U+0787U+07B0U+078C
  200. t=U+078C
  201. L=U+078D
  202. ll=U+0787U+07B0U+078D
  203. l=U+078D
  204. G=U+078E
  205. gg=U+0787U+07B0U+078E
  206. g=U+078E
  207. # THAANA LETTER "N/n" WITH TILDE (0303)
  208. NU+0303=U+078F
  209. nU+0303=U+078F
  210. S=U+0790
  211. ss=U+0787U+07B0U+0790
  212. s=U+0790
  213. Z=U+0792
  214. zz=U+0787U+07B0U+0792
  215. z=U+0792
  216. Y=U+0794
  217. yy=U+0787U+07B0U+0794
  218. y=U+0794
  219. P=U+0795
  220. p=U+0795
  221. pp=U+0787U+07B0U+0795
  222. J=U+0796
  223. jj=U+0787U+07B0U+0796
  224. j=U+0796
  225. C=U+0797
  226. cc=U+0787U+07B0U+0797
  227. c=U+0797
  228. # THAANA EXTENSION FOR ARABIC LOAN WORDS AND NAMES
  229. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  230. TH=U+0798
  231. Th=U+0798
  232. thth=U+0787U+07B0U+0798
  233. th=U+0798
  234. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  235. HU+0323=U+0799
  236. hU+0323=U+0799
  237. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  238. KH=U+079A
  239. Kh=U+079A
  240. khkh=U+0787U+07B0U+079A
  241. kh=U+079A
  242. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  243. DH=U+079B
  244. Dh=U+079B
  245. dhdh=U+0787U+07B0U+079B
  246. dh=U+079B
  247. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  248. ZU+0332=U+079C
  249. zU+0332=U+079C
  250. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  251. SH=U+079D
  252. Sh=U+079D
  253. shsh=U+0787U+07B0U+079D
  254. sh=U+079D
  255. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  256. SU+0323=U+079E
  257. sU+0323=U+079E
  258. # THAANA EXTENSION FOR ARABIC LETTER TO
  259. TU+0324=U+07A0
  260. tU+0324=U+07A0
  261. # THAANA EXTENSION FOR ARABIC LETTER ZO
  262. DU+0332=U+07A1
  263. dU+0332=U+07A1
  264. # THAANA EXTENSION FOR ARABIC LETTER AINU
  265. U+02BB=U+07A2
  266. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  267. GH=U+07A3
  268. Gh=U+07A3
  269. ghgh=U+0787U+07B0U+07A3
  270. gh=U+07A3
  271. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  272. Q=U+07A4
  273. qq=U+0787U+07B0U+07A4
  274. q=U+07A4
  275. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  276. W=U+07A5
  277. ww=U+0787U+07B0U+07A5
  278. w=U+07A5
  279. # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
  280. # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION)
  281. AU+0304%=U+0787U+07A7
  282. A%=U+0787U+07A6
  283. U+0020aU+0304=U+0020U+0787U+07A7
  284. U+0020a=U+0020U+0787U+07A6
  285. EU+0304%=U+0787U+07AD
  286. E%=U+0787U+07AC
  287. U+0020eU+0304=U+0020U+0787U+07AD
  288. U+0020e=U+0020U+0787U+07AC
  289. IU+0304%=U+0787U+07A9
  290. I%=U+0787U+07A8
  291. U+0020iU+0304=U+0020U+0787U+07A9
  292. U+0020i=U+0020U+0787U+07A8
  293. OU+0304%=U+0787U+07AF
  294. O%=U+0787U+07AE
  295. U+0020oU+0304=U+0020U+0787U+07AF
  296. U+0020o=U+0020U+0787U+07AE
  297. UU+0304%=U+0787U+07AB
  298. U%=U+0787U+07AA
  299. U+0020uU+0304=U+0020U+0787U+07AB
  300. U+0020u=U+0020U+0787U+07AA
  301. # THAANA MEDIAL OR FINAL VOWELS OVER ANY CONSONANT
  302. # (THIS ASSUMES NO UPPERCASE VOWELS REMAIN)
  303. aU+0304=U+07A7
  304. a=U+07A6
  305. eU+0304=U+07AD
  306. e=U+07AC
  307. iU+0304=U+07A9
  308. i=U+07A8
  309. oU+0304=U+07AF
  310. o=U+07AE
  311. uU+0304=U+07AB
  312. u=U+07AA
  313. # THAANA SUKUN (SILENCE) MARK; ONLY GENERATED IN OTHER COMBINATIONS
  314. # =U+07B0
  315. [ScriptToRoman]
  316. FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
  317. SubfieldsAlwaysExcluded=uvxy0123456789
  318. OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
  319. # RDA boilerplate phrases not transliterated:
  320. Place of publication not identified=Place of publication not identified
  321. publisher not identified=publisher not identified
  322. date of publication not identified=date of publication not identified
  323. At head of title=At head of title
  324. # THAANA LETTER NOONU WITHOUT SUKUN FOLLOWED BY A CONSONANT
  325. # IS ROMANIZED AS "m"+ DOT ABOVE (0307) THEN THE CONSONANT
  326. # OTHERWISE LETTER NOONU MAPS TO "n"
  327. U+0782U+0780=mU+0307U+0780
  328. U+0782U+0781=mU+0307U+0781
  329. U+0782U+0783=mU+0307U+0783
  330. U+0782U+0784=mU+0307U+0784
  331. U+0782U+0785=mU+0307U+0785
  332. U+0782U+0786=mU+0307U+0786
  333. U+0782U+0788=mU+0307U+0788
  334. U+0782U+0789=mU+0307U+0789
  335. U+0782U+078A=mU+0307U+078A
  336. U+0782U+0791=mU+0307U+0791
  337. U+0782U+078B=mU+0307U+078B
  338. U+0782U+078C=mU+0307U+078C
  339. U+0782U+078D=mU+0307U+078D
  340. U+0782U+078E=mU+0307U+078E
  341. U+0782U+078F=mU+0307U+078F
  342. U+0782U+0790=mU+0307U+0790
  343. U+0782U+0792=mU+0307U+0792
  344. U+0782U+0794=mU+0307U+0794
  345. U+0782U+0795=mU+0307U+0795
  346. U+0782U+0796=mU+0307U+0796
  347. U+0782U+0797=mU+0307U+0797
  348. U+0782U+0798=mU+0307U+0798
  349. U+0782U+0799=mU+0307U+0799
  350. U+0782U+079A=mU+0307U+079A
  351. U+0782U+079B=mU+0307U+079B
  352. U+0782U+079C=mU+0307U+079C
  353. U+0782U+079D=mU+0307U+079D
  354. U+0782U+079E=mU+0307U+079E
  355. U+0782U+07A0=mU+0307U+07A0
  356. U+0782U+07A1=mU+0307U+07A1
  357. U+0782U+07A2=mU+0307U+07A2
  358. U+0782U+07A3=mU+0307U+07A3
  359. U+0782U+07A4=mU+0307U+07A4
  360. U+0782U+07A5=mU+0307U+07A5
  361. # THAANA FINAL ALIFU WITH SUKUN (SILENCE) MARK
  362. # IS ROMANIZED WITH "h"+LOW LINE (0332)
  363. U+0787U+07B0U+0020=hU+0332U+0020
  364. # THAANA SHAVIYANI WITH SUKUN (SILENCE) MARK
  365. # IS ROMANIZED WITH "h"+BREVE BELOW
  366. U+0781U+07B0=hU+032E
  367. U+0787U+07B0U+0780=hh
  368. U+0780=h
  369. # THAANA ALIF WITH SUKUN AND SHAVIYANI
  370. U+0787U+07B0U+0781=sU+0301sU+0301
  371. U+0787U+07B0=hU+0332
  372. U+0781=sU+0301
  373. U+0787U+07B0U+0782=nn
  374. U+0782=n
  375. U+0787U+07B0U+0783=rr
  376. U+0783=r
  377. U+0787U+07B0U+0784=bb
  378. U+0784=b
  379. U+0787U+07B0U+0785=lU+0323lU+0323
  380. U+0785=lU+0323
  381. U+0787U+07B0U+0786=kk
  382. U+0786=k
  383. U+0787U+07B0U+0788=vv
  384. U+0788=v
  385. U+0787U+07B0U+0789=mm
  386. U+0789=m
  387. U+0787U+07B0U+078A=ff
  388. U+078A=f
  389. # THAANA LETTER "D/d" WITH DOT BELOW (0323)
  390. U+0787U+07B0U+0791=dU+0323dU+0323
  391. U+0791=dU+0323
  392. U+0787U+07B0U+078B=dd
  393. U+078B=d
  394. # THAANA LETTER "T/t" WITH DOT BELOW (0323)
  395. U+078CU+07B0U+078C=tU+0324t
  396. U+0787U+07B0U+078C=tt
  397. U+0793=tU+0323
  398. U+078C=t
  399. U+0787U+07B0U+078D=ll
  400. U+078D=l
  401. U+0787U+07B0U+078E=gg
  402. U+078E=g
  403. # THAANA LETTER "N/n" WITH TILDE (0303)
  404. U+0787U+07B0U+078F=nU+0303nU+0303
  405. U+078F=nU+0303
  406. U+0787U+07B0U+0790=ss
  407. U+0790=s
  408. U+0787U+07B0U+0792=zz
  409. U+0792=z
  410. U+0787U+07B0U+0794=yy
  411. U+0794=y
  412. U+0787U+07B0U+0795=pp
  413. U+0795=p
  414. U+0787U+07B0U+0796=jj
  415. U+0796=j
  416. U+0787U+07B0U+0797=cc
  417. U+0797=c
  418. # THAANA EXTENSION FOR ARABIC LETTER TTAA
  419. U+0787U+07B0U+0798=thth
  420. U+0798=th
  421. # THAANA EXTENSION FOR ARABIC LETTER HHAA
  422. U+0787U+07B0U+0799=hU+0323hU+0323
  423. U+0799=hU+0323
  424. # THAANA EXTENSION FOR ARABIC LETTER KHAA
  425. U+0787U+07B0U+079A=khkh
  426. U+079A=kh
  427. # THAANA EXTENSION FOR ARABIC LETTER THAALU
  428. U+0787U+07B0U+079B=dhdh
  429. U+079B=dh
  430. # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
  431. U+0787U+07B0U+079C=zU+0332zU+0332
  432. U+079C=zU+0332
  433. # THAANA EXTENSION FOR ARABIC LETTER SHEENU
  434. U+0787U+07B0U+079D=shsh
  435. U+079D=sh
  436. # THAANA EXTENSION FOR ARABIC LETTER SAADHU
  437. U+0787U+07B0U+079E=sU+0323sU+0323
  438. U+079E=sU+0323
  439. # THAANA EXTENSION FOR ARABIC LETTER TO
  440. U+0787U+07B0U+07A0=tU+0324tU+0324
  441. U+07A0=tU+0324
  442. # THAANA EXTENSION FOR ARABIC LETTER ZO
  443. U+0787U+07B0U+07A1=dU+0332dU+0332
  444. U+07A1=dU+0332
  445. # THAANA EXTENSION FOR ARABIC LETTER AINU
  446. U+0787U+07B0U+07A2=U+02BBU+02BB
  447. U+07A2=U+02BB
  448. # THAANA EXTENSION FOR ARABIC LETTER GHAINU
  449. U+0787U+07B0U+07A3=ghgh
  450. U+07A3=gh
  451. # THAANA EXTENSION FOR ARABIC LETTER QAAFU
  452. U+0787U+07B0U+07A4=qq
  453. U+07A4=q
  454. # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
  455. U+0787U+07B0U+07A5=ww
  456. U+07A5=w
  457. # INITIAL VOWELS FOLLOWING ALIF (ALIF OMITTED IN ROMANIZATION)
  458. U+0020U+0787U+07A7=U+0020aU+0304
  459. U+0020U+0787U+07A6=U+0020a
  460. U+0020U+0787U+07AD=U+0020eU+0304
  461. U+0020U+0787U+07AC=U+0020e
  462. U+0020U+0787U+07A9=U+0020iU+0304
  463. U+0020U+0787U+07A8=U+0020i
  464. U+0020U+0787U+07AF=U+0020oU+0304
  465. U+0020U+0787U+07AE=U+0020o
  466. U+0020U+0787U+07AB=U+0020uU+0304
  467. U+0020U+0787U+07AA=U+0020u
  468. # THAANA ALIF APPEARING MEDIALLY WITH ANY VOWEL
  469. # IS ROMANIZED WITH APOSTROPHE FOLLOWED BY THE SAME VOWEL
  470. U+0787U+07A7=U+0027aU+0304
  471. U+0787U+07A6=U+0027a
  472. U+0787U+07AD=U+0027eU+0304
  473. U+0787U+07AC=U+0027e
  474. U+0787U+07A9=U+0027iU+0304
  475. U+0787U+07A8=U+0027i
  476. U+0787U+07AF=U+0027oU+0304
  477. U+0787U+07AE=U+0027o
  478. U+0787U+07AB=U+0027uU+0304
  479. U+0787U+07AA=U+0027u
  480. # THAANA MEDIAL OR FINAL VOWELS OVER CONSONANTS EXCEPT ALIF
  481. # THIS PRODUCES NO UPPERCASE VOWELS
  482. U+07A7=aU+0304
  483. U+07A6=a
  484. U+07AD=eU+0304
  485. U+07AC=e
  486. U+07A9=iU+0304
  487. U+07A8=i
  488. U+07AF=oU+0304
  489. U+07AE=o
  490. U+07AB=uU+0304
  491. U+07AA=u
  492. # THAANA LETTER ALIF--ANY REMAINING AFTER CONVERSION MAP TO APOSTROPHE
  493. U+0787=U+0027