123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
- # Original table by Randall K. Barry, 23 February 2021
- [General]
- Name=Thaana
- # Script used by the Divehi language of the Maldives
- # UNICODE THAANA CHARACTER RANGE: U+0780-U+07BF
- # "%" IS THE TRUNCATION MARKER THAT APPEARS IN RULE KEYS BELOW (E.G. "A%" AND "%hU+0332")
- # NOTE(review): presumably it stands for adjoining text at a word boundary -- confirm against the converter's documentation
- Truncation=%
- [RomanToScript]
- FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
- SubfieldsAlwaysExcluded=uvxy0123456789
- OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
- Subfield6Code=(4
- # RDA boilerplate phrases not transliterated:
- Place of publication not identified=Place of publication not identified
- publisher not identified=publisher not identified
- date of publication not identified=date of publication not identified
- At head of title=At head of title
- Colophon=Colophon
- # COMMON COMBINING CHARACTERS (always follow a base letter)
- # combining acute U+0301
- # combining tilde U+0303
- # combining macron U+0304
- # combining dot above U+0307
- # combining dot below U+0323
- # combining diaeresis below U+0324
- # combining low line U+0332
- # combining breve below U+032E
- # ayn (spacing) U+02BB
- # apostrophe (spacing) U+0027
- # REGULAR LATIN ALPHABETIC CHARACTERS TO BE CONVERTED
- # THAANA LETTER ALIF APPEARING MEDIALLY WITH ANY VOWEL IS ROMANIZED WITH APOSTROPHE
- # ORIGINAL VOWEL ASSOCIATED WITH THE ALIF CANNOT BE REGENERATED DURING CONVERSION
- U+0027=U+0787
- # THAANA LETTER SHAVIYANI WITH SUKUN (BREVE BELOW 032E)
- HU+032E=U+0781U+07B0
- hh=U+0787U+07B0U+0780
- hU+032E=U+0781U+07B0
- # THAANA LETTER ALIFU FINAL WITH SUKUN (LOW LINE 0332)
- %HU+0332=U+0787U+07B0
- %hU+0332=U+0787U+07B0
- H=U+0780
- h=U+0780
- # THAANA LETTER SHAVIYANI ("S/s" WITH ACUTE 0301); DOUBLED FORM IS ALIFU+SUKUN+SHAVIYANI
- SU+0301=U+0781
- sU+0301sU+0301=U+0787U+07B0U+0781
- sU+0301=U+0781
- # THAANA LETTER MEDIAL NOONU WITHOUT SUKUN (DOT ABOVE 0307)
- MU+0307=U+0782
- mU+0307=U+0782
- mm=U+0787U+07B0U+0789
- N=U+0782
- nn=U+0787U+07B0U+0782
- n=U+0782
- R=U+0783
- rr=U+0787U+07B0U+0783
- r=U+0783
- B=U+0784
- bb=U+0787U+07B0U+0784
- b=U+0784
- # THAANA LETTER "L/l" WITH DOT BELOW (0323)
- LU+0323=U+0785
- lU+0323=U+0785
- K=U+0786
- kk=U+0787U+07B0U+0786
- k=U+0786
- # THAANA LETTER ALIF--OFTEN OMITTED IN ROMANIZATION (SEE VOWEL CONVERSION BELOW)
- U+0027=U+0787
- V=U+0788
- vv=U+0787U+07B0U+0788
- v=U+0788
- M=U+0789
- m=U+0789
- F=U+078A
- ff=U+0787U+07B0U+078A
- f=U+078A
- # THAANA LETTER "D/d" WITH DOT BELOW (0323)
- DU+0323=U+0791
- dU+0323=U+0791
- D=U+078B
- dd=U+0787U+07B0U+078B
- d=U+078B
- # THAANA LETTER "T/t" WITH DOT BELOW (0323)
- TU+0323=U+0793
- tU+0323=U+0793
- TU+0324T=U+078CU+07B0U+078C
- TU+0324t=U+078CU+07B0U+078C
- tU+0324t=U+078CU+07B0U+078C
- tU+0324T=U+078CU+07B0U+078C
- T=U+078C
- tt=U+0787U+07B0U+078C
- t=U+078C
- L=U+078D
- ll=U+0787U+07B0U+078D
- l=U+078D
- G=U+078E
- gg=U+0787U+07B0U+078E
- g=U+078E
- # THAANA LETTER "N/n" WITH TILDE (0303)
- NU+0303=U+078F
- nU+0303=U+078F
- S=U+0790
- ss=U+0787U+07B0U+0790
- s=U+0790
- Z=U+0792
- zz=U+0787U+07B0U+0792
- z=U+0792
- Y=U+0794
- yy=U+0787U+07B0U+0794
- y=U+0794
- P=U+0795
- pp=U+0787U+07B0U+0795
- p=U+0795
- J=U+0796
- jj=U+0787U+07B0U+0796
- j=U+0796
- C=U+0797
- cc=U+0787U+07B0U+0797
- c=U+0797
- # THAANA EXTENSION FOR ARABIC LOAN WORDS AND NAMES
- # THAANA EXTENSION FOR ARABIC LETTER TTAA
- TH=U+0798
- Th=U+0798
- thth=U+0787U+07B0U+0798
- th=U+0798
- # THAANA EXTENSION FOR ARABIC LETTER HHAA
- HU+0323=U+0799
- hU+0323=U+0799
- # THAANA EXTENSION FOR ARABIC LETTER KHAA
- KH=U+079A
- Kh=U+079A
- khkh=U+0787U+07B0U+079A
- kh=U+079A
- # THAANA EXTENSION FOR ARABIC LETTER THAALU
- DH=U+079B
- Dh=U+079B
- dhdh=U+0787U+07B0U+079B
- dh=U+079B
- # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
- ZU+0332=U+079C
- zU+0332=U+079C
- # THAANA EXTENSION FOR ARABIC LETTER SHEENU
- SH=U+079D
- Sh=U+079D
- shsh=U+0787U+07B0U+079D
- sh=U+079D
- # THAANA EXTENSION FOR ARABIC LETTER SAADHU
- SU+0323=U+079E
- sU+0323=U+079E
- # THAANA EXTENSION FOR ARABIC LETTER TO
- TU+0324=U+07A0
- tU+0324=U+07A0
- # THAANA EXTENSION FOR ARABIC LETTER ZO
- DU+0332=U+07A1
- dU+0332=U+07A1
- # THAANA EXTENSION FOR ARABIC LETTER AINU
- U+02BB=U+07A2
- # THAANA EXTENSION FOR ARABIC LETTER GHAINU
- GH=U+07A3
- Gh=U+07A3
- ghgh=U+0787U+07B0U+07A3
- gh=U+07A3
- # THAANA EXTENSION FOR ARABIC LETTER QAAFU
- Q=U+07A4
- qq=U+0787U+07B0U+07A4
- q=U+07A4
- # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
- W=U+07A5
- ww=U+0787U+07B0U+07A5
- w=U+07A5
- # INITIAL (AND UPPERCASE) VOWELS THAT CONVERT
- # TO ALIF FOLLOWED BY VOWEL (ALIF OMITTED IN ROMANIZATION)
- AU+0304%=U+0787U+07A7
- A%=U+0787U+07A6
- U+0020aU+0304=U+0020U+0787U+07A7
- U+0020a=U+0020U+0787U+07A6
- EU+0304%=U+0787U+07AD
- E%=U+0787U+07AC
- U+0020eU+0304=U+0020U+0787U+07AD
- U+0020e=U+0020U+0787U+07AC
- IU+0304%=U+0787U+07A9
- I%=U+0787U+07A8
- U+0020iU+0304=U+0020U+0787U+07A9
- U+0020i=U+0020U+0787U+07A8
- OU+0304%=U+0787U+07AF
- O%=U+0787U+07AE
- U+0020oU+0304=U+0020U+0787U+07AF
- U+0020o=U+0020U+0787U+07AE
- # LONG U (WITH MACRON 0304) IS OOBOOFILI U+07AB; SHORT U IS UBUFILI U+07AA
- UU+0304%=U+0787U+07AB
- U%=U+0787U+07AA
- U+0020uU+0304=U+0020U+0787U+07AB
- U+0020u=U+0020U+0787U+07AA
- # THAANA MEDIAL OR FINAL VOWELS OVER ANY CONSONANT
- # (THIS ASSUMES NO UPPERCASE VOWELS REMAIN)
- aU+0304=U+07A7
- a=U+07A6
- eU+0304=U+07AD
- e=U+07AC
- iU+0304=U+07A9
- i=U+07A8
- oU+0304=U+07AF
- o=U+07AE
- uU+0304=U+07AB
- u=U+07AA
- # THAANA SUKUN (SILENCE) MARK; ONLY GENERATED IN OTHER COMBINATIONS
- # =U+07B0
- [ScriptToRoman]
- FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
- SubfieldsAlwaysExcluded=uvxy0123456789
- OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
- # RDA boilerplate phrases not transliterated:
- Place of publication not identified=Place of publication not identified
- publisher not identified=publisher not identified
- date of publication not identified=date of publication not identified
- At head of title=At head of title
- Colophon=Colophon
- # THAANA LETTER NOONU WITHOUT SUKUN FOLLOWED BY A CONSONANT
- # IS ROMANIZED AS "m"+ DOT ABOVE (0307) THEN THE CONSONANT
- # OTHERWISE LETTER NOONU MAPS TO "n"
- U+0782U+0780=mU+0307U+0780
- U+0782U+0781=mU+0307U+0781
- U+0782U+0783=mU+0307U+0783
- U+0782U+0784=mU+0307U+0784
- U+0782U+0785=mU+0307U+0785
- U+0782U+0786=mU+0307U+0786
- U+0782U+0788=mU+0307U+0788
- U+0782U+0789=mU+0307U+0789
- U+0782U+078A=mU+0307U+078A
- U+0782U+0791=mU+0307U+0791
- U+0782U+078B=mU+0307U+078B
- U+0782U+0793=mU+0307U+0793
- U+0782U+078C=mU+0307U+078C
- U+0782U+078D=mU+0307U+078D
- U+0782U+078E=mU+0307U+078E
- U+0782U+078F=mU+0307U+078F
- U+0782U+0790=mU+0307U+0790
- U+0782U+0792=mU+0307U+0792
- U+0782U+0794=mU+0307U+0794
- U+0782U+0795=mU+0307U+0795
- U+0782U+0796=mU+0307U+0796
- U+0782U+0797=mU+0307U+0797
- U+0782U+0798=mU+0307U+0798
- U+0782U+0799=mU+0307U+0799
- U+0782U+079A=mU+0307U+079A
- U+0782U+079B=mU+0307U+079B
- U+0782U+079C=mU+0307U+079C
- U+0782U+079D=mU+0307U+079D
- U+0782U+079E=mU+0307U+079E
- U+0782U+07A0=mU+0307U+07A0
- U+0782U+07A1=mU+0307U+07A1
- U+0782U+07A2=mU+0307U+07A2
- U+0782U+07A3=mU+0307U+07A3
- U+0782U+07A4=mU+0307U+07A4
- U+0782U+07A5=mU+0307U+07A5
- # THAANA FINAL ALIFU WITH SUKUN (SILENCE) MARK
- # IS ROMANIZED WITH "h"+LOW LINE (0332)
- U+0787U+07B0U+0020=hU+0332U+0020
- # THAANA SHAVIYANI WITH SUKUN (SILENCE) MARK
- # IS ROMANIZED WITH "h"+BREVE BELOW (032E)
- U+0781U+07B0=hU+032E
- U+0787U+07B0U+0780=hh
- U+0780=h
- # THAANA ALIF WITH SUKUN AND SHAVIYANI
- U+0787U+07B0U+0781=sU+0301sU+0301
- # ALIFU WITH SUKUN ON ITS OWN IS ROMANIZED WITH "h"+LOW LINE (0332)
- # NOTE(review): this key is a prefix of the longer U+0787U+07B0+consonant keys listed after it;
- # those only take effect if the converter prefers the longest match over file order -- confirm
- U+0787U+07B0=hU+0332
- U+0781=sU+0301
- U+0787U+07B0U+0782=nn
- U+0782=n
- U+0787U+07B0U+0783=rr
- U+0783=r
- U+0787U+07B0U+0784=bb
- U+0784=b
- # THAANA LETTER "L/l" WITH DOT BELOW (0323); DOUBLED FORM REPEATS THE LETTER LIKE THE OTHER CONSONANTS
- U+0787U+07B0U+0785=lU+0323lU+0323
- U+0785=lU+0323
- U+0787U+07B0U+0786=kk
- U+0786=k
- U+0787U+07B0U+0788=vv
- U+0788=v
- U+0787U+07B0U+0789=mm
- U+0789=m
- U+0787U+07B0U+078A=ff
- U+078A=f
- # THAANA LETTER "D/d" WITH DOT BELOW (0323)
- U+0787U+07B0U+0791=dU+0323dU+0323
- U+0791=dU+0323
- U+0787U+07B0U+078B=dd
- U+078B=d
- # THAANA LETTER "T/t" WITH DOT BELOW (0323)
- U+078CU+07B0U+078C=tU+0324t
- U+0787U+07B0U+078C=tt
- U+0793=tU+0323
- U+078C=t
- U+0787U+07B0U+078D=ll
- U+078D=l
- U+0787U+07B0U+078E=gg
- U+078E=g
- # THAANA LETTER "N/n" WITH TILDE (0303)
- U+0787U+07B0U+078F=nU+0303nU+0303
- U+078F=nU+0303
- U+0787U+07B0U+0790=ss
- U+0790=s
- U+0787U+07B0U+0792=zz
- U+0792=z
- U+0787U+07B0U+0794=yy
- U+0794=y
- U+0787U+07B0U+0795=pp
- U+0795=p
- U+0787U+07B0U+0796=jj
- U+0796=j
- U+0787U+07B0U+0797=cc
- U+0797=c
- # THAANA EXTENSION FOR ARABIC LETTER TTAA
- U+0787U+07B0U+0798=thth
- U+0798=th
- # THAANA EXTENSION FOR ARABIC LETTER HHAA
- U+0787U+07B0U+0799=hU+0323hU+0323
- U+0799=hU+0323
- # THAANA EXTENSION FOR ARABIC LETTER KHAA
- U+0787U+07B0U+079A=khkh
- U+079A=kh
- # THAANA EXTENSION FOR ARABIC LETTER THAALU
- U+0787U+07B0U+079B=dhdh
- U+079B=dh
- # THAANA EXTENSION FOR ARABIC LETTER ZAA (NEWER LETTER)
- U+0787U+07B0U+079C=zU+0332zU+0332
- U+079C=zU+0332
- # THAANA EXTENSION FOR ARABIC LETTER SHEENU
- U+0787U+07B0U+079D=shsh
- U+079D=sh
- # THAANA EXTENSION FOR ARABIC LETTER SAADHU
- U+0787U+07B0U+079E=sU+0323sU+0323
- U+079E=sU+0323
- # THAANA EXTENSION FOR ARABIC LETTER TO
- U+0787U+07B0U+07A0=tU+0324tU+0324
- U+07A0=tU+0324
- # THAANA EXTENSION FOR ARABIC LETTER ZO
- U+0787U+07B0U+07A1=dU+0332dU+0332
- U+07A1=dU+0332
- # THAANA EXTENSION FOR ARABIC LETTER AINU
- U+0787U+07B0U+07A2=U+02BBU+02BB
- U+07A2=U+02BB
- # THAANA EXTENSION FOR ARABIC LETTER GHAINU
- U+0787U+07B0U+07A3=ghgh
- U+07A3=gh
- # THAANA EXTENSION FOR ARABIC LETTER QAAFU
- U+0787U+07B0U+07A4=qq
- U+07A4=q
- # THAANA EXTENSION FOR ARABIC LETTER WAAVU (NEWER LETTER)
- U+0787U+07B0U+07A5=ww
- U+07A5=w
- # INITIAL VOWELS FOLLOWING ALIF (ALIF OMITTED IN ROMANIZATION)
- U+0020U+0787U+07A7=U+0020aU+0304
- U+0020U+0787U+07A6=U+0020a
- U+0020U+0787U+07AD=U+0020eU+0304
- U+0020U+0787U+07AC=U+0020e
- U+0020U+0787U+07A9=U+0020iU+0304
- U+0020U+0787U+07A8=U+0020i
- U+0020U+0787U+07AF=U+0020oU+0304
- U+0020U+0787U+07AE=U+0020o
- U+0020U+0787U+07AB=U+0020uU+0304
- U+0020U+0787U+07AA=U+0020u
- # THAANA ALIF APPEARING MEDIALLY WITH ANY VOWEL
- # IS ROMANIZED WITH APOSTROPHE FOLLOWED BY THE SAME VOWEL
- U+0787U+07A7=U+0027aU+0304
- U+0787U+07A6=U+0027a
- U+0787U+07AD=U+0027eU+0304
- U+0787U+07AC=U+0027e
- U+0787U+07A9=U+0027iU+0304
- U+0787U+07A8=U+0027i
- U+0787U+07AF=U+0027oU+0304
- U+0787U+07AE=U+0027o
- U+0787U+07AB=U+0027uU+0304
- U+0787U+07AA=U+0027u
- # THAANA MEDIAL OR FINAL VOWELS OVER CONSONANTS EXCEPT ALIF
- # THIS PRODUCES NO UPPERCASE VOWELS
- U+07A7=aU+0304
- U+07A6=a
- U+07AD=eU+0304
- U+07AC=e
- U+07A9=iU+0304
- U+07A8=i
- U+07AF=oU+0304
- U+07AE=o
- U+07AB=uU+0304
- U+07AA=u
- # THAANA LETTER ALIF--ANY REMAINING AFTER CONVERSION MAP TO APOSTROPHE
- U+0787=U+0027
|