123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382 |
- # version 1.1.1
- # Original table by David Bucknum
- # Last updated 25 January 2019
- # Modified by WK with testing by Arabic Cat Staff LOC-CAIRO
- # Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually"
- [General]
- Name=Arabic
- Truncation=%
- [RomanToScript]
- FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
- SubfieldsAlwaysExcluded=uvxy0123456789
- OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
- Subfield6Code=(3
- IncludeFormattingCharactersLcPattern=True
- # RDA boilerplate phrases not transliterated:
- Place of publication not identified=Place of publication not identified
- publisher not identified=publisher not identified
- # Punctuation marks:
- # %=U+066A ; cannot transliterate the truncation character
- *=U+066D
- ,=U+060C
- ;=U+061B
- ?=U+061F
- # Exceptions for specific words
- # Allah
- AllaU+0304h=U+0627U+0644U+0644U+0647
- # Qur'an
- QurU+02BCaU+0304n=U+0642U+0631U+0622U+0646
- # lillah
- lillaU+0304h=U+0644U+0644U+0647
- # billah
- billaU+0304h=U+0628U+0644U+0644U+0647
- # Rahman
- RahU+0323maU+0304n=U+0631U+062DU+0645U+0646
- # Ruwat
- RuwaU+0304t=U+0631U+0648U+0627U+0629
- ruwaU+0304t=U+0631U+0648U+0627U+0629
- # Hadha
- HaU+0304dhaU+0304=U+0647U+0630U+0627
- haU+0304dhaU+0304=U+0647U+0630U+0627
- # Hadhihi
- HaU+0304dhiU+0304hi=U+0647U+0630U+0647
- haU+0304dhiU+0304hi=U+0647U+0630U+0647
- # dhalika
- dhaU+0304lika=U+0630U+0644U+0643
- # Ibn when it appears in the middle of a name sequence
- ibn=U+0628U+0646
- # H[dot below]aya[macron]t
- hU+0323ayaU+0304t=U+062DU+064AU+0627U+0629
- HU+0323ayaU+0304t=U+062DU+064AU+0627U+0629
- # "sh[dot below]" (s followed by h[dot below]), as in "Ishaq"
- %shU+0323%=U+0633U+062D
- # "s[prime]h" combos
- %sU+02B9h%=U+0633U+0647
- # "th[dot below]"
- %thU+0323%=U+062AU+062D
- # dh[dot under]
- %dhU+0323%=U+062FU+062D
- # La-hu
- la-hu=U+0644U+0647
- # Mi'ah
- MiU+02BEah=U+0645U+0627U+0626U+0629
- MiU+02BCah=U+0645U+0627U+0626U+0629
- miU+02BEah=U+0645U+0627U+0626U+0629
- miU+02BCah=U+0645U+0627U+0626U+0629
- # Mi'at
- MiU+02BEat=U+0645U+0627U+0626U+0629
- MiU+02BCat=U+0645U+0627U+0626U+0629
- miU+02BEat=U+0645U+0627U+0626U+0629
- miU+02BCat=U+0645U+0627U+0626U+0629
- # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu will technically use U+06F0-06F9. This needs further discussion with PSD, as RLIN21 used Hindi numbers while Connexion and Voyager do not.)
- # Edition statements with Latin number
- al-TU+0323abU+02BBah 1=U+0627U+0644U+0637U+0628U+0639U+0629 1
- al-TU+0323abU+02BBah 2=U+0627U+0644U+0637U+0628U+0639U+0629 2
- al-TU+0323abU+02BBah 3=U+0627U+0644U+0637U+0628U+0639U+0629 3
- al-TU+0323abU+02BBah 4=U+0627U+0644U+0637U+0628U+0639U+0629 4
- al-TU+0323abU+02BBah 5=U+0627U+0644U+0637U+0628U+0639U+0629 5
- al-TU+0323abU+02BBah 6=U+0627U+0644U+0637U+0628U+0639U+0629 6
- al-TU+0323abU+02BBah 7=U+0627U+0644U+0637U+0628U+0639U+0629 7
- al-TU+0323abU+02BBah 8=U+0627U+0644U+0637U+0628U+0639U+0629 8
- al-TU+0323abU+02BBah 9=U+0627U+0644U+0637U+0628U+0639U+0629 9
- # Use Basic Arabic-Indic U+0660-0669
- 0=U+0660
- 1=U+0661
- 2=U+0662
- 3=U+0663
- 4=U+0664
- 5=U+0665
- 6=U+0666
- 7=U+0667
- 8=U+0668
- 9=U+0669
- # Hyphenated prefixes:
- wa-=U+0648
- bi-=U+0628
- al-=U+0627U+0644
- lil-=U+0644U+0644
- li-=U+0644
- laU+0304-=U+0644
- fiU+0304-=U+0641U+064A
- ka-=U+0643
- # Vowels and vowel/consonant combinations
- %ah=U+0629
- %at=U+0629
- # tanwin
- %an=U+0627
- # ayn-alif combo
- %U+02BBaU+0304U+02BE=U+0639U+0627U+0621
- %U+02BBaU+0304U+02BC=U+0639U+0627U+0621
- U+02BBAU+0304=U+0639U+0627
- U+02BBaU+0304=U+0639U+0627
- U+02BBIU+0304=U+0639U+064A
- U+02BBiU+0304=U+0639U+064A
- U+02BBUU+0304=U+0639U+0648
- U+02BBuU+0304=U+0639U+0648
- U+02BBU=U+0639
- U+02BBu=U+0639
- U+02BBA%=U+0639
- #U+02BBa%=U+0639
- # alif and hamzas for all occasions
- # Is truncation necessary? It seems to work fine with it.
- %iU+0304U+02BEah=U+064AU+0626U+0629
- %iU+0304U+02BCah=U+064AU+0626U+0629
- %iU+0304U+02BEat=U+064AU+0626U+0629
- %iU+0304U+02BCat=U+064AU+0626U+0629
- %iU+02BEaU+0304=U+0626U+0627
- %iU+02BCaU+0304=U+0626U+0627
- %iU+02BE=U+0626
- %iU+02BC=U+0626
- aU+0304U+02BEaU+0304=U+0627U+0621U+0627
- aU+0304U+02BCaU+0304=U+0627U+0621U+0627
-
- aU+02BE=U+0623
- aU+02BC=U+0623
- U+02BEi=U+0626
- U+02BCi=U+0626
- U+02BEaU+0304=U+0622
- U+02BCaU+0304=U+0622
- U+02BEa=U+0623
- U+02BCa=U+0623
- yU+02BCah=U+064AU+0626U+0629
- yU+02BEah=U+064AU+0626U+0629
- yU+02BCat=U+064AU+0626U+0629
- yU+02BEat=U+064AU+0626U+0629
- # A
- aU+0304U+02BCiU+0304=U+0627U+0626U+064A
- aU+0304U+02BEiU+0304=U+0627U+0626U+064A
- aU+0304U+02BCi=U+0627U+0626
- aU+0304U+02BEi=U+0627U+0626
- aU+0304U+02BC=U+0627U+0621
- aU+0304U+02BE=U+0627U+0621
- AU+0304%=U+0622
- aU+0304%=U+0622
- AU+0304=U+0627
- aU+0304=U+0627
- # These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinite (i.e. "al-a[ayn]ma[macron]l" or "[space]a[ayn]ma[macron]l").
- AU+02BB%=U+0623U+0639
- aU+02BB%=U+0623U+0639
- aU+02BB=U+0639
- AU+0301=U+0649
- aU+0301=U+0649
- ayy=U+064A
- A%=U+0623
- a%=U+0627
- A=U+0623
- a=
- # I - Capital I at beginning of word is usually alif hamzah-below.
- %iU+0304=U+064A
- iU+0304y=U+064A
- iy=U+064A
- IU+0304%=U+0625U+064A
- iU+0304=U+064A
- U+02BBI%=U+0639
- #iU+02BB=U+0625U+0639
- IU+02BE=U+0627U+0626
- IU+02BC=U+0627U+0626
- iU+02BE=U+0626
- iU+02BC=U+0626
- I%=U+0625
- i%=U+0625
- I=U+0625
- i=
- # U
- uU+0304U+02BE=U+0624
- uU+0304U+02BC=U+0624
- UU+0304w%=U+0623U+0648
- uU+0304w%=U+0623U+0648
- UU+0304%=U+0623U+0648
- uU+0304%=U+0623U+0648
- uU+0304w=U+0648
- uU+0304=U+0648
- uU+02BE=U+0624
- uU+02BC=U+0624
- U%=U+0623
- u%=U+0623
- U=U+0623
- u=
- # Consonants, with tashdid added
- B=U+0628
- bb=U+0628
- b=U+0628
- Th=U+062B
- thth=U+062B
- th=U+062B
- TU+0323=U+0637
- tU+0323tU+0323=U+0637
- tU+0323=U+0637
- T=U+062A
- tt=U+062A
- t=U+062A
- J=U+062C
- jj=U+062C
- j=U+062C
- HU+0323=U+062D
- hU+0323hU+0323=U+062D
- hU+0323=U+062D
- H=U+0647
- hh=U+0647
- h=U+0647
- Kh=U+062E
- khkh=U+062E
- kh=U+062E
- K=U+0643
- kk=U+0643
- k=U+0643
- Dh=U+0630
- dhdh=U+0630
- dh=U+0630
- DU+0323=U+0636
- dU+0323dU+0323=U+0636
- dU+0323=U+0636
- D=U+062F
- dd=U+062F
- d=U+062F
- R=U+0631
- rr=U+0631
- r=U+0631
- ZU+0323=U+0638
- zU+0323zU+0323=U+0638
- zU+0323=U+0638
- Z=U+0632
- zz=U+0632
- z=U+0632
- Sh=U+0634
- shsh=U+0634
- sh=U+0634
- SU+0323=U+0635
- sU+0323sU+0323=U+0635
- sU+0323=U+0635
- S=U+0633
- ss=U+0633
- s=U+0633
- Gh=U+063A
- ghgh=U+063A
- gh=U+063A
- F=U+0641
- ff=U+0641
- f=U+0641
- Q=U+0642
- qq=U+0642
- q=U+0642
- L=U+0644
- ll=U+0644
- l=U+0644
- M=U+0645
- mm=U+0645
- m=U+0645
- N=U+0646
- nn=U+0646
- n=U+0646
- W=U+0648
- ww=U+0648
- w=U+0648
- Y=U+064A
- yy=U+064A
- y=U+064A
- # non-Arabic consonants:
- P=U+067E
- p=U+067E
- Ch=U+0686
- ch=U+0686
- V=U+06A4
- v=U+06A4
- G=U+06AF
- g=U+06AF
- # Diacritic characters:
- # ain (U+0639) - not transliterated alone:
- U+02BB=U+0639
- # hamza - not romanized
- # =U+0621
- # hamza (alone in final position)
- %U+02BE=U+0621
- %U+02BC=U+0621
- # Do not know what, if anything, is needed here:
- # tatweel:
- # =U+0640
- # fathatan:
- # =U+064B
- # dammatan:
- # =U+064C
- # kasratan:
- # =U+064D
- # fatha:
- # =U+064E
- # damma:
- # =U+064F
- # kasra:
- # =U+0650
- # shadda:
- # =U+0651
- # sukun:
- # =U+0652
- # superscript alef:
- # =U+0670
- # alef wasla
- # =U+0671
- [ScriptToRoman]
|