# version 1.1.1
# Original table by David Bucknum
# Last updated 25 January 2019
# Modified by WK with testing by Arabic Cat Staff LOC-CAIRO 
# Additional info from R. Vassie, [n.d.] "Marrying the Arabic and Latin Scripts Conceptually" 

# Table-wide settings.
[General]
# Name of this transliteration table.
Name=Arabic
# Truncation (wildcard) marker used inside the mapping keys of this file.
# Because it is reserved, a literal "%" cannot itself be given a mapping.
Truncation=%

# Romanized (Latin) text -> Arabic script: settings first, then mapping rules.
[RomanToScript]
# Field tags whose content is converted (these look like MARC 21 tags).
FieldsIncluded=100 110 111 130 240 245 246 250 260 264 440 490 600 610 611 630 651 700 710 711 730 740 800 830
# Subfield codes that are never converted, whatever the field.
SubfieldsAlwaysExcluded=uvxy0123456789
# Additional tag/subfield pairs that are left unconverted.
# NOTE(review): 650/a is listed here although 650 is not in FieldsIncluded
# above -- confirm whether 650 should be included or this entry is a leftover.
OtherSubfieldsExcludedByTag=100/e 110/e 111/j 246/i 260/c 264/c 650/a 700/e 700/i 710/e 710/i 711/i 711/j 730/i
# Script identification code for subfield 6; "(3" is the MARC 21 code for Arabic.
Subfield6Code=(3
# NOTE(review): the exact effect of this switch depends on the consuming tool -- TODO confirm.
IncludeFormattingCharactersLcPattern=True

# RDA boilerplate phrases not transliterated (each key maps to itself, so the
# English text is carried over unchanged):
Place of publication not identified=Place of publication not identified
publisher not identified=publisher not identified

# Punctuation marks (Latin mark -> Arabic counterpart):
# %=U+066A (ARABIC PERCENT SIGN) ; cannot transliterate the truncation character
# asterisk -> ARABIC FIVE POINTED STAR
*=U+066D
# comma -> ARABIC COMMA
,=U+060C
# semicolon -> ARABIC SEMICOLON
;=U+061B
# question mark -> ARABIC QUESTION MARK
?=U+061F

# Exceptions for specific words (whole-word rules, listed ahead of the general
# letter rules further down)
# Allah -> alef, lam, lam, heh
AllaU+0304h=U+0627U+0644U+0644U+0647

# Qur'an -> qaf, reh, alef with madda, noon
QurU+02BCaU+0304n=U+0642U+0631U+0622U+0646

# lillah -> lam, lam, heh
lillaU+0304h=U+0644U+0644U+0647

# billah -> beh, alef, lam, lam, heh.
# Unlike "lillah", the alef of "Allah" is kept in writing after the
# preposition bi-, so the value must contain U+0627 after the beh.
billaU+0304h=U+0628U+0627U+0644U+0644U+0647

# Rahman (value has no alef: reh, hah, meem, noon)
RahU+0323maU+0304n=U+0631U+062DU+0645U+0646

# Ruwat (final t becomes teh marbuta; capitalized and lowercase forms)
RuwaU+0304t=U+0631U+0648U+0627U+0629
ruwaU+0304t=U+0631U+0648U+0627U+0629

# Hadha (value has no alef after the heh: heh, thal, alef)
HaU+0304dhaU+0304=U+0647U+0630U+0627
haU+0304dhaU+0304=U+0647U+0630U+0627

# Hadhihi (heh, thal, heh)
HaU+0304dhiU+0304hi=U+0647U+0630U+0647
haU+0304dhiU+0304hi=U+0647U+0630U+0647

# dhalika (thal, lam, kaf) -- only the lowercase form is listed
dhaU+0304lika=U+0630U+0644U+0643

# Ibn when it appears in the middle of a name sequence
# (lowercase key only; value has no initial alef: beh, noon)
ibn=U+0628U+0646

# H[dot below]aya[macron]t -> hah, yeh, alef, teh marbuta.
# The macron is written "U+0304" with a capital U, matching the notation used
# for every other code point in this table.
hU+0323ayaU+0304t=U+062DU+064AU+0627U+0629
HU+0323ayaU+0304t=U+062DU+064AU+0627U+0629

# The four rules below cover two separate letters that would otherwise look
# like one of the digraphs sh / th / dh mapped in the consonant list.
# Each key carries the truncation marker on both sides.

# "s" + "h[dot below]", as in "Ishaq" -> seen, hah

%shU+0323%=U+0633U+062D

# "s[prime]h" combos (s and h kept apart by a prime, U+02B9) -> seen, heh

%sU+02B9h%=U+0633U+0647

# "t" + "h[dot below]" -> teh, hah

%thU+0323%=U+062AU+062D

# "d" + "h[dot below]" -> dal, hah

%dhU+0323%=U+062FU+062D

# La-hu (the hyphen is dropped: lam, heh)

la-hu=U+0644U+0647

# Mi'ah -> meem, alef, yeh with hamza, teh marbuta.
# Listed for both hamza spellings (U+02BE and U+02BC) and both capitalizations.
MiU+02BEah=U+0645U+0627U+0626U+0629
MiU+02BCah=U+0645U+0627U+0626U+0629
miU+02BEah=U+0645U+0627U+0626U+0629
miU+02BCah=U+0645U+0627U+0626U+0629

# Mi'at (same Arabic spelling as Mi'ah; the final t is also teh marbuta)
MiU+02BEat=U+0645U+0627U+0626U+0629
MiU+02BCat=U+0645U+0627U+0626U+0629
miU+02BEat=U+0645U+0627U+0626U+0629
miU+02BCat=U+0645U+0627U+0626U+0629

# Numbers (I have set these to Hindi, i.e. Arabic-Indic, numbers. Note that Persian and Urdu will technically use U+06F0-06F9. This needs further discussion with PSD, as RLIN21 used Hindi numbers while Connexion and Voyager do not.)

# Edition statements with Latin number: "al-Tab'ah N" is converted as a unit
# and the digit N stays a Latin digit in the value.
# NOTE(review): only single digits 1-9 are listed; how a number such as 10 is
# handled depends on the matching engine -- TODO confirm.
al-TU+0323abU+02BBah 1=U+0627U+0644U+0637U+0628U+0639U+0629 1
al-TU+0323abU+02BBah 2=U+0627U+0644U+0637U+0628U+0639U+0629 2
al-TU+0323abU+02BBah 3=U+0627U+0644U+0637U+0628U+0639U+0629 3
al-TU+0323abU+02BBah 4=U+0627U+0644U+0637U+0628U+0639U+0629 4
al-TU+0323abU+02BBah 5=U+0627U+0644U+0637U+0628U+0639U+0629 5
al-TU+0323abU+02BBah 6=U+0627U+0644U+0637U+0628U+0639U+0629 6
al-TU+0323abU+02BBah 7=U+0627U+0644U+0637U+0628U+0639U+0629 7
al-TU+0323abU+02BBah 8=U+0627U+0644U+0637U+0628U+0639U+0629 8
al-TU+0323abU+02BBah 9=U+0627U+0644U+0637U+0628U+0639U+0629 9

# Use Basic Arabic-Indic U+0660-0669 (Latin digit d -> U+0660 + d)
0=U+0660
1=U+0661
2=U+0662
3=U+0663
4=U+0664
5=U+0665
6=U+0666
7=U+0667
8=U+0668
9=U+0669

# Hyphenated prefixes:
# The hyphen is part of each key and absent from each value, so the prefix
# letters end up joined directly to whatever follows.
wa-=U+0648
bi-=U+0628
al-=U+0627U+0644
lil-=U+0644U+0644
li-=U+0644
# NOTE(review): la[macron]- yields lam alone, without an alef -- confirm this is intended.
laU+0304-=U+0644
fiU+0304-=U+0641U+064A
ka-=U+0643

# Vowels and vowel/consonant combinations
# Judging by the comments elsewhere in this table, a leading "%" ties a key to
# the end of a word and a trailing "%" ties it to the beginning of a word.
# Word-final "ah" / "at" -> teh marbuta
%ah=U+0629
%at=U+0629

# tanwin: word-final "an" -> alef
%an=U+0627

# ayn-alif combo
# U+02BB is the romanized ayn; U+02BE and U+02BC are the two romanized hamza
# spellings, which is why the hamza rule appears twice.
%U+02BBaU+0304U+02BE=U+0639U+0627U+0621
%U+02BBaU+0304U+02BC=U+0639U+0627U+0621

# ayn + long a -> ain, alef
U+02BBAU+0304=U+0639U+0627
U+02BBaU+0304=U+0639U+0627

# ayn + long i -> ain, yeh
U+02BBIU+0304=U+0639U+064A
U+02BBiU+0304=U+0639U+064A

# ayn + long u -> ain, waw; ayn + short u -> ain only
U+02BBUU+0304=U+0639U+0648
U+02BBuU+0304=U+0639U+0648
U+02BBU=U+0639
U+02BBu=U+0639

# ayn + capital A -> ain only; the lowercase counterpart is disabled
U+02BBA%=U+0639
#U+02BBa%=U+0639

# alif and hamzas for all occasions
# Every rule is given twice, once per romanized hamza spelling
# (U+02BE and U+02BC), with the same Arabic value.

# Open question: is the truncation marker necessary here? The rules seem to work fine with it.

# long i + hamza + "ah" -> yeh, yeh with hamza, teh marbuta
%iU+0304U+02BEah=U+064AU+0626U+0629
%iU+0304U+02BCah=U+064AU+0626U+0629

# long i + hamza + "at" -> same value as the "ah" form
%iU+0304U+02BEat=U+064AU+0626U+0629
%iU+0304U+02BCat=U+064AU+0626U+0629

# i + hamza + long a -> yeh with hamza, alef
%iU+02BEaU+0304=U+0626U+0627
%iU+02BCaU+0304=U+0626U+0627

# i + hamza -> yeh with hamza
%iU+02BE=U+0626
%iU+02BC=U+0626
# long a + hamza + long a -> alef, hamza, alef
aU+0304U+02BEaU+0304=U+0627U+0621U+0627
aU+0304U+02BCaU+0304=U+0627U+0621U+0627
 
# Shorter hamza combinations, each listed for both romanized hamza spellings
# (U+02BE and U+02BC).
# a + hamza -> alef with hamza above
aU+02BE=U+0623
aU+02BC=U+0623
# hamza + i -> yeh with hamza
U+02BEi=U+0626
U+02BCi=U+0626
# hamza + long a -> alef with madda
U+02BEaU+0304=U+0622
U+02BCaU+0304=U+0622
# hamza + a -> alef with hamza above
U+02BEa=U+0623
U+02BCa=U+0623

# y + hamza + "ah" -> yeh, yeh with hamza, teh marbuta
yU+02BCah=U+064AU+0626U+0629
yU+02BEah=U+064AU+0626U+0629

# y + hamza + "at" -> same value as the "ah" form
yU+02BCat=U+064AU+0626U+0629
yU+02BEat=U+064AU+0626U+0629

# A
# Keys run from the longest combinations down to the bare letter.

# long a + hamza + long i -> alef, yeh with hamza, yeh
aU+0304U+02BCiU+0304=U+0627U+0626U+064A
aU+0304U+02BEiU+0304=U+0627U+0626U+064A

# long a + hamza (+ i) -> alef followed by yeh with hamza, or by a bare hamza
aU+0304U+02BCi=U+0627U+0626
aU+0304U+02BEi=U+0627U+0626
aU+0304U+02BC=U+0627U+0621
aU+0304U+02BE=U+0627U+0621
# long a with the truncation marker -> alef with madda; elsewhere -> plain alef
AU+0304%=U+0622
aU+0304%=U+0622
AU+0304=U+0627
aU+0304=U+0627

# These next two lines were intended to convert to alif-ayn when it is at the beginning of a word, definite or indefinite (i.e. "al-a[ayn]ma[macron]l" or "[space]a[ayn]ma[macron]l").
AU+02BB%=U+0623U+0639
aU+02BB%=U+0623U+0639
# a + ayn without the truncation marker -> ain only
aU+02BB=U+0639
# a with acute accent (U+0301) -> alef maksura
AU+0301=U+0649
aU+0301=U+0649

ayy=U+064A
A%=U+0623
a%=U+0627
A=U+0623
# A remaining lowercase "a" has an empty value, i.e. it produces no Arabic letter.
a=

# I - Capital I at beginning of word is usually alif hamzah-below.
# Keys run from the longest combinations down to the bare letter.

# long i (alone or followed by y) and "iy" -> yeh
%iU+0304=U+064A
iU+0304y=U+064A
iy=U+064A
# capital long I with the truncation marker -> alef with hamza below, yeh
IU+0304%=U+0625U+064A
iU+0304=U+064A
# ayn + capital I -> ain only
U+02BBI%=U+0639

# Disabled rule (i + ayn), kept for reference:
#iU+02BB=U+0625U+0639

# i + hamza. The two romanized hamza spellings (U+02BE and U+02BC) must give
# the same result: capital I -> alef + yeh with hamza, lowercase i -> yeh with
# hamza only.
IU+02BE=U+0627U+0626
IU+02BC=U+0627U+0626
iU+02BE=U+0626
iU+02BC=U+0626

I%=U+0625
i%=U+0625
I=U+0625
# A remaining lowercase "i" has an empty value, i.e. it produces no Arabic letter.
i=

# U
# Keys run from the longest combinations down to the bare letter.

# long u + hamza -> waw with hamza (both romanized hamza spellings)
uU+0304U+02BE=U+0624
uU+0304U+02BC=U+0624
# long u (optionally followed by w) with the truncation marker
# -> alef with hamza above, waw
UU+0304w%=U+0623U+0648
uU+0304w%=U+0623U+0648
UU+0304%=U+0623U+0648
uU+0304%=U+0623U+0648
# long u elsewhere -> waw
uU+0304w=U+0648
uU+0304=U+0648
# short u + hamza -> waw with hamza
uU+02BE=U+0624
uU+02BC=U+0624

U%=U+0623
u%=U+0623
U=U+0623
# A remaining lowercase "u" has an empty value, i.e. it produces no Arabic letter.
u=

# Consonants, with tashdid added
# Each letter has three rules: capitalized, doubled lowercase (tashdid) and
# single lowercase. The doubled form maps to ONE Arabic letter; no shadda mark
# (U+0651) is written. Dotted letters (U+0323 = dot below) and the digraphs
# Th/Kh/Dh/Sh/Gh are listed before the plain letter they begin with.
# NOTE(review): presumably this ordering matters to the matching engine -- confirm before reordering.

B=U+0628
bb=U+0628
b=U+0628
Th=U+062B
thth=U+062B
th=U+062B
TU+0323=U+0637
tU+0323tU+0323=U+0637
tU+0323=U+0637
T=U+062A
tt=U+062A
t=U+062A
J=U+062C
jj=U+062C
j=U+062C
HU+0323=U+062D
hU+0323hU+0323=U+062D
hU+0323=U+062D
H=U+0647
hh=U+0647
h=U+0647
Kh=U+062E
khkh=U+062E
kh=U+062E
K=U+0643
kk=U+0643
k=U+0643
Dh=U+0630
dhdh=U+0630
dh=U+0630
DU+0323=U+0636
dU+0323dU+0323=U+0636
dU+0323=U+0636
D=U+062F
dd=U+062F
d=U+062F
R=U+0631
rr=U+0631
r=U+0631
ZU+0323=U+0638
zU+0323zU+0323=U+0638
zU+0323=U+0638
Z=U+0632
zz=U+0632
z=U+0632
Sh=U+0634
shsh=U+0634
sh=U+0634
SU+0323=U+0635
sU+0323sU+0323=U+0635
sU+0323=U+0635
S=U+0633
ss=U+0633
s=U+0633
Gh=U+063A
ghgh=U+063A
gh=U+063A
F=U+0641
ff=U+0641
f=U+0641
Q=U+0642
qq=U+0642
q=U+0642
L=U+0644
ll=U+0644
l=U+0644
M=U+0645
mm=U+0645
m=U+0645
N=U+0646
nn=U+0646
n=U+0646
W=U+0648
ww=U+0648
w=U+0648
Y=U+064A
yy=U+064A
y=U+064A

# non-Arabic consonants:
# (capitalized and lowercase forms only; no doubled forms are listed)
# P -> peh
P=U+067E
p=U+067E
# Ch -> tcheh
Ch=U+0686
ch=U+0686
# V -> veh
V=U+06A4
v=U+06A4
# G -> gaf
G=U+06AF
g=U+06AF

# Diacritic characters:
# ain (U+0639): catch-all for a romanized ayn (U+02BB) that stands alone,
# i.e. one not already consumed by an ayn + vowel rule earlier in the table:
U+02BB=U+0639
# hamza - not romanized (placeholder only, no active rule)
# =U+0621
# hamza (alone in final position), for both romanized hamza spellings
%U+02BE=U+0621
%U+02BC=U+0621

# Do not know what, if anything, is needed here:
# tatweel:
# =U+0640
# fathatan:
# =U+064B
# dammatan:
# =U+064C
# kasratan:
# =U+064D
# fatha:
# =U+064E
# damma:
# =U+064F
# kasra:
# =U+0650
# shadda:
# =U+0651
# sukun:
# =U+0652
# superscript alef:
# =U+0670
# alef wasla
# =U+0671

[ScriptToRoman]