blob: ff09b5d604874cb4a5ab3883d3e2e37d0ec2157b [file] [log] [blame]
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: Latin_InterIndic.txt
# Generated from CLDR
#
# Latin-InterIndic
#:: NFD;
# InterIndic code points are private-use characters assigned from \uE001 upward.
# \uE000 reserved
# chandrabindu, anusvara and visarga
$chandrabindu=\uE001;
$anusvara=\uE002;
$visarga=\uE003;
# \uE004 reserved
# w←vowel→ represents the stand-alone form
# (these are the targets of the vowel rules that apply when no consonant precedes)
$wa=\uE005;
$waa=\uE006;
$wi=\uE007;
$wii=\uE008;
$wu=\uE009;
$wuu=\uE00A;
$wr=\uE00B;
$wl=\uE00C;
$wce=\uE00D; # LETTER CANDRA E
$wse=\uE00E; # LETTER SHORT E
$we=\uE00F; # ए LETTER E
$wai=\uE010;
$wco=\uE011; # LETTER CANDRA O
$wso=\uE012; # LETTER SHORT O
$wo=\uE013; # ओ LETTER O
$wau=\uE014;
# Consonants: contiguous code points \uE015-\uE039 ($ka through $ha), with the
# single exception of $vva, which is assigned out of sequence at \uE081.
# The set $consonants defined later relies on the $ka-$ha range being contiguous.
$ka=\uE015;
$kha=\uE016;
$ga=\uE017;
$gha=\uE018;
$nga=\uE019;
$ca=\uE01A;
$cha=\uE01B;
$ja=\uE01C;
$jha=\uE01D;
$nya=\uE01E;
$tta=\uE01F;
$ttha=\uE020;
$dda=\uE021;
$ddha=\uE022;
$nna=\uE023;
$ta=\uE024;
$tha=\uE025;
$da=\uE026;
$dha=\uE027;
$na=\uE028;
$ena=\uE029; #compatibility
$pa=\uE02A;
$pha=\uE02B;
$ba=\uE02C;
$bha=\uE02D;
$ma=\uE02E;
$ya=\uE02F;
$ra=\uE030;
$rra=\uE031;
$la=\uE032;
$lla=\uE033;
$ela=\uE034; #compatibility
$va=\uE035;
$vva=\uE081; # out of sequence: lies outside the $ka-$ha range
$sha=\uE036;
$ssa=\uE037;
$sa=\uE038;
$ha=\uE039;
# The two slots between $ha and $nukta (\uE03A, \uE03B) are left unassigned.
# The original notes below name them by Devanagari-block code points:
#\u093A Reserved
#\u093B Reserved
$nukta=\uE03C;
$avagraha=\uE03D; # SIGN AVAGRAHA
# ←vowel→ represents the dependent form
# (these are the targets of the vowel rules that apply after a consonant or virama)
$aa=\uE03E;
$i=\uE03F;
$ii=\uE040;
$u=\uE041;
$uu=\uE042;
$rh=\uE043;
$rrh=\uE044;
$ce=\uE045; #VOWEL SIGN CANDRA E
$se=\uE046; #VOWEL SIGN SHORT E
$e=\uE047;
$ai=\uE048;
$co=\uE049; # VOWEL SIGN CANDRA O
$so=\uE04A; # VOWEL SIGN SHORT O
$o=\uE04B; # ो
$au=\uE04C;
# Every consonant rule in this file emits the consonant followed by $virama.
$virama=\uE04D;
# \uE04E and \uE04F are left unassigned; the original notes use \u09xx code points:
# \u094E Reserved
# \u094F Reserved
$om = \uE050; # OM
# \uE051-\uE054 are left unassigned; no Latin input maps to the following signs:
# \u0951→; # UNMAPPED STRESS SIGN UDATTA
# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
# \u0953→; # UNMAPPED GRAVE ACCENT
# \u0954→; # UNMAPPED ACUTE ACCENT
# The three length marks below are defined but not produced by any rule in this file.
$lm = \uE055;# Telugu Length Mark
$ailm=\uE056;# AI Length Mark
$aulm=\uE057;# AU Length Mark
# Urdu compatibility forms
$uka=\uE058;
$ukha=\uE059;
$ugha=\uE05A;
$ujha=\uE05B;
$uddha=\uE05C;
$udha=\uE05D;
$ufa=\uE05E;
$uya=\uE05F;
# Long vocalic letters: $wrr / $wll are the stand-alone forms (targets of
# r\u0325\u0304 and l\u0325\u0304); $lh / $llh are the dependent vocalic-l signs
# (targets of l\u0325 and l\u0325\u0304 after a consonant or virama).
$wrr=\uE060;
$wll=\uE061;
$lh=\uE062;
$llh=\uE063;
# Punctuation: '.' is mapped to $danda by the rules below.
$danda=\uE064;
$doubleDanda=\uE065;
# Digits: ASCII 0-9 map one-to-one onto \uE066-\uE06F.
$zero=\uE066; # DIGIT ZERO
$one=\uE067; # DIGIT ONE
$two=\uE068; # DIGIT TWO
$three=\uE069; # DIGIT THREE
$four=\uE06A; # DIGIT FOUR
$five=\uE06B; # DIGIT FIVE
$six=\uE06C; # DIGIT SIX
$seven=\uE06D; # DIGIT SEVEN
$eight=\uE06E; # DIGIT EIGHT
$nine=\uE06F; # DIGIT NINE
$dgs=\uE082; # glottal stop; target of the ʔ rule; assigned out of sequence
# For all other scripts
# Sixteen extra code points \uE070-\uE07F. None of them is referenced by a rule
# in this file; they are only declared here.
$ecp0=\uE070;
$ecp1=\uE071;
$ecp2=\uE072;
$ecp3=\uE073;
$ecp4=\uE074;
$ecp5=\uE075;
$ecp6=\uE076;
$ecp7=\uE077;
$ecp8=\uE078;
$ecp9=\uE079;
$ecpA=\uE07A;
$ecpB=\uE07B;
$ecpC=\uE07C;
$ecpD=\uE07D;
$ecpE=\uE07E;
$ecpF=\uE07F;
# Khanda-ta (produced from t + \u0331 by a rule further down)
$kta=\uE083;
# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
# Character sets used as rule context.
# $depVowelAbove covers $aa, $i, $ii (\uE03E-\uE040) and $ce through $au (\uE045-\uE04C);
# $depVowelBelow covers $u, $uu, $rh, $rrh (\uE041-\uE044).
# Both are used only as the before-context of the '~' rules.
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
$depVowelBelow=[\uE041-\uE044];
# Danda or double danda; a virama directly before one of these is removed.
$endThing=[$danda$doubleDanda];
# $x was originally called '§'; $z was '%'
# $x: virama plus dependent vowel signs. It is not referenced by any rule in this file.
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
# $z: the lowercase ASCII letters other than a, e, i, o, u.
$z=[bcdfghjklmnpqrstvwxyz];
# $consonants: anything that counts as "a consonant precedes" for the vowel rules:
# the InterIndic range $ka-$ha (which excludes $vva and the Urdu compatibility
# letters, since those lie outside \uE015-\uE039), the Latin letters in $z, and
# the KA-HA ranges of Devanagari, Bengali, Gurmukhi, Gujarati, Oriya, Tamil,
# Telugu, Kannada and Malayalam.
$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
# Miscellaneous signs.
# U+0315 (combining comma above right) becomes the avagraha.
\u0315 → $avagraha;
# A bare U+0303 (combining tilde) becomes chandrabindu followed by anusvara.
\u0303→$chandrabindu$anusvara;
# m + U+0310 (combining candrabindu) becomes chandrabindu.
m\u0310→$chandrabindu;
# h + U+0323 (combining dot below) becomes visarga.
h\u0323→$visarga;
# x expands to the sequence ka, virama, sa.
x→$ka$virama$sa;
# convert to independent forms at start of word or syllable:
# (NOTE: the rules directly below do NOT produce independent forms; the
#  independent-form rules come after the consonant-context rules further down.)
# dependent forms for roundtrip
# A vowel prefixed with U+0314 (combining reversed comma above) is converted to
# the DEPENDENT vowel sign regardless of what precedes it. Within each vowel the
# longer spelling is listed before the shorter one so that it is tried first
# (e.g. "ai" and "au" before plain "a", macron forms before bare letters).
\u0314a\u0304→$aa;
\u0314ai→$ai;
\u0314au→$au;
\u0314ii→$ii;
\u0314i\u0304→$ii;
\u0314i→$i;
\u0314u\u0304→$uu;
\u0314u→$u;
\u0314r\u0325\u0304→$rrh;
\u0314r\u0325→$rh;
\u0314l\u0325\u0304→$llh;
\u0314lh→$lh;
\u0314l\u0325→$lh;
\u0314e\u0304→$e;
\u0314o\u0304→$o;
# U+0314 + plain "a" is removed entirely: this rule has an empty replacement.
\u0314a→;
\u0314e\u0306→$ce;
\u0314o\u0306→$co;
\u0314e→$se;
\u0314o→$so;
# preceded by consonants
# "$consonants{" is a before-context: the vowel is rewritten only when the
# character immediately before it is a member of $consonants; the consonant
# itself is left unchanged. The result is the DEPENDENT vowel sign.
# Longer spellings come before shorter ones so that they are tried first.
$consonants{ a\u0304→$aa;
$consonants{ ai→$ai;
$consonants{ au→$au;
$consonants{ ii→$ii;
$consonants{ i\u0304→$ii;
$consonants{ i→$i;
$consonants{ u\u0304→$uu;
$consonants{ u→$u;
$consonants{ r\u0325\u0304→$rrh;
# vocalic r followed by "a": the "a" is consumed together with the r.
$consonants{ r\u0325a→$rh;
$consonants{ r\u0325→$rh;
$consonants{ l\u0325\u0304→$llh;
$consonants{ lh→$lh;
$consonants{ l\u0325→$lh;
$consonants{ e\u0304→$e;
$consonants{ o\u0304→$o;
$consonants{ e\u0306→$ce;
$consonants{ o\u0306→$co;
$consonants{ e→$se;
$consonants{ o→$so;
# Unlike the virama-context section later in the file, there is no rule for
# plain "a" here.
# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
# Context-free vowel rules: a vowel that was not matched by an earlier,
# context-restricted rule becomes the STAND-ALONE (independent) vowel letter.
# Longer spellings come before shorter ones so that they are tried first.
a\u0304→$waa;
ai→$wai;
au→$wau;
i\u0304→$wii;
i→$wi;
u\u0304→$wuu;
u→$wu;
r\u0325\u0304→$wrr;
r\u0325→$wr;
l\u0325\u0304→$wll;
# Both "lh" and l + U+0325 (combining ring below) give the stand-alone vocalic l.
lh→$wl;
l\u0325→$wl;
e\u0304→$we;
o\u0304→$wo;
a→$wa;
e\u0306→$wce;
o\u0306→$wco;
e→$wse;
# An apostrophe followed by "om" becomes the OM sign ('' is an escaped apostrophe).
''om→$om;
o→$wso;
# rules for anusvara
# In "key } context" the context after "}" must follow the key but is not
# consumed. These rules must stay ahead of the plain consonant rules below,
# which would otherwise convert the nasal to a full consonant.
#
# Exceptions first: before vocalic r / vocalic l (U+0325 ring below) or before
# "na", n stays a real consonant ($na + $virama, cursor placed before the virama).
n}r\u0325 → $na|$virama;
n}l\u0325 → $na|$virama;
n}na → $na|$virama;
# n + U+0307 (dot above) before k, g or another n + U+0307
n\u0307}[kg] → $anusvara;
n\u0307}n\u0307 → $anusvara;
# n + U+0304 (macron) before c, j or n + U+0303 (tilde)
n\u0304}[cj] → $anusvara;
n\u0304}n\u0303 → $anusvara;
# n + U+0323 (dot below) before t, d or n carrying U+0323
n\u0323}[tdn]\u0323 → $anusvara;
# plain n before t, d, n; plain m before p, b, m
n}[tdn] → $anusvara;
m}[pbm] → $anusvara;
# plain n before y, l, v, s, h, r
n}[ylvshr] → $anusvara;
# m + U+0307 (dot above) is always an anusvara, with no context required.
m\u0307 → $anusvara;
# Urdu compatibility
# Each rule emits the consonant followed by $virama; the "|" places the cursor
# before the virama so that the "$virama + vowel" rules further down can merge a
# following vowel into its dependent sign.
q→$uka|$virama;
k\u0331h\u0331→$ukha |$virama;
g\u0307→ $ugha | $virama;
z → $ujha |$virama;
f → $ufa|$virama;
# Khanda-ta: t + U+0331 (combining macron below). Unlike the rules above, no
# virama is emitted and there is no "|" cursor marker in the replacement.
t\u0331→$kta;
# dev
# Main consonant rules. Every rule emits <consonant> + $virama and leaves the
# cursor ("|") before the virama, so the following "$virama + vowel" rules can
# replace the virama with a dependent vowel sign.
# Order matters: letters with a diacritic and aspirated digraphs ("kh", "t\u0323h")
# are listed before the plain letter they start with, so they are tried first.
#
# Letters with diacritics (U+0307 dot above, U+0331 macron below, U+0303 tilde,
# U+0323 dot below):
y\u0307→$uya|$virama;
l\u0331→$ela|$virama;
n\u0331→$ena|$virama;
n\u0307→$nga|$virama;
n\u0303→$nya|$virama;
n\u0323→$nna|$virama;
t\u0323h→$ttha|$virama;
t\u0323→$tta|$virama;
r\u0323h→$udha|$virama;
r\u0323→$uddha|$virama;
d\u0323h→$ddha|$virama;
d\u0323→$dda|$virama;
# Plain ASCII spellings:
kh→$kha|$virama;
k→$ka|$virama;
gh→$gha|$virama;
g→$ga|$virama;
ch→$cha|$virama;
c→$ca|$virama;
jh→$jha|$virama;
j→$ja|$virama;
# NOTE(review): the earlier anusvara rule "n}[ylvshr]" already matches an n
# that is followed by y; confirm whether this "ny" rule can still fire.
ny→$nya|$virama;
# "tth" / "ddh" are ASCII alternatives with the same targets as t\u0323h / d\u0323h.
tth→$ttha|$virama;
ddh→$ddha|$virama;
th→$tha|$virama;
t→$ta|$virama;
dh→$dha|$virama;
d→$da|$virama;
n→$na|$virama;
ph→$pha|$virama;
p→$pa|$virama;
bh→$bha|$virama;
b→$ba|$virama;
m→$ma|$virama;
y→$ya|$virama;
r\u0331→$rra|$virama;
r→$ra|$virama;
l\u0323→$lla|$virama;
l→$la|$virama;
# v and plain w share the target $va; w + U+0307 gives $vva.
v→$va|$virama;
w\u0307→$vva|$virama;
w→$va|$virama;
# "sh" and s + U+0301 (acute) both give $sha; "ss" and s + U+0323 both give $ssa.
sh→$sha|$virama;
ss→$ssa|$virama;
s\u0323→$ssa|$virama;
s\u0301→$sha|$virama;
s→$sa|$virama;
h→$ha|$virama;
# Punctuation: a full stop becomes a danda.
'.'→$danda;
# A danda followed by a full stop becomes a double danda.
# NOTE(review): the rule above already rewrites every '.' it meets, and here
# $danda is part of the matched key rather than a before-context ("{"); confirm
# that typed ".." really ends up as $doubleDanda rather than two dandas.
$danda'.'→$doubleDanda;
# A tilde directly after a dependent vowel sign: anusvara after a sign in
# $depVowelAbove, chandrabindu after a sign in $depVowelBelow. A '~' in any
# other position is not matched by these rules.
$depVowelAbove{'~'→$anusvara;
$depVowelBelow{'~'→$chandrabindu;
# convert to dependent forms after consonant with no vowel:
# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
# The consonant rules leave the cursor in front of the virama they emit, so the
# virama is the first character of the key here: virama + vowel is replaced by
# the dependent vowel sign alone. Commented-out rules are ASCII digraph
# spellings that are disabled in this file.
#$virama aa→$aa;
$virama a\u0304→$aa;
$virama ai→$ai;
$virama au→$au;
$virama ii→$ii;
$virama i\u0304→$ii;
$virama i→$i;
#$virama uu→$uu;
$virama u\u0304→$uu;
$virama u→$u;
#$virama rrh→$rrh;
$virama r\u0325\u0304→$rrh;
#$virama rh→$rh;
# vocalic r followed by "a": the "a" is consumed together with the r.
$virama r\u0325a→$rh;
$virama r\u0325→$rh;
$virama l\u0325\u0304→$llh;
$virama lh→$lh;
$virama l\u0325→$lh;
$virama e\u0304→$e;
$virama o\u0304→$o;
# virama + plain "a" is deleted outright (empty replacement), leaving the bare
# consonant.
$virama a→;
$virama e\u0306→$ce;
$virama o\u0306→$co;
$virama e→$se;
$virama o→$so;
# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
# Key = virama + apostrophe ('' is an escaped apostrophe) + vowel. The whole key
# is replaced by the STAND-ALONE vowel, so the virama and the apostrophe are
# both removed. NOTE(review): the example in the comment above shows the virama
# being kept, which does not match the replacements below; confirm which one is
# intended. Commented-out rules are ASCII digraph spellings that are disabled.
#$virama''aa→$waa;
$virama''a\u0304→$waa;
$virama''ai→$wai;
$virama''au→$wau;
#$virama''ii→$wii;
$virama''i\u0304→$wii;
$virama''i→$wi;
#$virama''uu→$wuu;
$virama''u\u0304→$wuu;
$virama''u→$wu;
#$virama''rrh→$wrr;
$virama''r\u0325\u0304→$wrr;
#$virama''rh→$wr;
$virama''r\u0325→$wr;
$virama''l\u0325\u0304→$wll;
#$virama''lh→$wl;
$virama''l\u0325→$wl;
$virama''e\u0304→$we;
$virama''o\u0304→$wo;
$virama''a→$wa;
$virama''e\u0306→$wce;
$virama''o\u0306→$wco;
$virama''e→$wse;
$virama''o→$wso;
# no virama
# Apostrophe + vowel with no virama in front: the apostrophe is consumed and the
# vowel becomes the STAND-ALONE form ('' is an escaped apostrophe).
''a\u0304→$waa;
''ai→$wai;
''au→$wau;
''i\u0304→$wii;
''i→$wi;
''u\u0304→$wuu;
''u→$wu;
''r\u0325\u0304→$wrr;
''r\u0325→$wr;
''l\u0325\u0304→$wll;
''l\u0325→$wl;
''e\u0304→$we;
''o\u0304→$wo;
''a→$wa;
''e\u0306→$wce;
''o\u0306→$wco;
''e→$wse;
''o→$wso;
# Virama handling when no vowel follows.
# Before a Latin consonant letter ($z) or a space the virama is replaced by
# itself, i.e. kept; with no "|" in the replacement the cursor moves past it.
$virama } [$z] → $virama;
$virama } ' ' → $virama ;
# Directly before a danda or double danda the virama is removed.
$virama}$endThing→;
ʔ→$dgs; # Glottal Stop
# ASCII digits map one-to-one onto the InterIndic digits.
0→$zero;
1→$one;
2→$two;
3→$three;
4→$four;
5→$five;
6→$six;
7→$seven;
8→$eight;
9→$nine;
# Any apostrophe not consumed by an earlier rule is deleted ('' is an escaped
# apostrophe; the replacement is empty).
''→;
# Disabled normalization directive, kept from the original file:
#:: NFC (NFD) ;