blob: a9515e6992e4a7a881a143159cdf57e485c952fc [file] [log] [blame]
#--------------------------------------------------------------------
# Copyright (c) 2001-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Latin
# :: NFD (NFC) ;
#\ue000 reserved
#consonants
# Signs
$chandrabindu = \ue001;
$anusvara     = \ue002;
$visarga      = \ue003;
# \ue004 reserved
# Stand-alone (independent) vowel letters are named w<vowel>
$wa  = \ue005;
$waa = \ue006;
$wi  = \ue007;
$wii = \ue008;
$wu  = \ue009;
$wuu = \ue00a;
$wr  = \ue00b;
$wl  = \ue00c;
$wce = \ue00d; # LETTER CANDRA E
$wse = \ue00e; # LETTER SHORT E
$we  = \ue00f; # \u090f LETTER E
$wai = \ue010;
$wco = \ue011; # LETTER CANDRA O
$wso = \ue012; # LETTER SHORT O
$wo  = \ue013; # \u0913 LETTER O
$wau = \ue014;
# Consonant letters, one InterIndic code point each
$ka   = \ue015;
$kha  = \ue016;
$ga   = \ue017;
$gha  = \ue018;
$nga  = \ue019;
$ca   = \ue01a;
$cha  = \ue01b;
$ja   = \ue01c;
$jha  = \ue01d;
$nya  = \ue01e;
$tta  = \ue01f;
$ttha = \ue020;
$dda  = \ue021;
$ddha = \ue022;
$nna  = \ue023;
$ta   = \ue024;
$tha  = \ue025;
$da   = \ue026;
$dha  = \ue027;
$na   = \ue028;
$ena  = \ue029; # compatibility
$pa   = \ue02a;
$pha  = \ue02b;
$ba   = \ue02c;
$bha  = \ue02d;
$ma   = \ue02e;
$ya   = \ue02f;
$ra   = \ue030;
$rra  = \ue031;
$la   = \ue032;
$lla  = \ue033;
$ela  = \ue034; # compatibility
$va   = \ue035;
$sha  = \ue036;
$ssa  = \ue037;
$sa   = \ue038;
$ha   = \ue039;
# \u093a Reserved
# \u093b Reserved
$nukta    = \ue03c;
$avagraha = \ue03d; # SIGN AVAGRAHA
# Dependent vowel signs are named by the bare <vowel>
$aa = \ue03e;
$i  = \ue03f;
$ii = \ue040;
$u  = \ue041;
$uu = \ue042;
$rh = \ue043;
$lh = \ue044;
$ce = \ue045; # VOWEL SIGN CANDRA E
$se = \ue046; # VOWEL SIGN SHORT E
$e  = \ue047;
$ai = \ue048;
$co = \ue049; # VOWEL SIGN CANDRA O
$so = \ue04a; # VOWEL SIGN SHORT O
$o  = \ue04b; # \u094b
$au = \ue04c;
$virama = \ue04d;
# \u094e Reserved
# \u094f Reserved
#\u0950>\ue050; # OM
# \u0951>; # UNMAPPED STRESS SIGN UDATTA
# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
# \u0953>; # UNMAPPED GRAVE ACCENT
# \u0954>; # UNMAPPED ACUTE ACCENT
$lm   = \ue055; # Telugu Length Mark
$ailm = \ue056; # AI Length Mark
$aulm = \ue057; # AU Length Mark
# Urdu compatibility forms
$uka   = \ue058;
$ukha  = \ue059;
$ugha  = \ue05a;
$ujha  = \ue05b;
$uddha = \ue05c;
$udha  = \ue05d;
$ufa   = \ue05e;
$uya   = \ue05f;
# Further vocalic letters (w-prefixed) and signs (h-suffixed), following the
# naming used for the other vowels
$wrr = \ue060;
$wll = \ue061;
$rrh = \ue062;
$llh = \ue063;
# Punctuation
$danda       = \ue064;
$doubleDanda = \ue065;
# Digits occupy ten consecutive code points, \ue066 through \ue06f
$zero  = \ue066; # DIGIT ZERO
$one   = \ue067; # DIGIT ONE
$two   = \ue068; # DIGIT TWO
$three = \ue069; # DIGIT THREE
$four  = \ue06a; # DIGIT FOUR
$five  = \ue06b; # DIGIT FIVE
$six   = \ue06c; # DIGIT SIX
$seven = \ue06d; # DIGIT SEVEN
$eight = \ue06e; # DIGIT EIGHT
$nine  = \ue06f; # DIGIT NINE
# For all other scripts: sixteen code points \ue070-\ue07f, named $ecp0-$ecpF.
# None of the rules in this file references them.
$ecp0 = \ue070;
$ecp1 = \ue071;
$ecp2 = \ue072;
$ecp3 = \ue073;
$ecp4 = \ue074;
$ecp5 = \ue075;
$ecp6 = \ue076;
$ecp7 = \ue077;
$ecp8 = \ue078;
$ecp9 = \ue079;
$ecpA = \ue07a;
$ecpB = \ue07b;
$ecpC = \ue07c;
$ecpD = \ue07d;
$ecpE = \ue07e;
$ecpF = \ue07f;
# \u0970>; # UNMAPPED ABBREVIATION SIGN
# Two groups of dependent vowel signs, given as InterIndic code point ranges:
# $aa-$ii plus $ce-$au, and $u-$lh.
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];
# The dandas plus every code point outside the InterIndic range \ue000-\ue07f.
# The lower bound is written as a plain \u0000 escape; the earlier doubly
# escaped spelling made the range start at the digit '0' instead of U+0000.
$endThing=[$danda$doubleDanda \u0000-\udfff\ue080-\ufffd];
# $x was originally called '&'; $z was '%'
# $x: the virama and the dependent vowel signs. A consonant followed by one of
# these is written without its inherent 'a'.
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
# $z: the Latin consonant letters.
$z=[bcdfghjklmnpqrstvwxyz];
# $consonants: the code point range $ka through $ha, plus the virama.
$consonants=[$ka-$ha $virama];
######################################################################
# convert from Native letters to Latin letters
######################################################################
# Anusvara: the Latin form is picked by the consonant that follows it;
# with no listed consonant after it, it is written as '-' plus m\u0307.
$anusvara } [ $ka $kha $ga $gha $nga ]                   > n\u0307 ;
$anusvara } [ $ca $cha $ja $jha $nya ]                   > n\u0304 ;
$anusvara } [ $tta $ttha $dda $ddha $nna ]               > n\u0323 ;
$anusvara } [ $ta $tha $da $dha $na ]                    > n ;
$anusvara } [ $pa $pha $ba $bha $ma ]                    > m ;
$anusvara } [ $ya $ra $lla $la $va $ssa $sha $sa $ha ]   > n ;
$anusvara > '-'m\u0307 ;
# normal consonants
# Pattern used throughout: "<cons> } $x" drops the inherent 'a' because a
# virama or vowel sign follows; the bare "<cons>" rule keeps it.
# '' in a replacement is a literal apostrophe.
$cha } $x          > ch ;
$cha               > cha ;
$ca $virama } $ha  > c'' ;
$ca } $x           > c ;
$ca                > ca ;
$jha } $x          > jh ;
$jha               > jha ;
$ja $virama } $ha  > j'' ;
$ja } $x           > j ;
$ja                > ja ;
#$nya}$x>ny;
#$nya>nya;
$nya } $x          > n\u0303 ;
$nya               > n\u0303a ;
# Retroflex and dental stops.
# "<cons> $virama } <next>" rules emit the consonant plus a literal apostrophe
# ('' in a replacement); presumably this keeps the pair from reading as an
# aspirate digraph such as th or dh -- confirm against Latin-InterIndic.
$ttha}$x>t\u0323h;
$tta$virama}$ha>t\u0323'';
$tta}$x>t\u0323;
$ddha}$x>d\u0323h;
# Consume the virama explicitly, like the sibling rules. Matching any $x here
# would also add the apostrophe when a vowel sign stands between dda and ha.
$dda$virama}$ha>d\u0323'';
$dda}$x>d\u0323;
$dha}$x>dh;
$da$virama}$ha>d'';
$da$virama}$ddha>d'';
$da$virama}$dda>d'';
$da$virama}$dha>d'';
#$da$virama}$da>dda;
$da}$x>d;
$tha}$x>th;
$ta$virama}$ha>t'';
$ta$virama}$ttha>t'';
$ta$virama}$tta>t'';
$ta$virama}$tha>t'';
# Fallbacks: no virama or vowel sign follows, so the inherent 'a' is written.
$tta>t\u0323a;
$ttha>t\u0323ha;
#$ta$virama}$ta>tta;
$ta}$x>t;
$tha>tha;
$ta>ta;
$dda>d\u0323a;
$dha>dha;
$ddha>d\u0323ha;
$da>da;
# nna and na
$nna } $x          > n\u0323 ;
$nna               > n\u0323a ;
$na $virama } $ga  > n'' ;
$na $virama } $ya  > n'' ;
$na } $x           > n ;
$na                > na ;
# kha, ka, gha, ga, nga
$kha } $x          > kh ;
$kha               > kha ;
$ka $virama } $ha  > k'' ;
$ka } $x           > k ;
$ka                > ka ;
$gha } $x          > gh ;
$gha               > gha ;
$ga $virama } $ha  > g'' ;
$ga } $x           > g ;
$ga                > ga ;
#ng<$nga}$x;
#nga<$nga;
$nga } $x          > n\u0307 ;
$nga               > n\u0307a ;
# pha, pa, bha, ba, ma
$pha } $x           > ph ;
$pha                > pha ;
$pa $virama } $ha   > p'' ;
$pa } $x            > p ;
$pa                 > pa ;
$bha } $x           > bh ;
$bha                > bha ;
$ba $virama } $ha   > b'' ;
$ba } $x            > b ;
$ba                 > ba ;
$ma $virama } $ma   > m'' ;
#$ma$virama}$anusvara>m'';
$ma } $x            > m ;
$ma                 > ma ;
# ya, ra, la, lla, va
$ya } $x            > y ;
$ya                 > ya ;
$ra $virama } $ha   > r'' ;
$ra } $x            > r ;
$ra                 > ra ;
$la $virama } $ha   > l'' ;
$la } $x            > l ;
$la                 > la ;
$lla $virama } $ha  > l\u0323'' ;
$lla } $x           > l\u0323 ;
$lla                > l\u0323a ;
$va } $x            > v ;
$va                 > va ;
# sha, ssa, sa, then ha
$sha } $x           > s\u0301 ;
$ssa } $x           > s\u0323 ;
$sa $virama } $ha   > s'' ;
$sa $virama } $sha  > s'' ;
$sa $virama } $ssa  > s'' ;
$sa $virama } $sa   > s'' ;
$sa } $x            > s ;
$sha                > s\u0301a ;
$ssa                > s\u0323a ;
$sa                 > sa ;
$ha } $x            > h ;
$ha                 > ha ;
# Urdu compatibility
# Each letter has two rules: without the inherent 'a' when a member of $x
# follows, and with it otherwise.
$uya}$x>y\u0307;
$uya>y\u0307a;
$ela}$x>l\u0331;
$ela>l\u0331a;
$ena}$x>n\u0331;
$ena>n\u0331a;
$uka}$x>q;
$uka>qa;
$ukha}$x>k\u0323;
$ukha>k\u0323a;
$ugha}$x>g\u0307;
$ugha>g\u0307a;
$ujha}$x>z;
$ujha>za;
$udha}$x>r\u0323h;
$udha>r\u0323ha;
$uddha}$x>r\u0323;
$uddha>r\u0323a;
$ufa}$x>f\u0323;
$ufa>f\u0323a;
# dependent vowels (should never occur except following consonants)
$aa>a\u0304;
$ai>ai;
$au>au;
$ii>i\u0304;
$i>i;
$uu>u\u0304;
$u>u;
$rrh>r\u0325\u0304;
# $rh gets a trailing 'a' unless a member of $consonants follows it
$rh}$consonants>r\u0325;
$rh>r\u0325a;
$llh>l\u0325\u0304;
$lh>l\u0325;
$e>e\u0304;
$o>o\u0304;
# extra vowels
$ce>e\u0306;
$co>o\u0306;
$se>e;
$so>o;
# independent vowels (when following consonants)
# The already-produced Latin letter ('a' or a consonant in $z) is the
# before-context, written to the left of '{'. The vowel is the key, so only
# the vowel is rewritten, prefixed by a literal apostrophe ('').
# With '}' the Latin letter would be the key instead: the rule would never
# fire on transliterated output and would replace a raw Latin letter in
# mixed-script input.
a{$waa > ''a\u0304 ;
$z{$waa > ''a\u0304 ;
a{$wai > ''ai ;
$z{$wai > ''ai ;
a{$wau > ''au ;
$z{$wau > ''au ;
a{$wii > ''i\u0304 ;
$z{$wii > ''i\u0304 ;
a{$wi > ''i ;
$z{$wi > ''i ;
a{$wuu > ''u\u0304 ;
$z{$wuu > ''u\u0304 ;
a{$wu > ''u ;
$z{$wu > ''u ;
$z{$wrr > ''r\u0325\u0304 ;
$z{$wr > ''r\u0325 ;
$z{$wll > ''l\u0325\u0304 ;
$z{$wl > ''l\u0325 ;
$z{$we > ''e\u0304 ;
$z{$wo > ''o\u0304 ;
a{$wa > ''a ;
$z{$wa > ''a ;
#extra vowels
$z{$wce > ''e\u0306 ;
$z{$wco > ''o\u0306 ;
$z{$wse > ''e ;
$z{$wso > ''o ;
# independent vowels (otherwise)
# Context-free fallbacks: each stand-alone vowel maps straight to its Latin form.
$waa>a\u0304;
$wai>ai;
$wau>au;
$wii>i\u0304;
$wi>i;
$wuu>u\u0304;
$wu>u;
$wrr>r\u0325\u0304;
$wr>r\u0325;
$wll>l\u0325\u0304;
$wl>l\u0325;
$we>e\u0304;
$wo>o\u0304;
$wa>a;
# extra vowels
$wce>e\u0306;
$wco>o\u0306;
$wse>e;
$wso>o;
# Avagraha, chandrabindu and visarga.
# The chandrabindu + anusvara pair is listed before the lone chandrabindu so
# the longer match is tried first.
$avagraha                > \u0315 ;
$chandrabindu $anusvara  > '-'\u0303 ;
$chandrabindu            > '-'m\u0310 ;
$visarga                 > '-'h\u0323 ;
# Digits map one-to-one onto ASCII 0-9
$zero>0;
$one>1;
$two>2;
$three>3;
$four>4;
$five>5;
$six>6;
$seven>7;
$eight>8;
$nine>9;
# Any virama not consumed by an earlier rule is deleted (empty replacement)
$virama > ;
# :: NFC;