| #-------------------------------------------------------------------- |
| # Copyright (c) 1999-2004, International Business Machines |
| # Corporation and others. All Rights Reserved. |
| #-------------------------------------------------------------------- |
| |
| # Latin-InterIndic |
| #:: NFD; |
| # Variables naming the InterIndic code points (private-use range, \ue0xx) for the sign characters. |
| #\ue000 reserved |
| #signs (the consonants start at $ka) |
| $chandrabindu=\ue001; |
| $anusvara=\ue002; |
| $visarga=\ue003; |
| #\ue004 reserved |
| # w<vowel> represents the stand-alone (independent) form of a vowel; |
| # the matching dependent forms ($aa, $i, ...) are defined further down without the "w" prefix. |
| $wa=\ue005; |
| $waa=\ue006; |
| $wi=\ue007; |
| $wii=\ue008; |
| $wu=\ue009; |
| $wuu=\ue00a; |
| $wr=\ue00b; |
| $wl=\ue00c; |
| $wce=\ue00d; # LETTER CANDRA E |
| $wse=\ue00e; # LETTER SHORT E |
| $we=\ue00f; # \u090f LETTER E |
| $wai=\ue010; |
| $wco=\ue011; # LETTER CANDRA O |
| $wso=\ue012; # LETTER SHORT O |
| $wo=\ue013; # \u0913 LETTER O |
| $wau=\ue014; |
| # Consonants. The $consonants set below is built from the range [$ka-$ha] (\ue015-\ue039), |
| # so these assignments must stay contiguous for that range to remain correct. |
| $ka=\ue015; |
| $kha=\ue016; |
| $ga=\ue017; |
| $gha=\ue018; |
| $nga=\ue019; |
| $ca=\ue01a; |
| $cha=\ue01b; |
| $ja=\ue01c; |
| $jha=\ue01d; |
| $nya=\ue01e; |
| $tta=\ue01f; |
| $ttha=\ue020; |
| $dda=\ue021; |
| $ddha=\ue022; |
| $nna=\ue023; |
| $ta=\ue024; |
| $tha=\ue025; |
| $da=\ue026; |
| $dha=\ue027; |
| $na=\ue028; |
| $ena=\ue029; #compatibility |
| $pa=\ue02a; |
| $pha=\ue02b; |
| $ba=\ue02c; |
| $bha=\ue02d; |
| $ma=\ue02e; |
| $ya=\ue02f; |
| $ra=\ue030; |
| $rra=\ue031; |
| $la=\ue032; |
| $lla=\ue033; |
| $ela=\ue034; #compatibility |
| $va=\ue035; |
| # NOTE(review): $vva is the only consonant outside \ue015-\ue039, so [$ka-$ha] does not include it - confirm this is intended. |
| $vva=\ue081; |
| $sha=\ue036; |
| $ssa=\ue037; |
| $sa=\ue038; |
| $ha=\ue039; |
| # The two slots between $ha (\ue039) and $nukta (\ue03c) are left unassigned. |
| #\u093a Reserved |
| #\u093b Reserved |
| $nukta=\ue03c; |
| $avagraha=\ue03d; # SIGN AVAGRAHA |
| # <vowel> (no "w" prefix) represents the dependent form, i.e. the vowel sign used after a consonant. |
| # $depVowelAbove and $depVowelBelow below are written as raw code point ranges over these values. |
| $aa=\ue03e; |
| $i=\ue03f; |
| $ii=\ue040; |
| $u=\ue041; |
| $uu=\ue042; |
| $rh=\ue043; |
| $lh=\ue044; |
| $ce=\ue045; #VOWEL SIGN CANDRA E |
| $se=\ue046; #VOWEL SIGN SHORT E |
| $e=\ue047; |
| $ai=\ue048; |
| $co=\ue049; # VOWEL SIGN CANDRA O |
| $so=\ue04a; # VOWEL SIGN SHORT O |
| $o=\ue04b; # \u094b |
| $au=\ue04c; |
| $virama=\ue04d; |
| # OM and the length marks; the slots listed as Reserved/UNMAPPED have no variable and no rule in this file. |
| # \u094e Reserved |
| # \u094f Reserved |
| $om = \ue050; # OM |
| # \u0951>; # UNMAPPED STRESS SIGN UDATTA |
| # \u0952>; # UNMAPPED STRESS SIGN ANUDATTA |
| # \u0953>; # UNMAPPED GRAVE ACCENT |
| # \u0954>; # UNMAPPED ACUTE ACCENT |
| $lm = \ue055;# Telugu Length Mark |
| $ailm=\ue056;# AI Length Mark |
| $aulm=\ue057;# AU Length Mark |
| #urdu compatibility forms |
| # NOTE(review): these lie outside \ue015-\ue039 and are therefore not part of [$ka-$ha] in $consonants. |
| $uka=\ue058; |
| $ukha=\ue059; |
| $ugha=\ue05a; |
| $ujha=\ue05b; |
| $uddha=\ue05c; |
| $udha=\ue05d; |
| $ufa=\ue05e; |
| $uya=\ue05f; |
| # long vocalic r/l: $wrr/$wll are the stand-alone forms, $rrh/$llh the dependent forms |
| $wrr=\ue060; |
| $wll=\ue061; |
| $rrh=\ue062; |
| $llh=\ue063; |
| # punctuation; both are collected in $endThing below |
| $danda=\ue064; |
| $doubleDanda=\ue065; |
| # Digits 0-9; the rules near the end of the file map the ASCII digits onto these. |
| $zero=\ue066; # DIGIT ZERO |
| $one=\ue067; # DIGIT ONE |
| $two=\ue068; # DIGIT TWO |
| $three=\ue069; # DIGIT THREE |
| $four=\ue06a; # DIGIT FOUR |
| $five=\ue06b; # DIGIT FIVE |
| $six=\ue06c; # DIGIT SIX |
| $seven=\ue06d; # DIGIT SEVEN |
| $eight=\ue06e; # DIGIT EIGHT |
| $nine=\ue06f; # DIGIT NINE |
| # For all other scripts |
| # NOTE(review): none of $ecp0-$ecpF is referenced by a rule in this file; presumably they are |
| # placeholders for script-specific characters handled elsewhere - confirm. |
| $ecp0=\ue070; |
| $ecp1=\ue071; |
| $ecp2=\ue072; |
| $ecp3=\ue073; |
| $ecp4=\ue074; |
| $ecp5=\ue075; |
| $ecp6=\ue076; |
| $ecp7=\ue077; |
| $ecp8=\ue078; |
| $ecp9=\ue079; |
| $ecpA=\ue07a; |
| $ecpB=\ue07b; |
| $ecpC=\ue07c; |
| $ecpD=\ue07d; |
| $ecpE=\ue07e; |
| $ecpF=\ue07f; |
| # \u0970>; # UNMAPPED ABBREVIATION SIGN |
| # Character sets used as context in the rules below. |
| # $depVowelAbove covers $aa-$ii and $ce-$au; $depVowelBelow covers $u-$lh. |
| $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c]; |
| $depVowelBelow=[\ue041-\ue044]; |
| $endThing=[$danda$doubleDanda]; |
| # $x was originally called '&'; $z was '%' |
| # NOTE(review): $x is not referenced by any rule in this file. |
| $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; |
| # $z: the lowercase Latin consonant letters |
| $z=[bcdfghjklmnpqrstvwxyz]; |
| # $consonants: InterIndic consonants, Latin consonant letters, and the \uXX15-\uXX39 (\uXX95-\uXXb9) |
| # ranges of the \u09xx-\u0dxx blocks |
| $consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]]; |
| # Conversion rules start here. Rules are tried in order, so earlier (longer) patterns take precedence. |
| # Combining marks and letter+mark sequences that map directly to signs: |
| \u0315 > $avagraha; |
| \u0303>$chandrabindu$anusvara; |
| m\u0310>$chandrabindu; |
| h\u0323>$visarga; |
| # x expands to the conjunct ka + virama + sa |
| x>$ka$virama$sa; |
| # convert to independent forms at start of word or syllable: |
| # dependent forms for roundtrip |
| # A vowel prefixed with \u0314 maps to its dependent form; \u0314 followed by a plain "a" maps to nothing. |
| # Longer patterns (with \u0304 / \u0306 or a second letter) come before their shorter prefixes. |
| \u0314a\u0304>$aa; |
| \u0314ai>$ai; |
| \u0314au>$au; |
| \u0314ii>$ii; |
| \u0314i\u0304>$ii; |
| \u0314i>$i; |
| \u0314u\u0304>$uu; |
| \u0314u>$u; |
| \u0314r\u0325\u0304>$rrh; |
| \u0314r\u0325>$rh; |
| \u0314l\u0325\u0304>$llh; |
| \u0314lh>$lh; |
| \u0314l\u0325>$lh; |
| \u0314e\u0304>$e; |
| \u0314o\u0304>$o; |
| \u0314a>; |
| \u0314e\u0306>$ce; |
| \u0314o\u0306>$co; |
| \u0314e>$se; |
| \u0314o>$so; |
| |
| # preceded by consonants |
| # "$consonants{" is a before-context: it must precede the vowel but is not itself replaced. |
| # The vowel is converted to its dependent form. |
| $consonants{ a\u0304>$aa; |
| $consonants{ ai>$ai; |
| $consonants{ au>$au; |
| $consonants{ ii>$ii; |
| $consonants{ i\u0304>$ii; |
| $consonants{ i>$i; |
| $consonants{ u\u0304>$uu; |
| $consonants{ u>$u; |
| $consonants{ r\u0325\u0304>$rrh; |
| $consonants{ r\u0325a>$rh; |
| $consonants{ r\u0325>$rh; |
| $consonants{ l\u0325\u0304>$llh; |
| $consonants{ lh>$lh; |
| $consonants{ l\u0325>$lh; |
| $consonants{ e\u0304>$e; |
| $consonants{ o\u0304>$o; |
| $consonants{ e\u0306>$ce; |
| $consonants{ o\u0306>$co; |
| $consonants{ e>$se; |
| $consonants{ o>$so; |
| |
| # e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) |
| # A vowel not caught by any earlier rule is converted to its stand-alone (w-prefixed) form. |
| a\u0304>$waa; |
| ai>$wai; |
| au>$wau; |
| i\u0304>$wii; |
| i>$wi; |
| u\u0304>$wuu; |
| u>$wu; |
| r\u0325\u0304>$wrr; |
| r\u0325>$wr; |
| l\u0325\u0304>$wll; |
| lh>$wl; |
| l\u0325>$wl; |
| e\u0304>$we; |
| o\u0304>$wo; |
| a>$wa; |
| e\u0306>$wce; |
| e>$wse; |
| # '' is a literal apostrophe: apostrophe + "om" becomes the OM sign |
| ''om>$om; |
| o>$wso; |
| |
| # rules for anusvara |
| # Text after "}" is an after-context: it must follow but is not consumed. |
| # The first three rules come first so that n before vocalic r/l or before "na" stays $na + $virama |
| # (cursor "|" left before the virama) instead of becoming $anusvara. |
| n}r\u0325 > $na|$virama; |
| n}l\u0325 > $na|$virama; |
| n}na > $na|$virama; |
| # a nasal directly before one of the listed following letters becomes $anusvara |
| n\u0307}[kg] > $anusvara; |
| n\u0307}n\u0307 > $anusvara; |
| n\u0304}[cj] > $anusvara; |
| n\u0304}n\u0303 > $anusvara; |
| n\u0323}[tdn]\u0323 > $anusvara; |
| n}[tdn] > $anusvara; |
| m}[pbm] > $anusvara; |
| n}[ylvshr] > $anusvara; |
| # m with dot above is always $anusvara, regardless of context |
| m\u0307 > $anusvara; |
| |
| #urdu compatibility |
| # Each rule emits the consonant followed by $virama and leaves the cursor ("|") before the virama, |
| # so the later "$virama <vowel>" rules can merge a following vowel. |
| q>$uka|$virama; |
| k\u0331h\u0331>$ukha |$virama; |
| g\u0307> $ugha | $virama; |
| z > $ujha |$virama; |
| f > $ufa|$virama; |
| |
| # dev |
| # Latin consonant -> InterIndic consonant + $virama, with the cursor ("|") left before the virama |
| # so that a following vowel can be merged by the "$virama <vowel>" rules further down. |
| # Marked and multi-letter forms (e.g. t\u0323h, kh) are listed before their shorter prefixes (t\u0323, k). |
| y\u0307>$uya|$virama; |
| l\u0331>$ela|$virama; |
| n\u0331>$ena|$virama; |
| n\u0307>$nga|$virama; |
| n\u0303>$nya|$virama; |
| n\u0323>$nna|$virama; |
| t\u0323h>$ttha|$virama; |
| t\u0323>$tta|$virama; |
| r\u0323h>$udha|$virama; |
| r\u0323>$uddha|$virama; |
| d\u0323h>$ddha|$virama; |
| d\u0323>$dda|$virama; |
| kh>$kha|$virama; |
| k>$ka|$virama; |
| gh>$gha|$virama; |
| g>$ga|$virama; |
| ch>$cha|$virama; |
| c>$ca|$virama; |
| jh>$jha|$virama; |
| j>$ja|$virama; |
| ny>$nya|$virama; |
| tth>$ttha|$virama; |
| ddh>$ddha|$virama; |
| th>$tha|$virama; |
| t>$ta|$virama; |
| dh>$dha|$virama; |
| d>$da|$virama; |
| n>$na|$virama; |
| ph>$pha|$virama; |
| p>$pa|$virama; |
| bh>$bha|$virama; |
| b>$ba|$virama; |
| m>$ma|$virama; |
| y>$ya|$virama; |
| r\u0331>$rra|$virama; |
| r>$ra|$virama; |
| l\u0323>$lla|$virama; |
| l>$la|$virama; |
| v>$va|$virama; |
| w\u0307>$vva|$virama; |
| w>$va|$virama; |
| sh>$sha|$virama; |
| ss>$ssa|$virama; |
| s\u0323>$ssa|$virama; |
| s\u0301>$sha|$virama; |
| s>$sa|$virama; |
| h>$ha|$virama; |
| # A full stop becomes $danda; a full stop directly after an existing $danda turns the pair into $doubleDanda. |
| '.'>$danda; |
| $danda'.'>$doubleDanda; |
| # '~' after a dependent vowel: $anusvara after the $depVowelAbove set, $chandrabindu after the $depVowelBelow set |
| $depVowelAbove{'~'>$anusvara; |
| $depVowelBelow{'~'>$chandrabindu; |
| # convert to dependent forms after consonant with no vowel: |
| # e.g. kai -> {ka}{virama}ai -> {ka}{ai} |
| # The $virama left by the consonant rules is consumed together with the vowel. |
| # "$virama a" is replaced by nothing, leaving the bare consonant. |
| #$virama aa>$aa; |
| $virama a\u0304>$aa; |
| $virama ai>$ai; |
| $virama au>$au; |
| $virama ii>$ii; |
| $virama i\u0304>$ii; |
| $virama i>$i; |
| #$virama uu>$uu; |
| $virama u\u0304>$uu; |
| $virama u>$u; |
| #$virama rrh>$rrh; |
| $virama r\u0325\u0304>$rrh; |
| #$virama rh>$rh; |
| $virama r\u0325a>$rh; |
| $virama r\u0325>$rh; |
| $virama l\u0325\u0304>$llh; |
| $virama lh>$lh; |
| $virama l\u0325>$lh; |
| $virama e\u0304>$e; |
| $virama o\u0304>$o; |
| $virama a>; |
| $virama e\u0306>$ce; |
| $virama o\u0306>$co; |
| $virama e>$se; |
| $virama o>$so; |
| |
| |
| # otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}'ai -> {ka}{wai} |
| # ('' is a literal apostrophe; the $virama and the apostrophe are both consumed by these rules) |
| #$virama''aa>$waa; |
| $virama''a\u0304>$waa; |
| $virama''ai>$wai; |
| $virama''au>$wau; |
| #$virama''ii>$wii; |
| $virama''i\u0304>$wii; |
| $virama''i>$wi; |
| #$virama''uu>$wuu; |
| $virama''u\u0304>$wuu; |
| $virama''u>$wu; |
| #$virama''rrh>$wrr; |
| $virama''r\u0325\u0304>$wrr; |
| #$virama''rh>$wr; |
| $virama''r\u0325>$wr; |
| $virama''l\u0325\u0304>$wll; |
| #$virama''lh>$wl; |
| $virama''l\u0325>$wl; |
| $virama''e\u0304>$we; |
| $virama''o\u0304>$wo; |
| $virama''a>$wa; |
| $virama''e\u0306>$wce; |
| $virama''o\u0306>$wco; |
| $virama''e>$wse; |
| $virama''o>$wso; |
| # no virama |
| # apostrophe + vowel with no preceding $virama: the apostrophe is consumed and the vowel becomes its stand-alone form |
| ''a\u0304>$waa; |
| ''ai>$wai; |
| ''au>$wau; |
| ''i\u0304>$wii; |
| ''i>$wi; |
| ''u\u0304>$wuu; |
| ''u>$wu; |
| ''r\u0325\u0304>$wrr; |
| ''r\u0325>$wr; |
| ''l\u0325\u0304>$wll; |
| ''l\u0325>$wl; |
| ''e\u0304>$we; |
| ''o\u0304>$wo; |
| ''a>$wa; |
| ''e\u0306>$wce; |
| ''o\u0306>$wco; |
| ''e>$wse; |
| ''o>$wso; |
| |
| # Remaining $virama (no vowel followed): keep it before a Latin consonant or a space, |
| # and delete it before $danda / $doubleDanda. |
| $virama } [$z] > $virama; |
| $virama } ' ' > $virama ; |
| $virama}$endThing>; |
| # ASCII digits map one-to-one onto the InterIndic digit code points. |
| 0>$zero; |
| 1>$one; |
| 2>$two; |
| 3>$three; |
| 4>$four; |
| 5>$five; |
| 6>$six; |
| 7>$seven; |
| 8>$eight; |
| 9>$nine; |
| # any apostrophe not consumed by an earlier rule is deleted |
| ''>; |
| #:: NFC (NFD) ; |