| //-------------------------------------------------------------------- |
| // Copyright (c) 2001-2004, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| |
| // InterIndic-Latin |
| |
| InterIndic_Latin{ |
| Rule{ |
| // ":: NFD (NFC) ;" |
| // Variable definitions: each $name below is bound to a single code point |
| // in the private-use range \ue000-\ue07f. The rules later in this file |
| // refer to these names instead of the raw code points. |
| // The \u09xx values quoted in some comments appear to be the Devanagari |
| // code points sharing the same low byte - TODO confirm. |
| //\ue000 reserved |
| // signs: chandrabindu, anusvara, visarga |
| "$chandrabindu=\ue001;" |
| "$anusvara=\ue002;" |
| "$visarga=\ue003;" |
| //\ue004 reserved |
| // w<vowel> represents the stand-alone form |
| "$wa=\ue005;" |
| "$waa=\ue006;" |
| "$wi=\ue007;" |
| "$wii=\ue008;" |
| "$wu=\ue009;" |
| "$wuu=\ue00a;" |
| "$wr=\ue00b;" |
| "$wl=\ue00c;" |
| |
| "$wce=\ue00d;" // LETTER CANDRA E |
| "$wse=\ue00e;" // LETTER SHORT E |
| |
| "$we=\ue00f;" // \u090f LETTER E |
| "$wai=\ue010;" |
| |
| "$wco=\ue011;" // LETTER CANDRA O |
| "$wso=\ue012;" // LETTER SHORT O |
| |
| "$wo=\ue013;" // \u0913 LETTER O |
| "$wau=\ue014;" |
| |
| // consonants |
| "$ka=\ue015;" |
| "$kha=\ue016;" |
| "$ga=\ue017;" |
| "$gha=\ue018;" |
| "$nga=\ue019;" |
| |
| "$ca=\ue01a;" |
| "$cha=\ue01b;" |
| "$ja=\ue01c;" |
| "$jha=\ue01d;" |
| "$nya=\ue01e;" |
| |
| "$tta=\ue01f;" |
| "$ttha=\ue020;" |
| "$dda=\ue021;" |
| "$ddha=\ue022;" |
| "$nna=\ue023;" |
| |
| "$ta=\ue024;" |
| "$tha=\ue025;" |
| "$da=\ue026;" |
| "$dha=\ue027;" |
| "$na=\ue028;" |
| "$ena=\ue029;" //compatibility |
| |
| "$pa=\ue02a;" |
| "$pha=\ue02b;" |
| "$ba=\ue02c;" |
| "$bha=\ue02d;" |
| "$ma=\ue02e;" |
| |
| "$ya=\ue02f;" |
| "$ra=\ue030;" |
| "$rra=\ue031;" |
| "$la=\ue032;" |
| "$lla=\ue033;" |
| "$ela=\ue034;" //compatibility |
| "$va=\ue035;" |
| |
| "$sha=\ue036;" |
| "$ssa=\ue037;" |
| "$sa=\ue038;" |
| "$ha=\ue039;" |
| //\u093a Reserved |
| //\u093b Reserved |
| "$nukta=\ue03c;" |
| "$avagraha=\ue03d;" // SIGN AVAGRAHA |
| |
| // <vowel> represents the dependent form |
| "$aa=\ue03e;" |
| "$i=\ue03f;" |
| "$ii=\ue040;" |
| "$u=\ue041;" |
| "$uu=\ue042;" |
| "$rh=\ue043;" |
| "$lh=\ue044;" |
| "$ce=\ue045;" //VOWEL SIGN CANDRA E |
| "$se=\ue046;" //VOWEL SIGN SHORT E |
| "$e=\ue047;" |
| "$ai=\ue048;" |
| "$co=\ue049;" // VOWEL SIGN CANDRA O |
| "$so=\ue04a;" // VOWEL SIGN SHORT O |
| "$o=\ue04b;" // \u094b |
| "$au=\ue04c;" |
| "$virama=\ue04d;" |
| // \u094e Reserved |
| // \u094f Reserved |
| //"\u0950>\ue050;" // OM |
| // \u0951>; // UNMAPPED STRESS SIGN UDATTA |
| // \u0952>; // UNMAPPED STRESS SIGN ANUDATTA |
| // \u0953>; // UNMAPPED GRAVE ACCENT |
| // \u0954>; // UNMAPPED ACUTE ACCENT |
| |
| "$lm = \ue055;"// Telugu Length Mark |
| "$ailm=\ue056;"// AI Length Mark |
| "$aulm=\ue057;"// AU Length Mark |
| |
| //urdu compatibility forms |
| "$uka=\ue058;" |
| "$ukha=\ue059;" |
| "$ugha=\ue05a;" |
| "$ujha=\ue05b;" |
| "$uddha=\ue05c;" |
| "$udha=\ue05d;" |
| "$ufa=\ue05e;" |
| "$uya=\ue05f;" |
| |
| // additional stand-alone (w-prefixed) and dependent vowel forms |
| "$wrr=\ue060;" |
| "$wll=\ue061;" |
| "$rrh=\ue062;" |
| "$llh=\ue063;" |
| |
| "$danda=\ue064;" |
| "$doubleDanda=\ue065;" |
| |
| "$zero=\ue066;" // DIGIT ZERO |
| "$one=\ue067;" // DIGIT ONE |
| "$two=\ue068;" // DIGIT TWO |
| "$three=\ue069;" // DIGIT THREE |
| "$four=\ue06a;" // DIGIT FOUR |
| "$five=\ue06b;" // DIGIT FIVE |
| "$six=\ue06c;" // DIGIT SIX |
| "$seven=\ue06d;" // DIGIT SEVEN |
| "$eight=\ue06e;" // DIGIT EIGHT |
| "$nine=\ue06f;" // DIGIT NINE |
| |
| // For all other scripts |
| // ($ecp0-$ecpF are only declared here; no rule visible in this file uses them) |
| "$ecp0=\ue070;" |
| "$ecp1=\ue071;" |
| "$ecp2=\ue072;" |
| "$ecp3=\ue073;" |
| "$ecp4=\ue074;" |
| "$ecp5=\ue075;" |
| "$ecp6=\ue076;" |
| "$ecp7=\ue077;" |
| "$ecp8=\ue078;" |
| "$ecp9=\ue079;" |
| "$ecpA=\ue07a;" |
| "$ecpB=\ue07b;" |
| "$ecpC=\ue07c;" |
| "$ecpD=\ue07d;" |
| "$ecpE=\ue07e;" |
| "$ecpF=\ue07f;" |
| |
| // \u0970>; // UNMAPPED ABBREVIATION SIGN |
| |
| // Character sets. |
| // $depVowelAbove, $depVowelBelow and $endThing are declared here but are |
| // not referenced by any rule visible in this file. |
| "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];" |
| "$depVowelBelow=[\ue041-\ue044];" |
| // NOTE(review): the \u005c\u005cu0000 sequence looks like a doubly-escaped |
| // \u0000 range start; left unchanged because the set is unused here - |
| // confirm the intended range before relying on $endThing. |
| "$endThing=[$danda$doubleDanda \u005c\u005cu0000-\udfff\ue080-\ufffd];" |
| |
| // $x was originally called '&'; $z was '%' |
| // $x: the virama plus every dependent vowel sign. A consonant followed by |
| // a member of $x is written without the inherent "a". $llh is listed |
| // so that a consonant followed by that vowel sign is handled like the other |
| // dependent vowels (each of which has its own "$vowel > ..." rule). |
| "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$llh$lh$e$o$se$ce$so$co];" |
| // $z: the lowercase ASCII consonant letters (Latin output side). |
| "$z=[bcdfghjklmnpqrstvwxyz];" |
| |
| |
| //##################################################################### |
| // convert from Native letters to Latin letters |
| //##################################################################### |
| |
| //transliterations for anusvara |
| // The consonant after "}" is right-hand context only: it selects the |
| // output but is not consumed. Each rule picks an n/m variant according |
| // to the consonant group that follows; rules are tried in order, so the |
| // final rule is the fallback for an anusvara before anything else. |
| "$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;" |
| "$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;" |
| "$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;" |
| "$anusvara} [$ta$tha$da$dha$na] > n ;" |
| "$anusvara} [$pa$pha$ba$bha$ma] > m ;" |
| "$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;" |
| // fallback: literal hyphen (quoted as '-') followed by m + U+0307 |
| "$anusvara>'-'m\u0307;" |
| |
| // normal consonants |
| // Rule shapes used throughout the consonant sections (order matters; |
| // the first matching rule wins): |
| // $C}$x > c - consonant followed by a virama or dependent vowel |
| // sign ($x): emit the bare letter(s) only. |
| // $C > ca - otherwise: emit the letter(s) plus "a". |
| // $C$virama}$ha > c'' - consonant + virama directly before $ha: emit the |
| // letter plus a literal apostrophe ('' in rule |
| // syntax) so it stays separate from the following "h". |
| |
| "$cha}$x>ch;" |
| "$cha>cha;" |
| "$ca$virama}$ha>c'';" |
| "$ca}$x>c;" |
| "$ca>ca;" |
| "$jha}$x>jh;" |
| "$jha>jha;" |
| "$ja$virama}$ha>j'';" |
| "$ja}$x>j;" |
| "$ja>ja;" |
| // superseded spellings of $nya, kept for reference |
| //"$nya}$x>ny;" |
| //"$nya>nya;" |
| "$nya }$x>n\u0303 ;" |
| "$nya > n\u0303a ;" |
| |
| // $tta/$ttha/$dda/$ddha/$nna and $ta/$tha/$da/$dha/$na groups. |
| // "$C}$x" emits the bare letter before a virama or dependent vowel sign, |
| // "$C" alone emits the letter plus "a", and "$C$virama}<consonant>" emits |
| // the letter plus a literal apostrophe ('' in rule syntax) to keep it |
| // apart from the consonant that follows. Rules are tried in order. |
| "$ttha}$x>t\u0323h;" |
| "$tta$virama}$ha>t\u0323'';" |
| "$tta}$x>t\u0323;" |
| "$ddha}$x>d\u0323h;" |
| // Matches $dda + virama before $ha, like the sibling "$C$virama}$ha" rules. |
| // (Previously "$dda}$x$ha", which also fired for $dda + vowel sign + $ha |
| // and put a stray apostrophe before the vowel.) |
| "$dda$virama}$ha>d\u0323'';" |
| "$dda}$x>d\u0323;" |
| "$dha}$x>dh;" |
| "$da$virama}$ha>d'';" |
| "$da$virama}$ddha>d'';" |
| "$da$virama}$dda>d'';" |
| "$da$virama}$dha>d'';" |
| //"$da$virama}$da>dda;" |
| "$da}$x>d;" |
| "$tha}$x>th;" |
| "$ta$virama}$ha>t'';" |
| "$ta$virama}$ttha>t'';" |
| "$ta$virama}$tta>t'';" |
| "$ta$virama}$tha>t'';" |
| "$tta>t\u0323a;" |
| "$ttha>t\u0323ha;" |
| //"$ta$virama}$ta>tta;" |
| "$ta}$x>t;" |
| "$tha>tha;" |
| "$ta>ta;" |
| "$dda>d\u0323a;" |
| "$dha>dha;" |
| "$ddha>d\u0323ha;" |
| "$da>da;" |
| "$nna}$x>n\u0323 ;" |
| "$nna>n\u0323a ;" |
| // $na + virama before $ga or $ya: "n" plus a literal apostrophe |
| "$na$virama}$ga>n'';" |
| "$na$virama}$ya>n'';" |
| "$na}$x>n;" |
| "$na>na;" |
| |
| // $ka.. , $pa.. , $ya.. and $sha.. groups. |
| // "$C}$x" emits the bare letter before a virama or dependent vowel sign, |
| // "$C" alone emits the letter plus "a", and "$C$virama}<consonant>" emits |
| // the letter plus a literal apostrophe ('' in rule syntax). Rules are |
| // tried in order, so the more specific forms are listed first. |
| "$kha}$x>kh;" |
| "$kha>kha;" |
| "$ka$virama}$ha>k'';" |
| "$ka}$x>k;" |
| "$ka>ka;" |
| "$gha}$x>gh;" |
| "$gha>gha;" |
| "$ga$virama}$ha>g'';" |
| "$ga}$x>g;" |
| "$ga>ga;" |
| // superseded spellings of $nga, kept for reference |
| //"ng<$nga}$x;" |
| //"nga<$nga;" |
| "$nga}$x>n\u0307;" |
| "$nga>n\u0307a ;" |
| |
| "$pha}$x>ph;" |
| "$pha>pha;" |
| "$pa$virama}$ha>p'';" |
| "$pa}$x>p;" |
| "$pa>pa;" |
| "$bha}$x>bh;" |
| "$bha>bha;" |
| "$ba$virama}$ha>b'';" |
| "$ba}$x>b;" |
| "$ba>ba;" |
| "$ma$virama}$ma>m'';" |
| //"$ma$virama}$anusvara>m'';" |
| "$ma}$x>m;" |
| "$ma>ma;" |
| |
| "$ya}$x>y;" |
| "$ya>ya;" |
| "$ra$virama}$ha>r'';" |
| "$ra}$x>r;" |
| "$ra>ra;" |
| "$la$virama}$ha>l'';" |
| "$la}$x>l;" |
| "$la>la;" |
| "$lla$virama}$ha>l\u0323'';" |
| "$lla}$x>l\u0323;" |
| "$lla>l\u0323a;" |
| "$va}$x>v;" |
| "$va>va;" |
| "$sha}$x>s\u0301;" |
| "$ssa}$x>s\u0323;" |
| // $sa + virama before $ha or another s-consonant: "s" plus apostrophe |
| "$sa$virama}$ha>s'';" |
| "$sa$virama}$sha>s'';" |
| "$sa$virama}$ssa>s'';" |
| "$sa$virama}$sa>s'';" |
| "$sa}$x>s;" |
| "$sha>s\u0301a;" |
| "$ssa>s\u0323a;" |
| "$sa>sa;" |
| "$ha}$x>h;" |
| "$ha>ha;" |
| |
| // Urdu compatibility |
| // Same two-rule pattern for each compatibility consonant: before a virama |
| // or dependent vowel sign ($x) emit the bare letter(s); otherwise emit the |
| // letter(s) followed by "a". |
| "$uya}$x > y\u0307 ;" |
| "$uya > y\u0307a ;" |
| "$ela}$x > l\u0331 ;" |
| "$ela > l\u0331a ;" |
| "$ena}$x > n\u0331 ;" |
| "$ena > n\u0331a ;" |
| |
| "$uka}$x > q ;" |
| "$uka > qa ;" |
| "$ukha}$x > k\u0323 ;" |
| "$ukha > k\u0323a ;" |
| "$ugha}$x > g\u0307 ;" |
| "$ugha > g\u0307a ;" |
| "$ujha}$x > z ;" |
| "$ujha > za ;" |
| "$udha}$x > r\u0323h ;" |
| "$udha > r\u0323ha;" |
| "$uddha}$x> r\u0323 ;" |
| "$uddha > r\u0323a ;" |
| "$ufa}$x > f\u0323 ;" |
| "$ufa > f\u0323a ;" |
| |
| // dependent vowels (should never occur except following consonants) |
| // Each dependent vowel sign maps to a Latin vowel; \u0304 (macron), |
| // \u0325 (ring below) and \u0306 (breve) are combining marks appended |
| // to the base letter. |
| |
| "$aa > a\u0304 ;" |
| "$ai > ai ;" |
| "$au > au ;" |
| "$ii > i\u0304 ;" |
| "$i > i ;" |
| "$uu > u\u0304 ;" |
| "$u > u ;" |
| "$rrh > r\u0325\u0304 ;" |
| "$rh > r\u0325 ;" |
| "$llh > l\u0325\u0304 ;" |
| "$lh > l\u0325 ;" |
| "$e > e\u0304 ;" |
| "$o > o\u0304 ;" |
| //extra vowels |
| "$ce > e\u0306 ;" |
| "$co > o\u0306 ;" |
| "$se > e ;" |
| "$so > o ;" |
| |
| // independent vowels (when following consonants) |
| // Each output starts with a literal apostrophe ('' in rule syntax) |
| // followed by the Latin vowel. |
| // NOTE(review): in ICU rule syntax the text before "}" is the key that |
| // gets replaced and the text after it is right-hand context only, so as |
| // written these rules replace the Latin letter ("a" or a member of $z) |
| // and leave the independent vowel in place. The heading suggests a |
| // preceding-context form ("{") may have been intended - confirm against |
| // the reverse (Latin to InterIndic) rules before changing anything. |
| |
| "a}$waa > ''a\u0304 ;" |
| "$z}$waa > ''a\u0304 ;" |
| "a}$wai > ''ai ;" |
| "$z}$wai > ''ai ;" |
| "a}$wau > ''au ;" |
| "$z}$wau > ''au ;" |
| "a}$wii > ''i\u0304 ;" |
| "$z}$wii > ''i\u0304 ;" |
| "a}$wi > ''i ;" |
| "$z}$wi > ''i ;" |
| "a}$wuu > ''u\u0304 ;" |
| "$z}$wuu > ''u\u0304 ;" |
| "a}$wu > ''u ;" |
| "$z}$wu > ''u ;" |
| "$z}$wrr > ''r\u0325\u0304 ;" |
| "$z}$wr > ''r\u0325 ;" |
| "$z}$wll > ''l\u0325\u0304 ;" |
| "$z}$wl > ''l\u0325 ;" |
| "$z}$we > ''e\u0304 ;" |
| "$z}$wo > ''o\u0304 ;" |
| "a}$wa > ''a ;" |
| "$z}$wa > ''a ;" |
| //extra vowels |
| "$z}$wce > ''e\u0306 ;" |
| "$z}$wco > ''o\u0306 ;" |
| "$z}$wse > ''e ;" |
| "$z}$wso > ''o ;" |
| |
| // independent vowels (otherwise) |
| // Unconditional mappings for the stand-alone (w-prefixed) vowels; they |
| // apply whenever none of the earlier, more specific rules matched. The |
| // Latin output is the same as for the corresponding dependent vowel sign. |
| "$waa > a\u0304 ;" |
| "$wai > ai ;" |
| "$wau > au ;" |
| "$wii > i\u0304 ;" |
| "$wi > i ;" |
| "$wuu > u\u0304 ;" |
| "$wu > u ;" |
| "$wrr > r\u0325\u0304 ;" |
| "$wr > r\u0325 ;" |
| "$wll > l\u0325\u0304 ;" |
| "$wl > l\u0325 ;" |
| "$we > e\u0304 ;" |
| "$wo > o\u0304 ;" |
| "$wa > a ;" |
| //extra vowels |
| "$wce > e\u0306 ;" |
| "$wco > o\u0306 ;" |
| "$wse > e ;" |
| "$wso > o ;" |
| |
| //stress marks |
| // '-' is a quoted literal hyphen. The two-character rule for |
| // chandrabindu + anusvara is listed before the single chandrabindu rule |
| // so that it gets the first chance to match. |
| "$avagraha > \u0315;" |
| "$chandrabindu$anusvara>'-'\u0303;" |
| "$chandrabindu > '-'m\u0310;" |
| "$visarga>'-'h\u0323;" |
| |
| |
| //numbers |
| // each digit variable maps to the corresponding ASCII digit |
| "$zero > 0;" |
| "$one > 1;" |
| "$two > 2;" |
| "$three > 3;" |
| "$four > 4;" |
| "$five > 5;" |
| "$six > 6;" |
| "$seven > 7;" |
| "$eight > 8;" |
| "$nine > 9;" |
| |
| // blow away any remaining viramas |
| // (empty right-hand side: the virama is deleted with no output) |
| "$virama>;" |
| // ":: NFC;" |
| } |
| } |