| // -*- Coding: utf-8; -*- |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2002, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // THIS IS A MACHINE-GENERATED FILE |
| // Tool: dumpicurules.bat |
| // Source: ../../../impl/data/Transliterator_InterIndic_Latin.txt |
| // Date: Sat Jul 27 10:31:07 2002 |
| //-------------------------------------------------------------------- |
| |
| // InterIndic_Latin |
| |
| // Resource table holding the InterIndic -> Latin transliteration rules. |
| t_InterIndic_Latn { |
| // The rule text is written as a sequence of quoted string literals; // comments sit between them. |
| Rule { |
| //-------------------------------------------------------------------- |
| //-------------------------------------------------------------------- |
| //-------------------------------------------------------------------- |
| |
| // InterIndic-Latin |
| // Variable definitions: each $name is bound to one InterIndic private-use |
| // code point (\ue0xx) and is referenced by the conversion rules further down. |
| // The annotations on $we (\u090f) and $wo (\u0913) suggest the low byte mirrors the |
| // Devanagari block layout -- NOTE(review): confirm against the InterIndic design notes. |
| //\ue000 reserved |
| // signs |
| "$chandrabindu=\ue001;" |
| "$anusvara=\ue002;" |
| "$visarga=\ue003;" |
| //\ue004 reserved |
| // w<vowel> represents the stand-alone form |
| "$wa=\ue005;" |
| "$waa=\ue006;" |
| "$wi=\ue007;" |
| "$wii=\ue008;" |
| "$wu=\ue009;" |
| "$wuu=\ue00a;" |
| "$wr=\ue00b;" |
| "$wl=\ue00c;" |
| "$wce=\ue00d;" // LETTER CANDRA E |
| "$wse=\ue00e;" // LETTER SHORT E |
| "$we=\ue00f;" // \u090f LETTER E |
| "$wai=\ue010;" |
| "$wco=\ue011;" // LETTER CANDRA O |
| "$wso=\ue012;" // LETTER SHORT O |
| "$wo=\ue013;" // \u0913 LETTER O |
| "$wau=\ue014;" |
| // consonants |
| "$ka=\ue015;" |
| "$kha=\ue016;" |
| "$ga=\ue017;" |
| "$gha=\ue018;" |
| "$nga=\ue019;" |
| "$ca=\ue01a;" |
| "$cha=\ue01b;" |
| "$ja=\ue01c;" |
| "$jha=\ue01d;" |
| "$nya=\ue01e;" |
| "$tta=\ue01f;" |
| "$ttha=\ue020;" |
| "$dda=\ue021;" |
| "$ddha=\ue022;" |
| "$nna=\ue023;" |
| "$ta=\ue024;" |
| "$tha=\ue025;" |
| "$da=\ue026;" |
| "$dha=\ue027;" |
| "$na=\ue028;" |
| "$ena=\ue029;" //compatibility |
| "$pa=\ue02a;" |
| "$pha=\ue02b;" |
| "$ba=\ue02c;" |
| "$bha=\ue02d;" |
| "$ma=\ue02e;" |
| "$ya=\ue02f;" |
| "$ra=\ue030;" |
| "$rra=\ue031;" |
| "$la=\ue032;" |
| "$lla=\ue033;" |
| "$ela=\ue034;" //compatibility |
| "$va=\ue035;" |
| "$sha=\ue036;" |
| "$ssa=\ue037;" |
| "$sa=\ue038;" |
| "$ha=\ue039;" |
| //\u093a Reserved |
| //\u093b Reserved |
| "$nukta=\ue03c;" |
| "$avagraha=\ue03d;" // SIGN AVAGRAHA |
| // <vowel> represents the dependent form |
| // (these are the vowel signs collected into the $x set defined further down) |
| "$aa=\ue03e;" |
| "$i=\ue03f;" |
| "$ii=\ue040;" |
| "$u=\ue041;" |
| "$uu=\ue042;" |
| "$rh=\ue043;" |
| "$lh=\ue044;" |
| "$ce=\ue045;" //VOWEL SIGN CANDRA E |
| "$se=\ue046;" //VOWEL SIGN SHORT E |
| "$e=\ue047;" |
| "$ai=\ue048;" |
| "$co=\ue049;" // VOWEL SIGN CANDRA O |
| "$so=\ue04a;" // VOWEL SIGN SHORT O |
| "$o=\ue04b;" // \u094b |
| "$au=\ue04c;" |
| "$virama=\ue04d;" |
| // \u094e Reserved |
| // \u094f Reserved |
| "$om=\ue050;" // OM |
| // The next four are conversion rules, not variables: the right-hand side is empty, |
| // so these stress/accent signs are removed from the output. |
| "\ue051>;" // UNMAPPED STRESS SIGN UDATTA |
| "\ue052>;" // UNMAPPED STRESS SIGN ANUDATTA |
| "\ue053>;" // UNMAPPED GRAVE ACCENT |
| "\ue054>;" // UNMAPPED ACUTE ACCENT |
| "$lm = \ue055;"// Telugu Length Mark |
| "$ailm=\ue056;"// AI Length Mark |
| "$aulm=\ue057;"// AU Length Mark |
| //urdu compatibility forms |
| "$uka=\ue058;" |
| "$ukha=\ue059;" |
| "$ugha=\ue05a;" |
| "$ujha=\ue05b;" |
| "$uddha=\ue05c;" |
| "$udha=\ue05d;" |
| "$ufa=\ue05e;" |
| "$uya=\ue05f;" |
| // stand-alone ($w...) and dependent forms of the long vocalic r / l vowels |
| "$wrr=\ue060;" |
| "$wll=\ue061;" |
| "$rrh=\ue062;" |
| "$llh=\ue063;" |
| // punctuation |
| "$danda=\ue064;" |
| "$doubleDanda=\ue065;" |
| // digits |
| "$zero=\ue066;" // DIGIT ZERO |
| "$one=\ue067;" // DIGIT ONE |
| "$two=\ue068;" // DIGIT TWO |
| "$three=\ue069;" // DIGIT THREE |
| "$four=\ue06a;" // DIGIT FOUR |
| "$five=\ue06b;" // DIGIT FIVE |
| "$six=\ue06c;" // DIGIT SIX |
| "$seven=\ue06d;" // DIGIT SEVEN |
| "$eight=\ue06e;" // DIGIT EIGHT |
| "$nine=\ue06f;" // DIGIT NINE |
| // For all other scripts |
| // ($ecp0..$ecpF name the sixteen code points \ue070-\ue07f; every one of them is |
| // mapped to nothing by the "$ecpN >;" rules near the end of this table) |
| "$ecp0=\ue070;" |
| "$ecp1=\ue071;" |
| "$ecp2=\ue072;" |
| "$ecp3=\ue073;" |
| "$ecp4=\ue074;" |
| "$ecp5=\ue075;" |
| "$ecp6=\ue076;" |
| "$ecp7=\ue077;" |
| "$ecp8=\ue078;" |
| "$ecp9=\ue079;" |
| "$ecpA=\ue07a;" |
| "$ecpB=\ue07b;" |
| "$ecpC=\ue07c;" |
| "$ecpD=\ue07d;" |
| "$ecpE=\ue07e;" |
| "$ecpF=\ue07f;" |
| // \u0970>; # UNMAPPED ABBREVIATION SIGN |
| // Character sets. $depVowelAbove, $depVowelBelow and $z are defined here but are not |
| // referenced by any rule in the visible part of this table. |
| "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];" |
| "$depVowelBelow=[\ue041-\ue044];" |
| // $x was originally called '&'; $z was '%' |
| // $x: every dependent vowel sign; used as the "followed by a vowel sign" context. |
| "$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];" |
| // $z: the Latin consonant letters. |
| "$z=[bcdfghjklmnpqrstvwxyz];" |
| // $vowels: Latin output that can end a vowel -- a e i o u, r (as in r + U+0325), |
| // and the combining marks U+0304 (macron), U+0325 (ring below), U+0306 (breve). |
| "$vowels=[aeiour\u0304\u0325\u0306];" |
| // $forceIndependentMatra: any character that is neither a letter ([:L:]) nor in |
| // U+0300-U+034C; used as the preceding context that marks a vowel sign as stray. |
| "$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];" |
| //##################################################################### |
| // convert from Native letters to Latin letters |
| //##################################################################### |
| //transliterations for anusvara |
| // The anusvara is rendered according to the consonant that follows it (the set after |
| // "}" is look-ahead context only and is not consumed): |
| //   before ka/kha/ga/gha/nga       -> n + U+0307 (dot above), same as $nga |
| //   before ca/cha/ja/jha/nya       -> n + U+0304 (macron) |
| //   before tta/ttha/dda/ddha/nna   -> n + U+0323 (dot below), same as $nna |
| //   before ta/tha/da/dha/na        -> n |
| //   before pa/pha/ba/bha/ma        -> m |
| //   before ya/ra/lla/la/va/ssa/sha/sa/ha -> n |
| //   anywhere else                  -> m + U+0307 |
| // NOTE(review): the palatal case emits n + U+0304, whereas $nya itself is written |
| // n + U+0303 (tilde) by the consonant rules; the velar and retroflex cases reuse |
| // their class nasal's diacritic -- confirm the difference is intended. |
| "$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;" |
| "$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;" |
| "$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;" |
| "$anusvara} [$ta$tha$da$dha$na] > n ;" |
| "$anusvara} [$pa$pha$ba$bha$ma] > m ;" |
| "$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;" |
| "$anusvara> m\u0307;" |
| |
| // Urdu compatibility |
| // Nukta-modified consonants (written either as base consonant + $nukta or as one of the |
| // precomposed $u.../$ena/$ela code points). Most letters use a three-rule ladder: |
| //   C } $x        -> base only (a dependent vowel follows and supplies the vowel) |
| //   C $virama     -> base only (the virama is consumed) |
| //   C             -> base + inherent "a" |
| // These rules are listed before the plain consonant rules so that consonant + nukta is |
| // matched as a unit. |
| // NOTE(review): $uka, $ugha, $ujha and $udha have only the bare "+a" rule, and $ukha has |
| // no "}$x" rule, unlike $ufa/$uya/$uddha/$ena/$ela -- confirm whether a following vowel |
| // sign or virama is meant to leave the inherent "a" in place for those letters. |
| // NOTE(review): $udha maps to r\u0323a (same as $uddha and $dda$nukta) while $ddha$nukta |
| // maps to r\u0323ha -- confirm which pairing is intended. |
| "$ya$nukta}$x > y\u0307 ;" |
| "$ya$nukta$virama > y\u0307 ;" |
| "$ya$nukta > y\u0307a ;" |
| |
| "$la$nukta }$x > l\u0331 ;" |
| "$la$nukta$virama > l\u0331 ;" |
| "$la$nukta > l\u0331a ;" |
| |
| "$na$nukta }$x > n\u0331 ;" |
| "$na$nukta$virama > n\u0331 ;" |
| "$na$nukta > n\u0331a ;" |
| |
| "$ena }$x > n\u0331 ;" |
| "$ena$virama > n\u0331 ;" |
| "$ena > n\u0331a ;" |
| "$uka > qa ;" |
| "$ka$nukta }$x > q ;" |
| "$ka$nukta$virama > q ;" |
| "$ka$nukta > qa ;" |
| "$kha$nukta }$x > k\u0331h\u0331 ;" |
| "$kha$nukta$virama > k\u0331h\u0331 ;" |
| "$kha$nukta > k\u0331h\u0331a ;" |
| "$ukha$virama > k\u0331h\u0331;" |
| "$ukha > k\u0331h\u0331a;" |
| "$ugha > g\u0307a ;" |
| "$ga$nukta }$x > g\u0307 ;" |
| "$ga$nukta$virama > g\u0307 ;" |
| "$ga$nukta > g\u0307a ;" |
| |
| "$ujha > za ;" |
| "$ja$nukta }$x > z ;" |
| "$ja$nukta$virama > z ;" |
| "$ja$nukta > za ;" |
| "$ddha$nukta}$x > r\u0323h ;" |
| "$ddha$nukta$virama > r\u0323h ;" |
| "$ddha$nukta > r\u0323ha;" |
| |
| "$uddha}$x > r\u0323 ;" |
| "$uddha$virama > r\u0323 ;" |
| "$uddha > r\u0323a;" |
| |
| "$udha > r\u0323a ;" |
| "$dda$nukta}$x > r\u0323 ;" |
| "$dda$nukta$virama > r\u0323 ;" |
| "$dda$nukta > r\u0323a ;" |
| "$pha$nukta }$x > f ;" |
| "$pha$nukta$virama > f ;" |
| "$pha$nukta > fa ;" |
| "$ufa }$x > f ;" |
| "$ufa$virama > f ;" |
| "$ufa > fa ;" |
| |
| "$ra$nukta}$x > r\u0331;" |
| "$ra$nukta$virama > r\u0331;" |
| "$ra$nukta > r\u0331a;" |
| "$lla$nukta}$x > l\u0331;" |
| "$lla$nukta$virama > l\u0331;" |
| "$lla$nukta > l\u0331a;" |
| |
| "$ela}$x > l\u0331;" |
| "$ela$virama > l\u0331;" |
| "$ela > l\u0331a;" |
| |
| "$uya}$x > y\u0307;" |
| "$uya$virama > y\u0307;" |
| "$uya > y\u0307a;" |
| |
| |
| // normal consonants |
| // Velars and palatals. Each consonant uses the same ladder, most specific rule first: |
| //   C $virama } $ha -> base + '' (an apostrophe, written '' in rule syntax), so that an |
| //                      unaspirated consonant followed by ha is not read as the aspirate |
| //                      digraph (k + ha vs. kha); only the unaspirated stops have this rule |
| //   C } $x          -> base only (the following vowel sign supplies the vowel) |
| //   C $virama       -> base only (virama consumed) |
| //   C               -> base + inherent "a" |
| "$ka$virama}$ha>k'';" |
| "$ka}$x>k;" |
| "$ka$virama>k;" |
| "$ka>ka;" |
| "$kha}$x>kh;" |
| "$kha$virama>kh;" |
| "$kha>kha;" |
| "$ga$virama}$ha>g'';" |
| "$ga}$x>g;" |
| "$ga$virama>g;" |
| "$ga>ga;" |
| |
| "$gha}$x>gh;" |
| "$gha$virama>gh;" |
| "$gha>gha;" |
| |
| "$nga}$x>n\u0307;" |
| "$nga$virama>n\u0307;" |
| "$nga>n\u0307a ;" |
| "$ca$virama}$ha>c'';" |
| "$ca}$x>c;" |
| "$ca$virama>c;" |
| "$ca>ca;" |
| |
| "$cha}$x>ch;" |
| "$cha$virama>ch;" |
| "$cha>cha;" |
| "$ja$virama}$ha>j'';" |
| "$ja}$x>j;" |
| "$ja$virama>j;" |
| "$ja>ja;" |
| |
| "$jha}$x>jh;" |
| "$jha$virama>jh;" |
| "$jha>jha;" |
| |
| "$nya }$x>n\u0303 ;" |
| "$nya$virama>n\u0303;" |
| "$nya > n\u0303a ;" |
| |
| |
| // Retroflex consonants (base letter + U+0323 dot below). Ladder per consonant, most |
| // specific rule first: |
| //   C $virama } $ha -> base + '' (an apostrophe, written '' in rule syntax) |
| //   C } $x          -> base only (the following vowel sign supplies the vowel) |
| //   C $virama       -> base only (virama consumed) |
| //   C               -> base + inherent "a" |
| "$tta$virama}$ha>t\u0323'';" |
| "$tta}$x>t\u0323;" |
| "$tta$virama>t\u0323;" |
| "$tta>t\u0323a;" |
| |
| "$ttha}$x>t\u0323h;" |
| "$ttha$virama>t\u0323h;" |
| "$ttha>t\u0323ha;" |
| // The apostrophe rule must consume the virama and look ahead to $ha, like its siblings: |
| // without it dda + virama + ha would come out as d\u0323 + ha, the same text as $ddha |
| // (d\u0323ha). Matching "$dda } $x $ha" instead would also insert a spurious apostrophe |
| // into dda + vowel sign + ha. |
| "$dda$virama}$ha>d\u0323'';" |
| "$dda}$x>d\u0323;" |
| "$dda$virama>d\u0323;" |
| "$dda>d\u0323a;" |
| |
| "$ddha}$x>d\u0323h;" |
| "$ddha$virama>d\u0323h;" |
| "$ddha>d\u0323ha;" |
| |
| "$nna}$x>n\u0323 ;" |
| "$nna$virama>n\u0323;" |
| "$nna>n\u0323a ;" |
| |
| |
| // Dental consonants. Same ladder as the other consonant groups (apostrophe rule, |
| // "}$x", "$virama", bare fallback), with extra apostrophe rules: |
| //   ta + virama before ha/ttha/tta/tha  -> t'' |
| //   da + virama before ha/ddha/dda/dha  -> d'' |
| //   na + virama before ga/ya            -> n'' |
| // In each case the consonant after "}" is look-ahead only and is converted by its own rule. |
| "$ta$virama}$ha>t'';" |
| "$ta$virama}$ttha>t'';" |
| "$ta$virama}$tta>t'';" |
| "$ta$virama}$tha>t'';" |
| "$ta}$x>t;" |
| "$ta$virama>t;" |
| "$ta>ta;" |
| "$tha}$x>th;" |
| "$tha$virama>th;" |
| "$tha>tha;" |
| |
| "$da$virama}$ha>d'';" |
| "$da$virama}$ddha>d'';" |
| "$da$virama}$dda>d'';" |
| "$da$virama}$dha>d'';" |
| "$da}$x>d;" |
| "$da$virama>d;" |
| "$da>da;" |
| "$dha}$x>dh;" |
| "$dha$virama>dh;" |
| "$dha>dha;" |
| "$na$virama}$ga>n'';" |
| "$na$virama}$ya>n'';" |
| "$na}$x>n;" |
| "$na$virama>n;" |
| "$na>na;" |
| |
| |
| // Labials, semivowels, sibilants and ha. Same ladder as the other consonant groups: |
| //   C $virama } <context> -> base + '' (apostrophe) where such a rule is listed |
| //   C } $x                -> base only |
| //   C $virama             -> base only |
| //   C                     -> base + inherent "a" |
| // Points that differ from the plain pattern: |
| //   - $ma's apostrophe rule looks ahead to $ma itself, not $ha. |
| //   - $sa has apostrophe rules before ha, sha, ssa and sa. |
| //   - the Gurmukhi $sa$nukta rules sit between "$sa$virama>s" and the bare "$sa>sa" |
| //     fallback, so sa + nukta is matched as a unit before the fallback can apply; |
| //     they produce the same s + U+0301 output as $sha. |
| "$pa$virama}$ha>p'';" |
| "$pa}$x>p;" |
| "$pa$virama>p;" |
| "$pa>pa;" |
| "$pha}$x>ph;" |
| "$pha$virama>ph;" |
| "$pha>pha;" |
| "$ba$virama}$ha>b'';" |
| "$ba}$x>b;" |
| "$ba$virama>b;" |
| "$ba>ba;" |
| |
| "$bha}$x>bh;" |
| "$bha$virama>bh;" |
| "$bha>bha;" |
| |
| "$ma$virama}$ma>m'';" |
| "$ma}$x>m;" |
| "$ma$virama>m;" |
| "$ma>ma;" |
| |
| "$ya}$x>y;" |
| "$ya$virama>y;" |
| "$ya>ya;" |
| "$ra$virama}$ha>r'';" |
| "$ra}$x>r;" |
| "$ra$virama>r;" |
| "$ra>ra;" |
| "$rra$virama}$ha>r\u0331'';" |
| "$rra}$x>r\u0331;" |
| "$rra$virama>r\u0331;" |
| "$rra>r\u0331a;" |
| "$la$virama}$ha>l'';" |
| "$la}$x>l;" |
| "$la$virama>l;" |
| "$la>la;" |
| "$lla$virama}$ha>l\u0323'';" |
| "$lla}$x>l\u0323;" |
| "$lla$virama>l\u0323;" |
| "$lla>l\u0323a;" |
| "$va}$x>v;" |
| "$va$virama>v;" |
| "$va>va;" |
| "$sa$virama}$ha>s'';" |
| "$sa$virama}$sha>s'';" |
| "$sa$virama}$ssa>s'';" |
| "$sa$virama}$sa>s'';" |
| "$sa}$x>s;" |
| "$sa$virama>s;" |
| |
| //for gurmukhi |
| "$sa$nukta}$x>s\u0301;" |
| "$sa$nukta$virama>s\u0301;" |
| "$sa$nukta>s\u0301a;" |
| "$sa>sa;" |
| |
| "$sha}$x>s\u0301;" |
| "$sha$virama>s\u0301;" |
| "$sha>s\u0301a;" |
| |
| "$ssa}$x>s\u0323;" |
| "$ssa$virama>s\u0323;" |
| "$ssa>s\u0323a;" |
| "$ha}$x>h;" |
| "$ha$virama>h;" |
| "$ha>ha;" |
| |
| // dependent vowels (should never occur except following consonants) |
| // First group: a vowel sign whose preceding character matches $forceIndependentMatra |
| // (neither a letter nor a combining mark in U+0300-U+034C) is written as U+0314 |
| // followed by the vowel. The text before "{" is context only and is not rewritten. |
| // The longer names ($ii, $uu, $rrh, $llh) are listed before their shorter counterparts. |
| "$forceIndependentMatra{$aa > \u0314a\u0304 ;" |
| "$forceIndependentMatra{$ai > \u0314ai ;" |
| "$forceIndependentMatra{$au > \u0314au ;" |
| "$forceIndependentMatra{$ii > \u0314i\u0304 ;" |
| "$forceIndependentMatra{$i > \u0314i ;" |
| "$forceIndependentMatra{$uu > \u0314u\u0304 ;" |
| "$forceIndependentMatra{$u > \u0314u ;" |
| "$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;" |
| "$forceIndependentMatra{$rh > \u0314r\u0325 ;" |
| "$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;" |
| "$forceIndependentMatra{$lh > \u0314l\u0325 ;" |
| "$forceIndependentMatra{$e > \u0314e\u0304 ;" |
| "$forceIndependentMatra{$o > \u0314o\u0304 ;" |
| //extra vowels |
| "$forceIndependentMatra{$ce > \u0314e\u0306 ;" |
| "$forceIndependentMatra{$co > \u0314o\u0306 ;" |
| "$forceIndependentMatra{$se > \u0314e ;" |
| "$forceIndependentMatra{$so > \u0314o ;" |
| // In the same context a nukta or virama is dropped (empty right-hand side). |
| "$forceIndependentMatra{$nukta >;" // Nukta cannot appear independently or as first character |
| "$forceIndependentMatra{$virama >;" // Virama cannot appear independently or as first character |
| // Second group: the ordinary mapping of each vowel sign to its Latin vowel. |
| // $e and $o carry a macron; the short forms $se and $so are plain e / o. |
| "$aa > a\u0304 ;" |
| "$ai > ai ;" |
| "$au > au ;" |
| "$ii > i\u0304 ;" |
| "$i > i ;" |
| "$uu > u\u0304 ;" |
| "$u > u ;" |
| "$rrh > r\u0325\u0304 ;" |
| "$rh > r\u0325 ;" |
| "$llh > l\u0325\u0304 ;" |
| "$lh > l\u0325 ;" |
| "$e > e\u0304 ;" |
| "$o > o\u0304 ;" |
| //extra vowels |
| "$ce > e\u0306 ;" |
| "$co > o\u0306 ;" |
| "$se > e ;" |
| "$so > o ;" |
| //dependent vowels when following independent vowels. Generally Illegal only for roundtripping |
| // An independent vowel (or $om) that is directly followed by a vowel sign ($x, look-ahead |
| // only) is written as its usual Latin vowel with U+0314 appended. The vowel sign itself |
| // is converted afterwards by its own rule. $om additionally gets a leading apostrophe. |
| "$waa} $x > a\u0304\u0314 ;" |
| "$wai} $x > ai\u0314 ;" |
| "$wau} $x > au\u0314 ;" |
| "$wii} $x > i\u0304\u0314 ;" |
| "$wi } $x > i\u0314 ;" |
| "$wuu} $x > u\u0304\u0314 ;" |
| "$wu } $x > u\u0314 ;" |
| "$wrr} $x > r\u0325\u0304\u0314 ;" |
| "$wr } $x > r\u0325\u0314 ;" |
| "$wll} $x > l\u0325\u0304\u0314 ;" |
| "$wl } $x > l\u0325\u0314 ;" |
| "$we } $x > e\u0304\u0314 ;" |
| "$wo } $x > o\u0304\u0314 ;" |
| "$wa } $x > a\u0314 ;" |
| //extra vowels |
| "$wce} $x > e\u0306\u0314 ;" |
| "$wco} $x > o\u0306\u0314 ;" |
| "$wse} $x > e\u0314 ;" |
| "$wso} $x > o\u0314 ;" |
| "$om} $x > ''om\u0314 ;" |
| |
| // independent vowels when preceded by vowels |
| // When the text just before an independent vowel matches $vowels (a Latin vowel letter, |
| // r, or one of the marks U+0304/U+0325/U+0306), the vowel is written with a leading |
| // apostrophe ('' in rule syntax), which keeps it apart from the vowel before it. |
| // The part before "{" is context only and is not rewritten. |
| "$vowels{$waa > ''a\u0304 ;" |
| "$vowels{$wai > ''ai ;" |
| "$vowels{$wau > ''au ;" |
| "$vowels{$wii > ''i\u0304 ;" |
| "$vowels{$wi > ''i ;" |
| "$vowels{$wuu > ''u\u0304 ;" |
| "$vowels{$wu > ''u ;" |
| "$vowels{$wrr > ''r\u0325\u0304 ;" |
| "$vowels{$wr > ''r\u0325 ;" |
| "$vowels{$wll > ''l\u0325\u0304 ;" |
| "$vowels{$wl > ''l\u0325 ;" |
| "$vowels{$we > ''e\u0304 ;" |
| "$vowels{$wo > ''o\u0304 ;" |
| "$vowels{$wa > ''a ;" |
| //extra vowels |
| "$vowels{$wce > ''e\u0306 ;" |
| "$vowels{$wco > ''o\u0306 ;" |
| "$vowels{$wse > ''e ;" |
| "$vowels{$wso > ''o ;" |
| |
| // independent vowels (otherwise) |
| // Context-free fallbacks, listed after the context-dependent independent-vowel rules |
| // above: each stand-alone vowel maps to the same Latin vowel as its dependent form. |
| // $om always carries a leading apostrophe ('' in rule syntax). |
| "$waa > a\u0304 ;" |
| "$wai > ai ;" |
| "$wau > au ;" |
| "$wii > i\u0304 ;" |
| "$wi > i ;" |
| "$wuu > u\u0304 ;" |
| "$wu > u ;" |
| "$wrr > r\u0325\u0304 ;" |
| "$wr > r\u0325 ;" |
| "$wll > l\u0325\u0304 ;" |
| "$wl > l\u0325 ;" |
| "$we > e\u0304 ;" |
| "$wo > o\u0304 ;" |
| "$wa > a ;" |
| //extra vowels |
| "$wce > e\u0306 ;" |
| "$wco > o\u0306 ;" |
| "$wse > e ;" |
| "$wso > o ;" |
| "$om > ''om ;" |
| |
| //stress marks |
| // (the rules under this heading cover avagraha, chandrabindu and visarga) |
| //   avagraha                -> U+0315 |
| //   chandrabindu + anusvara -> U+0303 (listed first so the pair is matched as a unit) |
| //   chandrabindu            -> m + U+0310 |
| //   visarga                 -> h + U+0323 |
| "$avagraha > \u0315;" |
| "$chandrabindu$anusvara>\u0303;" |
| "$chandrabindu > m\u0310;" |
| "$visarga>h\u0323;" |
| //numbers |
| "$zero > 0;" |
| "$one > 1;" |
| "$two > 2;" |
| "$three > 3;" |
| "$four > 4;" |
| "$five > 5;" |
| "$six > 6;" |
| "$seven > 7;" |
| "$eight > 8;" |
| "$nine > 9;" |
| // Length marks and the $ecp0-$ecpF code points have an empty right-hand side: |
| // they are removed from the output. |
| "$lm >;" |
| "$ailm >;" |
| "$aulm >;" |
| "$ecp0 >;" |
| "$ecp1 >;" |
| "$ecp2 >;" |
| "$ecp3 >;" |
| "$ecp4 >;" |
| "$ecp5 >;" |
| "$ecp6 >;" |
| "$ecp7 >;" |
| "$ecp8 >;" |
| "$ecp9 >;" |
| "$ecpA >;" |
| "$ecpB >;" |
| "$ecpC >;" |
| "$ecpD >;" |
| "$ecpE >;" |
| "$ecpF >;" |
| // Both danda and double danda become a single full stop, so the two are |
| // indistinguishable in the Latin output. |
| "$danda>'.';" |
| "$doubleDanda>'.';" |
| } |
| } |