| // -*- Coding: utf-8; -*- |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2001, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // THIS IS A MACHINE-GENERATED FILE |
| // Tool: dumpicurules.bat |
| // Source: ../../text/resources/Transliterator_Latin_InterIndic.txt |
| // Date: Mon Dec 3 11:44:30 2001 |
| //-------------------------------------------------------------------- |
| |
| // Latin_InterIndic |
| |
| translit_Latin_InterIndic { |
| Rule { |
| //-------------------------------------------------------------------- |
| // Copyright (c) 2001-2004, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // Latin-InterIndic |
| // Rewrites romanized Latin text (base letters plus combining marks) into the |
| // InterIndic code points \ue001-\ue07f that the variables below give names to. |
| // The rule list is order-sensitive: a longer pattern is listed before the |
| // shorter pattern it overlaps with (e.g. "kh" before "k"). |
| //:: NFD; |
| // \ue000 reserved (written \u0e00 in the generated source; presumably a typo, the mapping starts at \ue001) |
| // signs: chandrabindu, anusvara, visarga |
| "$chandrabindu=\ue001;" |
| "$anusvara=\ue002;" |
| "$visarga=\ue003;" |
| // \ue004 reserved (written \u0e004 in the generated source; presumably a typo) |
| // $w<vowel> names the stand-alone (independent) form of a vowel |
| "$wa=\ue005;" |
| "$waa=\ue006;" |
| "$wi=\ue007;" |
| "$wii=\ue008;" |
| "$wu=\ue009;" |
| "$wuu=\ue00a;" |
| "$wr=\ue00b;" |
| "$wl=\ue00c;" |
| "$wce=\ue00d;" // LETTER CANDRA E |
| "$wse=\ue00e;" // LETTER SHORT E |
| "$we=\ue00f;" // \u090f LETTER E |
| "$wai=\ue010;" |
| "$wco=\ue011;" // LETTER CANDRA O |
| "$wso=\ue012;" // LETTER SHORT O |
| "$wo=\ue013;" // \u0913 LETTER O |
| "$wau=\ue014;" |
| // consonants: $ka through $ha occupy the contiguous range \ue015-\ue039 |
| "$ka=\ue015;" |
| "$kha=\ue016;" |
| "$ga=\ue017;" |
| "$gha=\ue018;" |
| "$nga=\ue019;" |
| "$ca=\ue01a;" |
| "$cha=\ue01b;" |
| "$ja=\ue01c;" |
| "$jha=\ue01d;" |
| "$nya=\ue01e;" |
| "$tta=\ue01f;" |
| "$ttha=\ue020;" |
| "$dda=\ue021;" |
| "$ddha=\ue022;" |
| "$nna=\ue023;" |
| "$ta=\ue024;" |
| "$tha=\ue025;" |
| "$da=\ue026;" |
| "$dha=\ue027;" |
| "$na=\ue028;" |
| "$ena=\ue029;" //compatibility |
| "$pa=\ue02a;" |
| "$pha=\ue02b;" |
| "$ba=\ue02c;" |
| "$bha=\ue02d;" |
| "$ma=\ue02e;" |
| "$ya=\ue02f;" |
| "$ra=\ue030;" |
| "$rra=\ue031;" |
| "$la=\ue032;" |
| "$lla=\ue033;" |
| "$ela=\ue034;" //compatibility |
| "$va=\ue035;" |
| "$sha=\ue036;" |
| "$ssa=\ue037;" |
| "$sa=\ue038;" |
| "$ha=\ue039;" |
| // NOTE: the \u09xx values quoted in comments in this section appear to be the |
| // Devanagari code points at the same offset as the InterIndic \ue0xx slot. |
| //\u093a Reserved |
| //\u093b Reserved |
| "$nukta=\ue03c;" |
| "$avagraha=\ue03d;" // SIGN AVAGRAHA |
| // $<vowel> (no "w" prefix) names the dependent form (vowel sign) of a vowel |
| "$aa=\ue03e;" |
| "$i=\ue03f;" |
| "$ii=\ue040;" |
| "$u=\ue041;" |
| "$uu=\ue042;" |
| "$rh=\ue043;" |
| "$lh=\ue044;" |
| "$ce=\ue045;" //VOWEL SIGN CANDRA E |
| "$se=\ue046;" //VOWEL SIGN SHORT E |
| "$e=\ue047;" |
| "$ai=\ue048;" |
| "$co=\ue049;" // VOWEL SIGN CANDRA O |
| "$so=\ue04a;" // VOWEL SIGN SHORT O |
| "$o=\ue04b;" // \u094b |
| "$au=\ue04c;" |
| "$virama=\ue04d;" |
| // \u094e Reserved |
| // \u094f Reserved |
| "$om = \ue050;" // OM |
| // \u0951>; # UNMAPPED STRESS SIGN UDATTA |
| // \u0952>; # UNMAPPED STRESS SIGN ANUDATTA |
| // \u0953>; # UNMAPPED GRAVE ACCENT |
| // \u0954>; # UNMAPPED ACUTE ACCENT |
| "$lm = \ue055;"// Telugu Length Mark |
| "$ailm=\ue056;"// AI Length Mark |
| "$aulm=\ue057;"// AU Length Mark |
| // urdu compatibility forms ($uka .. $uya), followed by the extra vowel forms $wrr/$wll (stand-alone) and $rrh/$llh (dependent) |
| "$uka=\ue058;" |
| "$ukha=\ue059;" |
| "$ugha=\ue05a;" |
| "$ujha=\ue05b;" |
| "$uddha=\ue05c;" |
| "$udha=\ue05d;" |
| "$ufa=\ue05e;" |
| "$uya=\ue05f;" |
| "$wrr=\ue060;" |
| "$wll=\ue061;" |
| "$rrh=\ue062;" |
| "$llh=\ue063;" |
| // punctuation |
| "$danda=\ue064;" |
| "$doubleDanda=\ue065;" |
| // digits 0-9 |
| "$zero=\ue066;" // DIGIT ZERO |
| "$one=\ue067;" // DIGIT ONE |
| "$two=\ue068;" // DIGIT TWO |
| "$three=\ue069;" // DIGIT THREE |
| "$four=\ue06a;" // DIGIT FOUR |
| "$five=\ue06b;" // DIGIT FIVE |
| "$six=\ue06c;" // DIGIT SIX |
| "$seven=\ue06d;" // DIGIT SEVEN |
| "$eight=\ue06e;" // DIGIT EIGHT |
| "$nine=\ue06f;" // DIGIT NINE |
| // For all other scripts: sixteen extra slots $ecp0..$ecpF at \ue070-\ue07f. |
| // None of them is referenced by a rule visible in this file. |
| "$ecp0=\ue070;" |
| "$ecp1=\ue071;" |
| "$ecp2=\ue072;" |
| "$ecp3=\ue073;" |
| "$ecp4=\ue074;" |
| "$ecp5=\ue075;" |
| "$ecp6=\ue076;" |
| "$ecp7=\ue077;" |
| "$ecp8=\ue078;" |
| "$ecp9=\ue079;" |
| "$ecpA=\ue07a;" |
| "$ecpB=\ue07b;" |
| "$ecpC=\ue07c;" |
| "$ecpD=\ue07d;" |
| "$ecpE=\ue07e;" |
| "$ecpF=\ue07f;" |
| // \u0970>; # UNMAPPED ABBREVIATION SIGN |
| // Character sets used as rule context further down: |
| // $depVowelAbove / $depVowelBelow - two groups of dependent vowel signs, used by the '~' rules |
| // $endThing - danda or double danda |
| "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];" |
| "$depVowelBelow=[\ue041-\ue044];" |
| "$endThing=[$danda$doubleDanda];" |
| // $x was originally called '&'; $z was '%' |
| // $x (virama plus dependent vowels) is not referenced by any rule visible in this file; |
| // $z is the set of lower-case Latin consonant letters. |
| "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];" |
| "$z=[bcdfghjklmnpqrstvwxyz];" |
| // $consonants = InterIndic consonants ($ka-$ha), Latin consonants ($z), and the |
| // xx15-xx39 / xx95-xxb9 ranges of nine blocks from \u09xx through \u0dxx. |
| "$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];" |
| // First rules: marks and letters that map straight to a sign or a fixed cluster. |
| // \u0315 -> avagraha; \u0303 (combining tilde) -> chandrabindu followed by anusvara; |
| // m + \u0310 -> chandrabindu; h + \u0323 (dot below) -> visarga; x -> ka, virama, sa. |
| "\u0315 > $avagraha;" |
| "\u0303>$chandrabindu$anusvara;" |
| "m\u0310>$chandrabindu;" |
| "h\u0323>$visarga;" |
| "x>$ka$virama$sa;" |
| // convert to independent forms at start of word or syllable: |
| // NOTE(review): despite the heading above, every rule in this group emits a |
| // DEPENDENT vowel sign; the independent-form rules come later in the list. |
| // dependent forms for roundtrip |
| // Each rule consumes \u0314 (COMBINING REVERSED COMMA ABOVE) together with the |
| // Latin vowel spelling that follows it. Longer spellings (macron \u0304, ring |
| // below \u0325, breve \u0306, digraphs) are listed before the bare letter. |
| "\u0314a\u0304>$aa;" |
| "\u0314ai>$ai;" |
| "\u0314au>$au;" |
| "\u0314ii>$ii;" |
| "\u0314i\u0304>$ii;" |
| "\u0314i>$i;" |
| "\u0314u\u0304>$uu;" |
| "\u0314u>$u;" |
| "\u0314r\u0325\u0304>$rrh;" |
| "\u0314r\u0325>$rh;" |
| "\u0314l\u0325\u0304>$llh;" |
| "\u0314lh>$lh;" |
| "\u0314l\u0325>$lh;" |
| "\u0314e\u0304>$e;" |
| "\u0314o\u0304>$o;" |
| // \u0314 followed by a plain "a" produces no output at all |
| "\u0314a>;" |
| "\u0314e\u0306>$ce;" |
| "\u0314o\u0306>$co;" |
| "\u0314e>$se;" |
| "\u0314o>$so;" |
| |
| // preceded by consonants |
| // "$consonants {" is a before-context: the vowel spelling is rewritten to its |
| // dependent form only when a member of $consonants stands directly before it; |
| // the consonant itself is matched but not replaced. |
| "$consonants{ a\u0304>$aa;" |
| "$consonants{ ai>$ai;" |
| "$consonants{ au>$au;" |
| "$consonants{ ii>$ii;" |
| "$consonants{ i\u0304>$ii;" |
| "$consonants{ i>$i;" |
| "$consonants{ u\u0304>$uu;" |
| "$consonants{ u>$u;" |
| "$consonants{ r\u0325\u0304>$rrh;" |
| // r + ring below followed by "a" also yields $rh; the trailing "a" is consumed |
| "$consonants{ r\u0325a>$rh;" |
| "$consonants{ r\u0325>$rh;" |
| "$consonants{ l\u0325\u0304>$llh;" |
| "$consonants{ lh>$lh;" |
| "$consonants{ l\u0325>$lh;" |
| "$consonants{ e\u0304>$e;" |
| "$consonants{ o\u0304>$o;" |
| "$consonants{ e\u0306>$ce;" |
| "$consonants{ o\u0306>$co;" |
| "$consonants{ e>$se;" |
| "$consonants{ o>$so;" |
| |
| // Default case: a vowel spelling that none of the earlier, context-bound rules |
| // claimed becomes the stand-alone (independent) $w... form. |
| // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) |
| "a\u0304>$waa;" |
| "ai>$wai;" |
| "au>$wau;" |
| "i\u0304>$wii;" |
| "i>$wi;" |
| "u\u0304>$wuu;" |
| "u>$wu;" |
| "r\u0325\u0304>$wrr;" |
| "r\u0325>$wr;" |
| "l\u0325\u0304>$wll;" |
| "lh>$wl;" |
| "l\u0325>$wl;" |
| "e\u0304>$we;" |
| "o\u0304>$wo;" |
| "a>$wa;" |
| "e\u0306>$wce;" |
| "o\u0306>$wco;" |
| "e>$wse;" |
| // '' is an escaped literal apostrophe: apostrophe + "om" maps to the OM sign. |
| // It is listed before the bare "o" rule, which would otherwise match the "o" first. |
| "''om>$om;" |
| "o>$wso;" |
| |
| // rules for anusvara |
| // "}" starts an after-context: the nasal on the left is rewritten only when the |
| // text after "}" follows it, and that following text is left in place. |
| // The first three rules are exceptions that keep a real $na: before r/l with ring |
| // below, or before "na", n becomes $na + $virama. "|" puts the cursor before |
| // $virama so that later rules can still match it. |
| "n}r\u0325 > $na|$virama;" |
| "n}l\u0325 > $na|$virama;" |
| "n}na > $na|$virama;" |
| // a nasal before a consonant of the listed group collapses to anusvara |
| "n\u0307}[kg] > $anusvara;" |
| "n\u0307}n\u0307 > $anusvara;" |
| "n\u0304}[cj] > $anusvara;" |
| "n\u0304}n\u0303 > $anusvara;" |
| "n\u0323}[tdn]\u0323 > $anusvara;" |
| "n}[tdn] > $anusvara;" |
| "m}[pbm] > $anusvara;" |
| "n}[ylvshr] > $anusvara;" |
| // m + dot above is anusvara regardless of what follows |
| "m\u0307 > $anusvara;" |
| |
| // Consonant rules. Every rule emits <consonant> followed by $virama, with "|" |
| // placing the cursor between the two, so the "$virama <vowel>" rules later in |
| // the list can turn the virama plus a following vowel into a dependent sign. |
| // Spellings with a diacritic or a trailing "h" precede the bare letter. |
| //urdu compatibility |
| "q>$uka|$virama;" |
| "k\u0331h\u0331>$ukha |$virama;" |
| "g\u0307> $ugha | $virama;" |
| "z > $ujha |$virama;" |
| "f > $ufa|$virama;" |
| |
| // dev (presumably short for Devanagari; this is the main consonant table) |
| "y\u0307>$uya|$virama;" |
| "l\u0331>$ela|$virama;" |
| "n\u0331>$ena|$virama;" |
| "n\u0307>$nga|$virama;" |
| "n\u0303>$nya|$virama;" |
| "n\u0323>$nna|$virama;" |
| "t\u0323h>$ttha|$virama;" |
| "t\u0323>$tta|$virama;" |
| "r\u0323h>$udha|$virama;" |
| "r\u0323>$uddha|$virama;" |
| "d\u0323h>$ddha|$virama;" |
| "d\u0323>$dda|$virama;" |
| "kh>$kha|$virama;" |
| "k>$ka|$virama;" |
| "gh>$gha|$virama;" |
| "g>$ga|$virama;" |
| "ch>$cha|$virama;" |
| "c>$ca|$virama;" |
| "jh>$jha|$virama;" |
| "j>$ja|$virama;" |
| "ny>$nya|$virama;" |
| "tth>$ttha|$virama;" |
| "ddh>$ddha|$virama;" |
| "th>$tha|$virama;" |
| "t>$ta|$virama;" |
| "dh>$dha|$virama;" |
| "d>$da|$virama;" |
| "n>$na|$virama;" |
| "ph>$pha|$virama;" |
| "p>$pa|$virama;" |
| "bh>$bha|$virama;" |
| "b>$ba|$virama;" |
| "m>$ma|$virama;" |
| "y>$ya|$virama;" |
| "r\u0331>$rra|$virama;" |
| "r>$ra|$virama;" |
| "l\u0323>$lla|$virama;" |
| "l>$la|$virama;" |
| "v>$va|$virama;" |
| "w>$va|$virama;" |
| "sh>$sha|$virama;" |
| "ss>$ssa|$virama;" |
| "s\u0323>$ssa|$virama;" |
| "s\u0301>$sha|$virama;" |
| "s>$sa|$virama;" |
| "h>$ha|$virama;" |
| // punctuation: a literal '.' becomes danda; $danda followed by '.' becomes double danda |
| "'.'>$danda;" |
| "$danda'.'>$doubleDanda;" |
| // a literal '~' right after a dependent vowel: anusvara after the $depVowelAbove |
| // group, chandrabindu after the $depVowelBelow group |
| "$depVowelAbove{'~'>$anusvara;" |
| "$depVowelBelow{'~'>$chandrabindu;" |
| // convert to dependent forms after consonant with no vowel: |
| // e.g. kai -> {ka}{virama}ai -> {ka}{ai} |
| // Each rule consumes $virama together with the vowel spelling and emits only the |
| // dependent sign. Lines left as comments with no quotes are disabled alternatives |
| // kept from the source rules. |
| //$virama aa>$aa; |
| "$virama a\u0304>$aa;" |
| "$virama ai>$ai;" |
| "$virama au>$au;" |
| "$virama ii>$ii;" |
| "$virama i\u0304>$ii;" |
| "$virama i>$i;" |
| //$virama uu>$uu; |
| "$virama u\u0304>$uu;" |
| "$virama u>$u;" |
| //$virama rrh>$rrh; |
| "$virama r\u0325\u0304>$rrh;" |
| //$virama rh>$rh; |
| "$virama r\u0325a>$rh;" |
| "$virama r\u0325>$rh;" |
| "$virama l\u0325\u0304>$llh;" |
| "$virama lh>$lh;" |
| "$virama l\u0325>$lh;" |
| "$virama e\u0304>$e;" |
| "$virama o\u0304>$o;" |
| // $virama followed by a plain "a" is deleted outright, leaving the bare consonant |
| // (presumably because "a" is the consonant's inherent vowel) |
| "$virama a>;" |
| "$virama e\u0306>$ce;" |
| "$virama o\u0306>$co;" |
| "$virama e>$se;" |
| "$virama o>$so;" |
| |
| |
| // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai} |
| // '' inside a rule is an escaped literal apostrophe. |
| // NOTE(review): as written, these rules consume $virama along with the apostrophe |
| // and emit only the $w... form, i.e. k'ai -> {ka}{wai}; the example above still |
| // shows {virama} in the output. Confirm which is intended. |
| //$virama''aa>$waa; |
| "$virama''a\u0304>$waa;" |
| "$virama''ai>$wai;" |
| "$virama''au>$wau;" |
| //$virama''ii>$wii; |
| "$virama''i\u0304>$wii;" |
| "$virama''i>$wi;" |
| //$virama''uu>$wuu; |
| "$virama''u\u0304>$wuu;" |
| "$virama''u>$wu;" |
| //$virama''rrh>$wrr; |
| "$virama''r\u0325\u0304>$wrr;" |
| //$virama''rh>$wr; |
| "$virama''r\u0325>$wr;" |
| "$virama''l\u0325\u0304>$wll;" |
| //$virama''lh>$wl; |
| "$virama''l\u0325>$wl;" |
| "$virama''e\u0304>$we;" |
| "$virama''o\u0304>$wo;" |
| "$virama''a>$wa;" |
| "$virama''e\u0306>$wce;" |
| "$virama''o\u0306>$wco;" |
| "$virama''e>$wse;" |
| "$virama''o>$wso;" |
| // no virama: apostrophe + vowel spelling without a preceding $virama also yields the independent form |
| "''a\u0304>$waa;" |
| "''ai>$wai;" |
| "''au>$wau;" |
| "''i\u0304>$wii;" |
| "''i>$wi;" |
| "''u\u0304>$wuu;" |
| "''u>$wu;" |
| "''r\u0325\u0304>$wrr;" |
| "''r\u0325>$wr;" |
| "''l\u0325\u0304>$wll;" |
| "''l\u0325>$wl;" |
| "''e\u0304>$we;" |
| "''o\u0304>$wo;" |
| "''a>$wa;" |
| "''e\u0306>$wce;" |
| "''o\u0306>$wco;" |
| "''e>$wse;" |
| "''o>$wso;" |
| |
| // Leftover $virama handling: before a Latin consonant ($z) or a literal space the |
| // virama is rewritten to itself (it stays in the output); before a danda or |
| // double danda ($endThing) it is deleted. |
| "$virama } [$z] > $virama;" |
| "$virama } ' ' > $virama ;" |
| "$virama}$endThing>;" |
| // ASCII digits map to the InterIndic digit slots |
| "0>$zero;" |
| "1>$one;" |
| "2>$two;" |
| "3>$three;" |
| "4>$four;" |
| "5>$five;" |
| "6>$six;" |
| "7>$seven;" |
| "8>$eight;" |
| "9>$nine;" |
| // any apostrophe not consumed by an earlier rule is dropped |
| "''>;" |
| //:: NFC (NFD) ; |
| } |
| } |