| //-------------------------------------------------------------------- |
| // Copyright (C) 1999, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // Date Name Description |
| // 11/17/99 aliu Creation. |
| //-------------------------------------------------------------------- |
| |
| // Latin-Devanagari |
| |
| ldevan { |
| Rule { |
| //##################################################################### |
| // Keyboard Transliteration Table |
| //##################################################################### |
| // Conversions should be: |
| // 1. complete |
| // * convert every sequence of Latin letters (a to z plus apostrophe) |
| // to a sequence of Native letters |
| // * convert every sequence of Native letters to Latin letters |
| // 2. reversible |
| // * any string of Native converted to Latin and back should be the same |
| // * this is not true for English converted to Native & back, e.g.: |
| // k -> {kaf} -> k |
| // c -> {kaf} -> k |
| //##################################################################### |
| // Sequences of Latin letters may convert to a single Native letter. |
| // When this is the case, an apostrophe can be used to indicate separate |
| // letters. |
| // E.g. sh -> {shin} |
| // s'h -> {sin}{heh} |
| // ss -> {sad} |
| // s's -> {sin}{shadda} |
| //##################################################################### |
| // To Do: |
| // finish adding shadda, add sokoon, fix uppercase |
| // make two transliteration tables: one with vowels, one without |
| //##################################################################### |
| // Modifications |
| // Devanagari Transliterator: broken up with consonants/vowels |
| //##################################################################### |
| // Unicode character name definitions |
| //##################################################################### |
| |
| // Variable definitions. Each "name=value;" entry gives a name to a |
| // Devanagari code point (or to a [set]); the rules further down refer to |
| // multi-letter names as {name} and to the one-character names & and % |
| // directly. |
| |
| // signs (not consonants): candrabindu, bindu (anusvara), visarga |
| "candrabindu=\u0901;" |
| "bindu=\u0902;" |
| "visarga=\u0903;" |
| |
| // w<vowel> represents the stand-alone form |
| "wa=\u0905;" |
| "waa=\u0906;" |
| "wi=\u0907;" |
| "wii=\u0908;" |
| "wu=\u0909;" |
| "wuu=\u090A;" |
| "wr=\u090B;" |
| "wl=\u090C;" |
| "we=\u090F;" |
| "wai=\u0910;" |
| "wo=\u0913;" |
| "wau=\u0914;" |
| |
| // consonants, grouped by varga (velar, palatal, retroflex, dental, labial) |
| "ka=\u0915;" |
| "kha=\u0916;" |
| "ga=\u0917;" |
| "gha=\u0918;" |
| "nga=\u0919;" |
| |
| "ca=\u091A;" |
| "cha=\u091B;" |
| "ja=\u091C;" |
| "jha=\u091D;" |
| "nya=\u091E;" |
| |
| "tta=\u091F;" |
| "ttha=\u0920;" |
| "dda=\u0921;" |
| "ddha=\u0922;" |
| "nna=\u0923;" |
| |
| "ta=\u0924;" |
| "tha=\u0925;" |
| "da=\u0926;" |
| "dha=\u0927;" |
| "na=\u0928;" |
| |
| "pa=\u092A;" |
| "pha=\u092B;" |
| "ba=\u092C;" |
| "bha=\u092D;" |
| "ma=\u092E;" |
| |
| // semivowels; rra is defined here but no rule in this table uses it |
| "ya=\u092F;" |
| "ra=\u0930;" |
| "rra=\u0931;" |
| // U+0932 is DEVANAGARI LETTER LA; U+0933 is LETTER LLA and is not the |
| // plain "l" this table maps to and from. |
| "la=\u0932;" |
| "va=\u0935;" |
| |
| "sha=\u0936;" |
| "ssa=\u0937;" |
| "sa=\u0938;" |
| "ha=\u0939;" |
| |
| // <vowel> represents the dependent form |
| "aa=\u093E;" |
| "i=\u093F;" |
| "ii=\u0940;" |
| "u=\u0941;" |
| "uu=\u0942;" |
| "rh=\u0943;" |
| // Vowel sign vocalic L is U+0962 (U+0944 is vowel sign vocalic RR); this |
| // is the dependent counterpart of wl (U+090C). |
| "lh=\u0962;" |
| "e=\u0947;" |
| "ai=\u0948;" |
| "o=\u094B;" |
| "au=\u094C;" |
| |
| "virama=\u094D;" |
| |
| // Vocalic RR: independent letter U+0960 and its dependent sign U+0944. |
| "wrr=\u0960;" |
| "rrh=\u0944;" |
| |
| "danda=\u0964;" |
| "doubleDanda=\u0965;" |
| "depVowelAbove=[\u093E-\u0940\u0945-\u094C];" |
| // U+0962 is listed explicitly so that {lh} stays a member of this set. |
| "depVowelBelow=[\u0941-\u0944\u0962];" |
| // Ech: Double escape U+0000, so UnicodeString doesn't consider it |
| // to be the end of the string. This is only necessary for U+0000 |
| // right now. [liu] |
| "endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF];" |
| |
| // & = virama or any dependent vowel sign; % = any Latin consonant letter |
| "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];" |
| "%=[bcdfghjklmnpqrstvwxyz];" |
| |
| //##################################################################### |
| // convert from Latin letters to Native letters |
| //##################################################################### |
| // Every rule in this section has the form "latin>native;". |
| // NOTE(review): the rules appear to be tried in the order listed (longer |
| // Latin sequences are always placed before their prefixes, e.g. "kh" before |
| // "k"), so the order of lines should be treated as significant -- confirm |
| // against the rule engine before reordering anything. |
| //Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e |
| |
| // special forms with no good conversion |
| |
| "mm>{bindu};" |
| "x>{visarga};" |
| |
| // convert to independent forms at start of word or syllable: |
| // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) |
| // Moved up [LIU] |
| |
| "aa>{waa};" |
| "ai>{wai};" |
| "au>{wau};" |
| "ii>{wii};" |
| "i>{wi};" |
| "uu>{wuu};" |
| "u>{wu};" |
| "rrh>{wrr};" |
| "rh>{wr};" |
| "lh>{wl};" |
| "e>{we};" |
| "o>{wo};" |
| "a>{wa};" |
| |
| // normal consonants |
| // Each consonant is emitted followed by {virama}. The "|" before {virama} |
| // presumably leaves the cursor in front of it, so that the {virama}... |
| // rules further down can replace it with a dependent vowel (see the |
| // kai example below) -- confirm against the rule engine. |
| // q, f, w and z have no letter of their own and share the output of |
| // k, v, v and s respectively. |
| |
| "kh>{kha}|{virama};" |
| "k>{ka}|{virama};" |
| "q>{ka}|{virama};" |
| "gh>{gha}|{virama};" |
| "g>{ga}|{virama};" |
| "ng>{nga}|{virama};" |
| "ch>{cha}|{virama};" |
| "c>{ca}|{virama};" |
| "jh>{jha}|{virama};" |
| "j>{ja}|{virama};" |
| "ny>{nya}|{virama};" |
| "tth>{ttha}|{virama};" |
| "tt>{tta}|{virama};" |
| "ddh>{ddha}|{virama};" |
| "dd>{dda}|{virama};" |
| "nn>{nna}|{virama};" |
| "th>{tha}|{virama};" |
| "t>{ta}|{virama};" |
| "dh>{dha}|{virama};" |
| "d>{da}|{virama};" |
| "n>{na}|{virama};" |
| "ph>{pha}|{virama};" |
| "p>{pa}|{virama};" |
| "bh>{bha}|{virama};" |
| "b>{ba}|{virama};" |
| "m>{ma}|{virama};" |
| "y>{ya}|{virama};" |
| "r>{ra}|{virama};" |
| "l>{la}|{virama};" |
| "v>{va}|{virama};" |
| "f>{va}|{virama};" |
| "w>{va}|{virama};" |
| "sh>{sha}|{virama};" |
| "ss>{ssa}|{virama};" |
| "s>{sa}|{virama};" |
| "z>{sa}|{virama};" |
| "h>{ha}|{virama};" |
| |
| // punctuation and nasalization marks |
| // NOTE(review): ".>{danda};" has no "|", so whether the next rule can |
| // still see that {danda} when a second "." arrives depends on where the |
| // engine leaves the cursor -- confirm that ".." really yields doubleDanda. |
| // "~" after a dependent vowel becomes bindu or candrabindu depending on |
| // which of the two depVowel sets the preceding vowel sign belongs to. |
| |
| ".>{danda};" |
| "{danda}.>{doubleDanda};" |
| "{depVowelAbove})~>{bindu};" |
| "{depVowelBelow})~>{candrabindu};" |
| |
| // convert to dependent forms after consonant with no vowel: |
| // e.g. kai -> {ka}{virama}ai -> {ka}{ai} |
| // The last rule of the group drops the virama for a plain "a" and emits |
| // no vowel sign at all. |
| |
| "{virama}aa>{aa};" |
| "{virama}ai>{ai};" |
| "{virama}au>{au};" |
| "{virama}ii>{ii};" |
| "{virama}i>{i};" |
| "{virama}uu>{uu};" |
| "{virama}u>{u};" |
| "{virama}rrh>{rrh};" |
| "{virama}rh>{rh};" |
| "{virama}lh>{lh};" |
| "{virama}e>{e};" |
| "{virama}o>{o};" |
| "{virama}a>;" |
| |
| // otherwise convert independent forms when separated by ': k'ai -> {ka}{wai} |
| // (the virama and the apostrophe are both consumed by these rules; '' is |
| // presumably the escaped form of a single apostrophe) |
| |
| "{virama}''aa>{waa};" |
| "{virama}''ai>{wai};" |
| "{virama}''au>{wau};" |
| "{virama}''ii>{wii};" |
| "{virama}''i>{wi};" |
| "{virama}''uu>{wuu};" |
| "{virama}''u>{wu};" |
| "{virama}''rrh>{wrr};" |
| "{virama}''rh>{wr};" |
| "{virama}''lh>{wl};" |
| "{virama}''e>{we};" |
| "{virama}''o>{wo};" |
| "{virama}''a>{wa};" |
| |
| // drop a virama that is followed by a member of endThing; the "(" |
| // presumably marks endThing as following context that is not consumed |
| |
| "{virama}({endThing}>;" |
| |
| // convert any left-over apostrophes used for separation |
| |
| "''>;" |
| |
| //##################################################################### |
| // convert from Native letters to Latin letters |
| //##################################################################### |
| // Every rule in this section has the form "latin<native;". |
| // NOTE(review): as in the Latin-to-Native section, the order of the lines |
| // looks significant (more specific rules precede more general ones) -- |
| // confirm against the rule engine before reordering anything. |
| |
| // special forms with no good conversion |
| |
| "mm<{bindu};" |
| "x<{visarga};" |
| |
| // normal consonants |
| // For each consonant there are up to three kinds of rule: |
| // X''<{Xa}{virama}({Y}; consonant + virama before a letter Y whose Latin |
| // form would otherwise merge with X (e.g. k + h |
| // would read as "kh"): emit X plus an apostrophe |
| // X<{Xa}(&; consonant followed by a virama or dependent |
| // vowel: emit the bare consonant |
| // Xa<{Xa}; otherwise: emit the consonant with inherent "a" |
| // The "(" presumably introduces following context that is matched but not |
| // consumed -- confirm against the rule engine. |
| |
| "kh<{kha}(&;" |
| "kha<{kha};" |
| "k''<{ka}{virama}({ha};" |
| "k<{ka}(&;" |
| "ka<{ka};" |
| "gh<{gha}(&;" |
| "gha<{gha};" |
| "g''<{ga}{virama}({ha};" |
| "g<{ga}(&;" |
| "ga<{ga};" |
| "ng<{nga}(&;" |
| "nga<{nga};" |
| "ch<{cha}(&;" |
| "cha<{cha};" |
| "c''<{ca}{virama}({ha};" |
| "c<{ca}(&;" |
| "ca<{ca};" |
| "jh<{jha}(&;" |
| "jha<{jha};" |
| "j''<{ja}{virama}({ha};" |
| "j<{ja}(&;" |
| "ja<{ja};" |
| "ny<{nya}(&;" |
| "nya<{nya};" |
| "tth<{ttha}(&;" |
| "ttha<{ttha};" |
| "tt''<{tta}{virama}({ha};" |
| "tt<{tta}(&;" |
| "tta<{tta};" |
| "ddh<{ddha}(&;" |
| "ddha<{ddha};" |
| // dd + virama before ha must be written dd'h so that it does not read |
| // back as "ddh"; same shape as the tt'' rule above. |
| "dd''<{dda}{virama}({ha};" |
| "dd<{dda}(&;" |
| "dda<{dda};" |
| "dh<{dha}(&;" |
| "dha<{dha};" |
| "d''<{da}{virama}({ha};" |
| "d''<{da}{virama}({ddha};" |
| "d''<{da}{virama}({dda};" |
| "d''<{da}{virama}({dha};" |
| "d''<{da}{virama}({da};" |
| "d<{da}(&;" |
| "da<{da};" |
| "th<{tha}(&;" |
| "tha<{tha};" |
| "t''<{ta}{virama}({ha};" |
| "t''<{ta}{virama}({ttha};" |
| "t''<{ta}{virama}({tta};" |
| "t''<{ta}{virama}({tha};" |
| "t''<{ta}{virama}({ta};" |
| "t<{ta}(&;" |
| "ta<{ta};" |
| "n''<{na}{virama}({ga};" |
| "n''<{na}{virama}({ya};" |
| "n<{na}(&;" |
| "na<{na};" |
| "ph<{pha}(&;" |
| "pha<{pha};" |
| "p''<{pa}{virama}({ha};" |
| "p<{pa}(&;" |
| "pa<{pa};" |
| "bh<{bha}(&;" |
| "bha<{bha};" |
| "b''<{ba}{virama}({ha};" |
| "b<{ba}(&;" |
| "ba<{ba};" |
| "m''<{ma}{virama}({ma};" |
| "m''<{ma}{virama}({bindu};" |
| "m<{ma}(&;" |
| "ma<{ma};" |
| "y<{ya}(&;" |
| "ya<{ya};" |
| "r''<{ra}{virama}({ha};" |
| "r<{ra}(&;" |
| "ra<{ra};" |
| "l''<{la}{virama}({ha};" |
| "l<{la}(&;" |
| "la<{la};" |
| "v<{va}(&;" |
| "va<{va};" |
| "sh<{sha}(&;" |
| "sha<{sha};" |
| "ss<{ssa}(&;" |
| "ssa<{ssa};" |
| "s''<{sa}{virama}({ha};" |
| "s''<{sa}{virama}({sha};" |
| "s''<{sa}{virama}({ssa};" |
| "s''<{sa}{virama}({sa};" |
| "s<{sa}(&;" |
| "sa<{sa};" |
| "h<{ha}(&;" |
| "ha<{ha};" |
| |
| // dependent vowels (should never occur except following consonants) |
| |
| "aa<{aa};" |
| "ai<{ai};" |
| "au<{au};" |
| "ii<{ii};" |
| "i<{i};" |
| "uu<{uu};" |
| "u<{u};" |
| "rrh<{rrh};" |
| "rh<{rh};" |
| "lh<{lh};" |
| "e<{e};" |
| "o<{o};" |
| |
| // independent vowels (when following consonants) |
| // An apostrophe is emitted in front of the vowel when the Latin text |
| // already produced ends in "a" or in a consonant letter (%), which keeps |
| // the independent vowel from being read back as a dependent one. Only the |
| // vowels beginning with a, i or u have an "a)" variant. |
| |
| "''aa<a){waa};" |
| "''aa<%){waa};" |
| "''ai<a){wai};" |
| "''ai<%){wai};" |
| "''au<a){wau};" |
| "''au<%){wau};" |
| "''ii<a){wii};" |
| "''ii<%){wii};" |
| "''i<a){wi};" |
| "''i<%){wi};" |
| "''uu<a){wuu};" |
| "''uu<%){wuu};" |
| "''u<a){wu};" |
| "''u<%){wu};" |
| "''rrh<%){wrr};" |
| "''rh<%){wr};" |
| "''lh<%){wl};" |
| "''e<%){we};" |
| "''o<%){wo};" |
| "''a<a){wa};" |
| "''a<%){wa};" |
| |
| |
| // independent vowels (otherwise) |
| |
| "aa<{waa};" |
| "ai<{wai};" |
| "au<{wau};" |
| "ii<{wii};" |
| "i<{wi};" |
| "uu<{wuu};" |
| "u<{wu};" |
| "rrh<{wrr};" |
| "rh<{wr};" |
| "lh<{wl};" |
| "e<{we};" |
| "o<{wo};" |
| "a<{wa};" |
| |
| // blow away any remaining viramas |
| |
| "<{virama};" |
| } |
| } |