| #-------------------------------------------------------------------- |
| # Copyright (c) 1999-2004, International Business Machines |
| # Corporation and others. All Rights Reserved. |
| #-------------------------------------------------------------------- |
| |
| # Thai-Latin |
| # This set of rules follows ISO 11940 |
| # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf |
| # except that ISO 11940 does not mention an implicit vowel, so we use ọ |
| # |
| # The transcription is fairly ugly, so we ought to also do the UNGEGN version |
| # see: http://www.eki.ee/wgrs/rom1_th.pdf |
| # and probably make that the main variant. |
| |
| # Note: this is an internal file. The NFD/NFC is handled externally, in the index |
| # The insertion of spaces between words, the reversal of the vowels |
| # and the conversion of space to semicolon are done *outside* of these rules. |
| # So as far as these rules are concerned, the vowels are in logical order! |
| |
| # insert implicit vowel (and remove it going the other way) |
| # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically |
| #$consonant = [ก-ฮ]; |
| #$vowel = [ะ-ฺเ-ไ็]; |
| |
| #{ ( $consonant ) } [^$vowel ] > | $1 ; |
| # > ọ ; |
| # < ọ ; |
| |
| # Negated sets over the canonical combining class (ccc): |
| #   $notAbove = any character whose ccc is neither 0 nor "above" |
| #   $notBelow = any character whose ccc is neither 0 nor "below" |
| # $notAbove is used by the backward rules marked "account for reordering" to skip over |
| # marks that may stand between a base letter and its macron / hook above. |
| # In the rules visible here, $notBelow appears only in the commented-out $notHAccent line. |
| $notAbove = [^\p{ccc=0}\p{ccc=above}] ; |
| $notBelow = [^\p{ccc=0}\p{ccc=below}] ; |
| |
| # Consonants |
| # Warning: the 'h's need to be handled carefully! |
| # What we really want to say is the following, but we can't |
| # $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ; |
| |
| # Since the only accents we care about that could cause problems are free-standing accents below, we use instead: |
| # U+0325 (combining ring below). |
| # NOTE(review): PHINTHU is mapped to a spacing tick further down instead of an underring; |
| # confirm whether this set is still needed as written. |
| $freeStandingBelow = [\u0325 ]; |
| # The accents that can follow an 'h' in this scheme (as in the HO HIP / HO NOKHUK rules). |
| $hAccent = [ ̄ ̣]; |
| # One character that is neither a free-standing below mark nor an 'h' accent. |
| $notHAccent0 = [^$freeStandingBelow$hAccent]; |
| # A free-standing below mark followed by one character that is not an 'h' accent. |
| $notHAccent1 = $freeStandingBelow [^$hAccent]; |
| |
| # The two HO letters: each is written as h plus an accent. |
| # Thai-to-Latin only; the Latin-to-Thai direction is handled by the next rule. |
| ห > h̄ ; # THAI CHARACTER HO HIP |
| # Latin-to-Thai only: h, then zero or more $notAbove marks (captured as $1), then the macron. |
| # The output is the Thai letter followed by $1, with the cursor left before $1 so that the |
| # captured marks are themselves matched by later rules. |
| ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering |
| ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK |
| |
| # KHO family. Rules are tried in order, so the accented spellings are listed before |
| # plain kh, and plain k comes last. |
| ข <> k̄h ; # THAI CHARACTER KHO KHAI |
| ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT |
| ฅ <> kʹh ; # THAI CHARACTER KHO KHON |
| ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG |
| # Plain kh. The part after the closing brace is trailing context on the Latin side: it keeps |
| # kh from matching when the h is followed by one of the $hAccent marks, because an accented h |
| # is a HO letter of its own (see the HO HIP / HO NOKHUK rules). |
| # Latin-to-Thai only: kh followed by a free-standing below mark and then a non-h-accent. |
| ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI |
| # Both directions: kh followed by a character that is neither of those marks. |
| ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI |
| ก <> k ; # THAI CHARACTER KO KAI |
| |
| # PHO family: accented spellings first, then plain ph, then plain p (rules are tried in order). |
| ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO |
| ผ <> p̄h ; # THAI CHARACTER PHO PHUNG |
| # Plain ph is guarded by trailing context so that p followed by an accented h is not read as ph. |
| # Latin-to-Thai only: ph followed by a free-standing below mark and then a non-h-accent. |
| พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN |
| # Both directions: ph followed by a character that is neither of those marks. |
| พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN |
| ป <> p ; # THAI CHARACTER PO PLA |
| |
| # CHO family: accented spellings first, then plain ch, then plain c (rules are tried in order). |
| ฉ <> c̄h ; # THAI CHARACTER CHO CHING |
| ฌ <> c̣h ; # THAI CHARACTER CHO CHOE |
| # Plain ch is guarded by trailing context so that c followed by an accented h is not read as ch. |
| # Latin-to-Thai only: ch followed by a free-standing below mark and then a non-h-accent. |
| ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG |
| # Both directions: ch followed by a character that is neither of those marks. |
| ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG |
| จ <> c ; # THAI CHARACTER CHO CHAN |
| |
| # THO / TO family: accented spellings first, then plain th, then the bare t forms |
| # (rules are tried in order). |
| ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN |
| ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO |
| ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO |
| ถ <> t̄h ; # THAI CHARACTER THO THUNG |
| ธ <> ṭh ; # THAI CHARACTER THO THONG |
| # Plain th is guarded by trailing context so that t followed by an accented h is not read as th. |
| # Latin-to-Thai only: th followed by a free-standing below mark and then a non-h-accent. |
| ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN |
| # Both directions: th followed by a character that is neither of those marks. |
| ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN |
| #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick. |
| ฏ <> t̩ ; # THAI CHARACTER TO PATAK |
| ต <> t ; # THAI CHARACTER TO TAO |
| |
| # since there is no singleton g (generated), don't worry about that. |
| # Consonants with a one-to-one, two-way mapping. Where a letter and its accented form both |
| # occur (n, d), the accented form is listed first because rules are tried in order. |
| ง <> ng ; # THAI CHARACTER NGO NGU |
| ณ <> ṇ ; # THAI CHARACTER NO NEN |
| น <> n ; # THAI CHARACTER NO NU |
| |
| ญ <> ỵ ; # THAI CHARACTER YO YING |
| ฎ <> ḍ ; # THAI CHARACTER DO CHADA |
| ด <> d ; # THAI CHARACTER DO DEK |
| |
| บ <> b ; # THAI CHARACTER BO BAIMAI |
| # FO FA is f plus macron. The two-way rule covers the case where the macron directly follows f; |
| # the backward rule after it also accepts intervening $notAbove marks (captured as $1 and |
| # left after the cursor for later rules). |
| # NOTE(review): HO HIP and SO SUA use a one-way rule here where this uses a two-way one; |
| # the difference looks harmless but confirm it is intended. |
| ฝ <> f̄ ; # THAI CHARACTER FO FA |
| ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering |
| |
| ม <> m ; # THAI CHARACTER MO MA |
| ย <> y ; # THAI CHARACTER YO YAK |
| ร <> r ; # THAI CHARACTER RO RUA |
| ฤ <> v ; # THAI CHARACTER RU |
| ฦ <> ł ; # THAI CHARACTER LU |
| ว <> w ; # THAI CHARACTER WO WAEN |
| |
| # Sibilants written as s plus accents. Each backward rule matches the base letter, optional |
| # $notAbove marks (captured as $1), and then the macron; $1 is emitted after the cursor so it |
| # is matched again by later rules. Plain s (SO SO) is listed further down, after these. |
| ศ <> ṣ̄ ; # THAI CHARACTER SO SALA*** |
| ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering |
| ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI |
| # Thai-to-Latin only; the Latin-to-Thai direction is handled by the next rule. |
| ส > s̄ ; # THAI CHARACTER SO SUA*** |
| ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering |
| |
| # Accented l before plain l, so the accented form is tried first. |
| ฬ <> ḷ ; # THAI CHARACTER LO CHULA |
| ล <> l ; # THAI CHARACTER LO LING |
| ฟ <> f ; # THAI CHARACTER FO FAN |
| |
| อ <> x ; # THAI CHARACTER O ANG |
| ซ <> s ; # THAI CHARACTER SO SO |
| |
| # vowels |
| |
| # Vowels written with a or u/i as the base letter. Accented spellings are listed before the |
| # bare letter (a, u) so that they are tried first in the Latin-to-Thai direction. |
| # Each "backward case" rule matches the base letter, optional $notAbove marks (captured as $1), |
| # and then the accent; $1 is emitted after the cursor so later rules convert it. |
| ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT |
| |
| # Thai-to-Latin only; the Latin-to-Thai direction is handled by the next rule. |
| า > ā ; # THAI CHARACTER SARA AA |
| า | $1 < a ($notAbove*) ̄; # backward case, account for reordering |
| |
| # We deviate from ISO for SARA AM for disambiguation |
| # Thai-to-Latin only; the Latin-to-Thai direction is handled by the next rule. |
| ำ > a ̉; # THAI CHARACTER SARA AM |
| ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering |
| |
| ะ <> a ; # THAI CHARACTER SARA A |
| ี <> ī ; # THAI CHARACTER SARA II |
| ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering |
| |
| ื <> ụ̄ ; # THAI CHARACTER SARA UEE |
| ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering |
| |
| ึ <> ụ ; # THAI CHARACTER SARA UE |
| ู <> ū ; # THAI CHARACTER SARA UU |
| ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering |
| |
| ุ <> u ; # THAI CHARACTER SARA U |
| |
| # Signs, leading vowels and tone marks: all simple two-way mappings. |
| ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI |
| |
| # ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT |
| |
| # Leading vowels (see the file header: vowel reordering is done outside these rules). |
| เ <> e ; # THAI CHARACTER SARA E |
| แ <> æ ; # THAI CHARACTER SARA AE |
| โ <> o ; # THAI CHARACTER SARA O |
| ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN |
| ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI |
| ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO |
| # The following Thai marks map to bare combining accents on the Latin side. |
| ็ <> ̆ ; # THAI CHARACTER MAITAIKHU |
| ่ <> ̀ ; # THAI CHARACTER MAI EK |
| ้ <> ̂ ; # THAI CHARACTER MAI THO |
| ๊ <> ́ ; # THAI CHARACTER MAI TRI |
| ๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA |
| ์ <> ̒ ; # THAI CHARACTER THANTHAKHAT |
| # The tilde is quoted so that it is taken as a literal character. |
| ๎ <> '~' ; # THAI CHARACTER YAMAKKAN |
| |
| # We deviate from ISO for disambiguation |
| ํ <> ̊ ; # THAI CHARACTER NIKHAHIT |
| |
| ๏ <> § ; # THAI CHARACTER FONGMAN |
| |
| # Thai digits map one-to-one onto ASCII digits, in both directions. |
| ๐ <> 0 ; # THAI DIGIT ZERO |
| ๑ <> 1 ; # THAI DIGIT ONE |
| ๒ <> 2 ; # THAI DIGIT TWO |
| ๓ <> 3 ; # THAI DIGIT THREE |
| ๔ <> 4 ; # THAI DIGIT FOUR |
| ๕ <> 5 ; # THAI DIGIT FIVE |
| ๖ <> 6 ; # THAI DIGIT SIX |
| ๗ <> 7 ; # THAI DIGIT SEVEN |
| ๘ <> 8 ; # THAI DIGIT EIGHT |
| ๙ <> 9 ; # THAI DIGIT NINE |
| |
| # The two vertical bars are quoted because an unquoted bar is the cursor marker in rule syntax. |
| ๚ <> '||' ; # THAI CHARACTER ANGKHANKHU |
| |
| ๛ <> » ; # THAI CHARACTER KHOMUT |
| ๆ <> « ; # THAI CHARACTER MAIYAMOK |
| |
| # moved down to make shorter first |
| #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below. |
| # PHINTHU maps to a spacing mark rather than a combining one. |
| ฺ <> ˌ ; # THAI CHARACTER PHINTHU |
| # Plain i comes after the rules for the accented i spellings (SARA II, SARA AI MAIMALAI), |
| # so those are tried first in the Latin-to-Thai direction. |
| ิ <> i ; # THAI CHARACTER SARA I |
| |
| # fallbacks |
| |
| # Latin-to-Thai only: Latin letters that have no rule of their own are rewritten to a letter |
| # that does. The cursor marker precedes the replacement, so the substituted letter is then |
| # matched again by the rules above (e.g. g becomes k, which the KO KAI rule converts). |
| | k < g ; |
| # Reached only by an h that none of the earlier h rules (accented h, kh/ph/ch/th) consumed. |
| | k < h ; |
| | c < j ; |
| | k < q ; |
| | s < z ; |
| |
| # Parenthesized transform rule: it takes part only in the Latin-to-Thai direction, |
| # where it lowercases the input. |
| :: (lower); |