| # © 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Generated using tools/cldr/cldr-to-icu/build-icu-data.xml |
| # |
| # File: my_my_FONIPA.txt |
| # Generated from CLDR |
| # |
| |
| # Pronunciation rules for Burmese. |
| # |
| # The following rules are lexical and heuristic: lexical in the sense |
| # that they generate phoneme strings which may further undergo |
| # post-lexical phonological processes, in particular voicing, to |
| # result in actual surface forms; heuristic in the sense that they try |
| # to resolve ambiguities, especially around reduced vowels, in a |
| # systematic way that may be incorrect in many situations. Vowel |
| # reduction depends on many factors, such as morphemic structure, |
| # which are not available here. |
| # |
| # Definitions |
| # |
| # The variables below give symbolic names to the code points used by the |
| # rules. All of them stand for a single code point, except $independent |
| # and $coda, which are sets. |
| # |
| # Dependent vowel signs (U+102B..U+1032) |
| $vs_AA = \u102B; |
| $vs_aa = \u102C; |
| $vs_i = \u102D; |
| $vs_ii = \u102E; |
| $vs_u = \u102F; |
| $vs_uu = \u1030; |
| $vs_e = \u1031; |
| $vs_ai = \u1032; |
| # Various signs |
| $anusvara = \u1036; |
| $visarga = \u1038; |
| $virama = \u1039; |
| $asat = \u103A; |
| # Dependent (medial) consonant signs (U+103B..U+103E) |
| $med_y = \u103B; |
| $med_r = \u103C; |
| $med_w = \u103D; |
| $med_h = \u103E; |
| # Independent letters and letter-like punctuation symbols |
| $independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055]; |
| # Combining marks appended to IPA vowels in the output: U+0330 for creaky |
| # tone, U+0301 for high tone, U+0300 for low tone. |
| $creaky = \u0330; |
| $high = \u0301; |
| $low = \u0300; |
| # Set of tone marks and coda symbols. No rule visible in this file refers |
| # to it. |
| $coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused |
| # |
| # Preprocessing |
| # |
| # The rules are divided into passes by `::Null;`. Each pass is applied to |
| # the whole text before the next one starts, so later passes operate on |
| # the output of earlier ones. |
| ::NFC; |
| # Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical. |
| $vs_AA → $vs_aa; |
| # Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A. |
| # NOTE: open question: what should happen if the syllable ending in kinzi |
| # has a non-low tone? |
| င\u103A $virama → င\u103A; |
| # Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT. |
| $virama → $asat; |
| # Unstack U+103F GREAT SA. |
| ဿ → သ\u103Aသ; |
| # Insert a syllable boundary marker /./ before every independent letter. |
| # The match itself is empty (nothing between the braces); the marker is |
| # inserted at that position. The left context rejects a preceding /./, and |
| # presumably also the start of the text (the anchor inside the negated |
| # set), so markers are not doubled. The right context requires that the |
| # letter, after any U+1037 or medial signs, is not followed by U+103A |
| # ASAT, i.e. that it is not a syllable-final letter. |
| ::Null; |
| [^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.; |
| # Insert the default inherent vowel after a letter in U+1000..U+1021 or |
| # U+103F (with any medial signs) that has no written rhyme: /a\u0330/ when |
| # the letter is directly followed by the end of the text, /ə/ when it is |
| # directly followed by a syllable boundary marker. |
| ::Null; |
| ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky; |
| ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə; |
| # Allow for additional coda consonants. |
| # |
| # This only covers a few of the cases in which full coda consonants |
| # can appear in loanwords. The general situation is somewhat rare and |
| # would be better handled by a formalism that can impose structural |
| # constraints on syllables. |
| # |
| # The rule deletes the ASAT of a letter that directly follows another |
| # ASAT (optionally with a VISARGA in between). |
| ::Null; |
| $asat ($visarga)? [\u1000-\u102A] { $asat → ; |
| # Deal with ၎င\u103Aး early, before the rhyme rules can rewrite its |
| # final part. |
| ၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ; |
| # |
| # Rhymes |
| # |
| # Each rule rewrites a written rhyme (optional vowel sign, optional final |
| # letter with ASAT, optional tone sign) into an IPA vowel with a tone |
| # mark, plus a coda /ɴ/ or /ʔ/ where applicable. Rhymes that take a tone |
| # come in three forms: with U+1037 (creaky), with VISARGA (high), and |
| # with neither. A pattern that extends another pattern is always listed |
| # before it. |
| ::Null; |
| # Final letters without a vowel sign. |
| က $asat → ɛʔ; |
| ဂ $asat → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/ |
| င \u1037 $asat → ɪ $creaky ɴ; |
| င $asat $visarga → ɪ $high ɴ; |
| င $asat → ɪ $low ɴ; |
| စ $asat → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/ |
| ဉ \u1037 $asat → ɪ $creaky ɴ; |
| ဉ $asat $visarga → ɪ $high ɴ; |
| ဉ $asat → ɪ $low ɴ; |
| ည \u1037 $asat → ɛ $creaky; |
| ည $asat $visarga → ɛ $high; |
| ည $asat → ɛ $low; |
| ဏ \u1037 $asat → a $creaky ɴ; |
| ဏ $asat $visarga → a $high ɴ; |
| ဏ $asat → a $low ɴ; |
| တ $asat → aʔ; |
| န \u1037 $asat → a $creaky ɴ; |
| န $asat $visarga → a $high ɴ; |
| န $asat → a $low ɴ; |
| ပ $asat → aʔ; |
| မ \u1037 $asat → a $creaky ɴ; |
| မ $asat $visarga → a $high ɴ; |
| မ $asat → a $low ɴ; |
| ယ \u1037 $asat → ɛ $creaky; |
| ယ $asat $visarga → ɛ $high; |
| ယ $asat → ɛ $low; |
| သ $asat → aʔ; |
| # Rhymes starting with vowel sign AA. |
| $vs_aa ဉ \u1037 $asat → ɪ $creaky ɴ; |
| $vs_aa ဉ $asat $visarga → ɪ $high ɴ; |
| $vs_aa ဉ $asat → ɪ $low ɴ; |
| $vs_aa တ $asat → aʔ; |
| $vs_aa ဏ \u1037 $asat → a $creaky ɴ; |
| $vs_aa ဏ $asat $visarga → a $high ɴ; |
| $vs_aa ဏ $asat → a $low ɴ; |
| $vs_aa န \u1037 $asat → a $creaky ɴ; |
| $vs_aa န $asat $visarga → a $high ɴ; |
| $vs_aa န $asat → a $low ɴ; |
| $vs_aa ပ $asat → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell) |
| $vs_aa ယ \u1037 $asat → ɛ $creaky; |
| $vs_aa ယ $asat $visarga → ɛ $high; |
| $vs_aa ယ $asat → ɛ $low; |
| $vs_aa \u1037 → a $creaky; # redundant creaky tone |
| $vs_aa $visarga → a $high; |
| $vs_aa → a $low; |
| # Rhymes starting with vowel sign I (including I+U and I+ANUSVARA). |
| $vs_i က $asat → eɪ\u032Fʔ; |
| $vs_i စ $asat → eɪ\u032Fʔ; |
| $vs_i တ $asat → eɪ\u032Fʔ; |
| $vs_i န \u1037 $asat → e $creaky ɪ\u032Fɴ; |
| $vs_i န $asat $visarga → e $high ɪ\u032Fɴ; |
| $vs_i န $asat → e $low ɪ\u032Fɴ; |
| $vs_i ပ $asat → eɪ\u032Fʔ; |
| $vs_i မ \u1037 $asat → e $creaky ɪ\u032Fɴ; |
| $vs_i မ $asat $visarga → e $high ɪ\u032Fɴ; |
| $vs_i မ $asat → e $low ɪ\u032Fɴ; |
| $vs_i $vs_u က $asat → aɪ\u032Fʔ; |
| $vs_i $vs_u င \u1037 $asat → a $creaky ɪ\u032Fɴ; |
| $vs_i $vs_u င $asat $visarga → a $high ɪ\u032Fɴ; |
| $vs_i $vs_u င $asat → a $low ɪ\u032Fɴ; |
| $vs_i $vs_u ဏ \u1037 $asat → a $creaky ɪ\u032Fɴ; |
| $vs_i $vs_u ဏ $asat $visarga → a $high ɪ\u032Fɴ; |
| $vs_i $vs_u ဏ $asat → a $low ɪ\u032Fɴ; |
| $vs_i $vs_u ယ \u1037 $asat → o $creaky; |
| $vs_i $vs_u ယ $asat $visarga → o $high; |
| $vs_i $vs_u ယ $asat → o $low; # in က\u102D\u102Fယ\u103A /kò/ |
| $vs_i $vs_u \u1037 → o $creaky; |
| $vs_i $vs_u $visarga → o $high; |
| $vs_i $vs_u → o $low; |
| $vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ; |
| $vs_i $anusvara $visarga → e $high ɪ\u032Fɴ; |
| $vs_i $anusvara → e $low ɪ\u032Fɴ; |
| $vs_i → i $creaky; |
| # Rhymes starting with vowel sign II. |
| $vs_ii \u1037 → i $creaky; # this does not usually occur |
| $vs_ii $visarga → i $high; |
| $vs_ii → i $low; |
| # Rhymes starting with vowel sign U (including U+ANUSVARA). |
| $vs_u က $asat → oʊ\u032Fʔ; |
| $vs_u ဂ $asat → oʊ\u032Fʔ; |
| $vs_u ဏ \u1037 $asat → o $creaky ʊ\u032Fɴ; |
| $vs_u ဏ $asat $visarga → o $high ʊ\u032Fɴ; |
| $vs_u ဏ $asat → o $low ʊ\u032Fɴ; |
| $vs_u တ $asat → oʊ\u032Fʔ; |
| $vs_u န \u1037 $asat → o $creaky ʊ\u032Fɴ; |
| $vs_u န $asat $visarga → o $high ʊ\u032Fɴ; |
| $vs_u န $asat → o $low ʊ\u032Fɴ; |
| $vs_u ပ $asat → oʊ\u032Fʔ; |
| $vs_u မ \u1037 $asat → o $creaky ʊ\u032Fɴ; |
| $vs_u မ $asat $visarga → o $high ʊ\u032Fɴ; |
| $vs_u မ $asat → o $low ʊ\u032Fɴ; |
| $vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ; |
| $vs_u $anusvara $visarga → o $high ʊ\u032Fɴ; |
| $vs_u $anusvara → o $low ʊ\u032Fɴ; |
| $vs_u → u $creaky; |
| # Rhymes starting with vowel sign UU. |
| $vs_uu \u1037 → u $creaky; # this does not usually occur |
| $vs_uu $visarga → u $high; |
| $vs_uu → u $low; |
| # Rhymes starting with vowel sign E (including E+AA). |
| $vs_e တ $asat → ɪʔ; |
| $vs_e $vs_aa က $asat → aʊ\u032Fʔ; |
| $vs_e $vs_aa င \u1037 $asat → a $creaky ʊ\u032Fɴ; |
| $vs_e $vs_aa င $asat $visarga → a $high ʊ\u032Fɴ; |
| $vs_e $vs_aa င $asat → a $low ʊ\u032Fɴ; |
| $vs_e $vs_aa \u1037 → ɔ $creaky; |
| $vs_e $vs_aa $visarga → ɔ $high; # redundant high tone; this does not usually occur |
| $vs_e $vs_aa $asat → ɔ $low; |
| $vs_e $vs_aa → ɔ $high; |
| $vs_e \u1037 → e $creaky; |
| $vs_e $visarga → e $high; |
| $vs_e → e $low; |
| # Rhymes starting with vowel sign AI. |
| $vs_ai \u1037 → ɛ $creaky; |
| $vs_ai $visarga → ɛ $high; # redundant high tone; this does not usually occur |
| $vs_ai → ɛ $high; |
| # ANUSVARA without a vowel sign. |
| $anusvara \u1037 → a $creaky ɴ; |
| $anusvara $visarga → a $high ɴ; |
| $anusvara → a $low ɴ; |
| # Rhymes starting with MEDIAL WA; the medial is consumed by the rule. |
| $med_w တ $asat → ʊʔ; |
| $med_w န \u1037 $asat → ʊ $creaky ɴ; |
| $med_w န $asat $visarga → ʊ $high ɴ; |
| $med_w န $asat → ʊ $low ɴ; |
| $med_w ပ $asat → ʊʔ; |
| $med_w မ \u1037 $asat → ʊ $creaky ɴ; |
| $med_w မ $asat $visarga → ʊ $high ɴ; |
| $med_w မ $asat → ʊ $low ɴ; |
| # |
| # Medials |
| # |
| ::Null; |
| # Velar stops followed by MEDIAL YA or MEDIAL RA are palatalized: |
| # velar + /j/ ==> modern palatals. |
| က $med_y → t\u0361ɕ; |
| ခ $med_y → t\u0361ɕʰ; |
| ဂ $med_y → d\u0361ʑ; |
| ဃ $med_y → d\u0361ʑ; |
| က $med_r → t\u0361ɕ; |
| ခ $med_r → t\u0361ɕʰ; |
| ဂ $med_r → d\u0361ʑ; |
| ဃ $med_r → d\u0361ʑ; |
| # After initial YA, MEDIAL YA and MEDIAL RA are redundant; delete them. |
| ယ { [$med_y $med_r] → ; |
| # Move U+103E MEDIAL HA in front of all other medials, one swap per |
| # pass. |
| # Step 1: swap U+103D MEDIAL WA and a following MEDIAL HA. |
| $med_w $med_h → $med_h $med_w; |
| ::Null; |
| # MEDIAL WA is now the last medial. |
| # (SA|LA)+YA+HA yields the palatal ʃ. |
| သ $med_y $med_h → ʃ; |
| လ $med_y $med_h → ʃ; |
| # Step 2: swap U+103C MEDIAL RA and a following MEDIAL HA. |
| $med_r $med_h → $med_h $med_r; |
| ::Null; |
| # Step 3: swap U+103B MEDIAL YA and a following MEDIAL HA. |
| $med_y $med_h → $med_h $med_y; |
| ::Null; |
| # A sonorant followed by MEDIAL HA becomes its devoiced counterpart |
| # (or ʃ for YA and RA); the MEDIAL HA is consumed. |
| င $med_h → ŋ\u030A; |
| ဉ $med_h → ɲ\u0325; |
| ည $med_h → ɲ\u0325; |
| ဏ $med_h → n\u0325; |
| န $med_h → n\u0325; |
| မ $med_h → m\u0325; |
| ယ $med_h → ʃ; |
| ရ $med_h → ʃ; |
| လ $med_h → l\u0325; |
| ဝ $med_h → w\u0325; |
| ဠ $med_h → l\u0325; |
| # Any U+103E MEDIAL HA left over is dropped. |
| $med_h → ; |
| # The medial cluster /jw/ is simplified to /w/: U+103B MEDIAL YA and |
| # U+103C MEDIAL RA are deleted when U+103D MEDIAL WA follows. |
| # TODO: revisit this |
| $med_y } $med_w → ; |
| $med_r } $med_w → ; |
| # Remaining medials map to glides. |
| $med_y → j; |
| $med_r → j; |
| $med_w → w; |
| # |
| # Initials |
| # |
| # These rules follow the preceding medial rules without an intervening |
| # `::Null;`, so they are part of the same pass. Letters that are still |
| # present at this point are mapped to IPA consonants. |
| # |
| # Velars |
| က → k; |
| ခ → kʰ; |
| ဂ → ɡ; |
| ဃ → ɡ; |
| င → ŋ; |
| # Historic palatals |
| စ → s; |
| ဆ → sʰ; |
| ဇ → z; |
| ဈ → z; |
| ဉ → ɲ; |
| ည → ɲ; |
| # Alveolars |
| ဋ → t; |
| ဌ → tʰ; |
| ဍ → d; |
| ဎ → d; |
| ဏ → n; |
| # Historic dentals ==> alveolars |
| တ → t; |
| ထ → tʰ; |
| ဒ → d; |
| ဓ → d; |
| န → n; |
| # Labials |
| ပ → p; |
| ဖ → pʰ; |
| ဗ → b; |
| ဘ → b; |
| မ → m; |
| # Other letters |
| ယ → j; |
| ရ → j; # historic /r/ |
| # LA with ASAT must be listed before plain LA so that the final form is |
| # tried first. |
| လ\u103A → ; # final, typically not pronounced in native words |
| လ → l; |
| ဝ → w; |
| သ → θ; # historic /s/ ==> modern dental |
| ဟ → h; |
| ဠ → l; |
| အ → ʔ; |
| # Independent vowels |
| # Each letter is listed with U+1037, with VISARGA, and bare, in that |
| # order, so the forms carrying a tone sign are tried before the bare |
| # letter. The output is /ʔ/ followed by the vowel with its tone mark. |
| ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur |
| ဣး → ʔí; # this does not usually occur |
| ဣ → ʔḭ; |
| ဤ\u1037 → ʔḭ; # this does not usually occur |
| ဤး → ʔí; # this does not usually occur |
| ဤ → ʔì; |
| ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur |
| ဥး → ʔú; # this does not usually occur |
| ဥ → ʔṵ; |
| ဦ\u1037 → ʔṵ; # this does not usually occur |
| ဦး → ʔú; |
| ဦ → ʔù; |
| ဧ\u1037 → ʔḛ; # this does not usually occur |
| ဧး → ʔé; |
| ဧ → ʔè; |
| ဩ\u1037 → ʔɔ\u0330; # this does not usually occur |
| ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur |
| ဩ → ʔɔ\u0301; |
| ဪ\u1037 → ʔɔ\u0330; # this does not usually occur |
| ဪး → ʔɔ\u0301; # this does not usually occur |
| ဪ → ʔɔ\u0300; |
| # Various signs; each one is replaced by a complete transcription. |
| ၌ → n\u0325aɪ\u032Fʔ; |
| ၍ → jwḛ; |
| # ၎င\u103Aး was handled earlier. |
| ၏ → ʔḭ; |
| # |
| # Postprocessing |
| # |
| # The two cleanup rules below are not preceded by `::Null;`, so they run |
| # in the same pass as the rules directly above them. |
| # |
| # Delete any remaining U+103A ASAT. |
| $asat → ; |
| # Delete zero-width space, non-joiner, joiner (U+200B..U+200D). |
| [\u200B-\u200D] → ; |
| # Normalize the generated output to NFC. |
| ::NFC; |
| |