# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: my_my_FONIPA.txt
# Generated from CLDR
#
# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.
#
# Definitions
#
# Every variable below names a single code point (or a set of them) so that
# the rules further down can refer to Myanmar signs by role.
#
# Dependent vowel signs
$vs_AA = \u102B;  # U+102B VOWEL SIGN TALL AA
$vs_aa = \u102C;  # U+102C VOWEL SIGN AA
$vs_i = \u102D;   # U+102D VOWEL SIGN I
$vs_ii = \u102E;  # U+102E VOWEL SIGN II
$vs_u = \u102F;   # U+102F VOWEL SIGN U
$vs_uu = \u1030;  # U+1030 VOWEL SIGN UU
$vs_e = \u1031;   # U+1031 VOWEL SIGN E
$vs_ai = \u1032;  # U+1032 VOWEL SIGN AI
# Various signs
$anusvara = \u1036;  # U+1036 SIGN ANUSVARA
$visarga = \u1038;   # U+1038 SIGN VISARGA
$virama = \u1039;    # U+1039 SIGN VIRAMA
$asat = \u103A;      # U+103A SIGN ASAT
# Dependent (medial) consonant signs
$med_y = \u103B;  # U+103B CONSONANT SIGN MEDIAL YA
$med_r = \u103C;  # U+103C CONSONANT SIGN MEDIAL RA
$med_w = \u103D;  # U+103D CONSONANT SIGN MEDIAL WA
$med_h = \u103E;  # U+103E CONSONANT SIGN MEDIAL HA
# Independent letters and letter-like punctuation symbols
$independent = [\u1000-\u102A \u103F \u104C-\u1055];
# Combining diacritics used to write tone/phonation in the IPA output
$creaky = \u0330;  # U+0330 COMBINING TILDE BELOW
$high = \u0301;    # U+0301 COMBINING ACUTE ACCENT
$low = \u0300;     # U+0300 COMBINING GRAVE ACCENT
$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused
#
# Preprocessing
#
# Rule notation used from here on: "before { key } after → replacement;".
# Only the key is replaced; the text outside the braces is context that must
# match but is left untouched. Each "::Null;" closes the current pass, so the
# rules after it run over the output of the rules before it.
#
# NOTE(review): the rule operator (→) and several single Myanmar letters were
# missing from this section; they have been restored from the comments that
# accompany each rule. Please confirm against the upstream CLDR transform.
::NFC;
# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
$vs_AA → $vs_aa;
# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
# Hmm, what would happen if the syllable ending in kinzi had non-low tone?
င\u103A $virama → င\u103A;
# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
$virama → $asat;
# Unstack U+103F GREAT SA.
ဿ → သ\u103Aသ;
# Insert a syllable boundary marker /./ before every independent letter.
# The key is empty, so this rule only inserts. No marker is added at the very
# start of the text, after an existing marker, or before a letter that is
# immediately followed by U+103A ASAT (i.e. a syllable-final consonant).
::Null;
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
# A consonant (plus any medials) that is directly followed by end of text or
# by a syllable boundary carries no written vowel.
::Null;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
#
# The rule deletes the ASAT of a second final consonant that directly follows
# an already closed syllable (ASAT, optional VISARGA, letter, ASAT).
::Null;
$asat ($visarga)? [\u1000-\u102A] { $asat → ;
# Deal with ၎င\u103Aး early.
၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
#
# Rhymes
#
# Each rule in this pass rewrites one rhyme spelling (an optional dependent
# vowel sign, an optional final consonant marked with U+103A ASAT, and an
# optional tone mark) as IPA. Rules whose left side contains \u1037 emit the
# $creaky diacritic; the others emit $high, $low, or a glottal stop /ʔ/.
# Within each vowel group the longer spellings are listed before the bare
# vowel sign.
#
# NOTE(review): this section looks damaged. The rule operator is absent from
# every rule, and many rules now have identical left sides (for example the
# three-rule "ɪ ... ɴ" group appears twice in a row), which suggests that the
# final consonant letter and the literal tone mark that told them apart were
# lost. The missing letters cannot be recovered with certainty from this
# file alone, so the rules are left untouched here; restore them from the
# upstream CLDR transform before use.
::Null;
က\u103A ɛʔ;
\u103A ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
\u1037\u103A ɪ $creaky ɴ;
\u103A ɪ $high ɴ;
\u103A ɪ $low ɴ;
\u103A ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
\u1037\u103A ɪ $creaky ɴ;
\u103A ɪ $high ɴ;
\u103A ɪ $low ɴ;
\u1037\u103A ɛ $creaky;
\u103A ɛ $high;
\u103A ɛ $low;
\u1037\u103A a $creaky ɴ;
\u103A a $high ɴ;
\u103A a $low ɴ;
\u103A aʔ;
\u1037\u103A a $creaky ɴ;
\u103A a $high ɴ;
\u103A a $low ɴ;
\u103A aʔ;
\u1037\u103A a $creaky ɴ;
\u103A a $high ɴ;
\u103A a $low ɴ;
\u1037\u103A ɛ $creaky;
\u103A ɛ $high;
\u103A ɛ $low;
\u103A aʔ;
# Rhymes written with VOWEL SIGN AA.
$vs_aa \u1037\u103A ɪ $creaky ɴ;
$vs_aa \u103A ɪ $high ɴ;
$vs_aa \u103A ɪ $low ɴ;
$vs_aa \u103A aʔ;
$vs_aa \u1037\u103A a $creaky ɴ;
$vs_aa \u103A a $high ɴ;
$vs_aa \u103A a $low ɴ;
$vs_aa \u1037\u103A a $creaky ɴ;
$vs_aa \u103A a $high ɴ;
$vs_aa \u103A a $low ɴ;
$vs_aa \u103A aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
$vs_aa \u1037\u103A ɛ $creaky;
$vs_aa \u103A ɛ $high;
$vs_aa \u103A ɛ $low;
$vs_aa \u1037 a $creaky; # redundant creaky tone
$vs_aa a $high;
$vs_aa a $low;
# Rhymes written with VOWEL SIGN I.
$vs_i က\u103A eɪ\u032Fʔ;
$vs_i \u103A eɪ\u032Fʔ;
$vs_i \u103A eɪ\u032Fʔ;
$vs_i \u1037\u103A e $creaky ɪ\u032Fɴ;
$vs_i \u103A e $high ɪ\u032Fɴ;
$vs_i \u103A e $low ɪ\u032Fɴ;
$vs_i \u103A eɪ\u032Fʔ;
$vs_i \u1037\u103A e $creaky ɪ\u032Fɴ;
$vs_i \u103A e $high ɪ\u032Fɴ;
$vs_i \u103A e $low ɪ\u032Fɴ;
# Rhymes written with VOWEL SIGN I followed by VOWEL SIGN U.
$vs_i $vs_u က\u103A aɪ\u032Fʔ;
$vs_i $vs_u \u1037\u103A a $creaky ɪ\u032Fɴ;
$vs_i $vs_u \u103A a $high ɪ\u032Fɴ;
$vs_i $vs_u \u103A a $low ɪ\u032Fɴ;
$vs_i $vs_u \u1037\u103A a $creaky ɪ\u032Fɴ;
$vs_i $vs_u \u103A a $high ɪ\u032Fɴ;
$vs_i $vs_u \u103A a $low ɪ\u032Fɴ;
$vs_i $vs_u \u1037\u103A o $creaky;
$vs_i $vs_u \u103A o $high;
$vs_i $vs_u \u103A o $low; # in က\u102D\u102Fယ\u103A /kò/
$vs_i $vs_u \u1037 o $creaky;
$vs_i $vs_u o $high;
$vs_i $vs_u o $low;
$vs_i $anusvara \u1037 e $creaky ɪ\u032Fɴ;
$vs_i $anusvara e $high ɪ\u032Fɴ;
$vs_i $anusvara e $low ɪ\u032Fɴ;
$vs_i i $creaky;
# Rhymes written with VOWEL SIGN II.
$vs_ii \u1037 i $creaky; # this does not usually occur
$vs_ii i $high;
$vs_ii i $low;
# Rhymes written with VOWEL SIGN U.
$vs_u က\u103A oʊ\u032Fʔ;
$vs_u \u103A oʊ\u032Fʔ;
$vs_u \u1037\u103A o $creaky ʊ\u032Fɴ;
$vs_u \u103A o $high ʊ\u032Fɴ;
$vs_u \u103A o $low ʊ\u032Fɴ;
$vs_u \u103A oʊ\u032Fʔ;
$vs_u \u1037\u103A o $creaky ʊ\u032Fɴ;
$vs_u \u103A o $high ʊ\u032Fɴ;
$vs_u \u103A o $low ʊ\u032Fɴ;
$vs_u \u103A oʊ\u032Fʔ;
$vs_u \u1037\u103A o $creaky ʊ\u032Fɴ;
$vs_u \u103A o $high ʊ\u032Fɴ;
$vs_u \u103A o $low ʊ\u032Fɴ;
$vs_u $anusvara \u1037 o $creaky ʊ\u032Fɴ;
$vs_u $anusvara o $high ʊ\u032Fɴ;
$vs_u $anusvara o $low ʊ\u032Fɴ;
$vs_u u $creaky;
# Rhymes written with VOWEL SIGN UU.
$vs_uu \u1037 u $creaky; # this does not usually occur
$vs_uu u $high;
$vs_uu u $low;
# Rhymes written with VOWEL SIGN E, alone or followed by VOWEL SIGN AA.
$vs_e \u103A ɪʔ;
$vs_e $vs_aa က\u103A aʊ\u032Fʔ;
$vs_e $vs_aa \u1037\u103A a $creaky ʊ\u032Fɴ;
$vs_e $vs_aa \u103A a $high ʊ\u032Fɴ;
$vs_e $vs_aa \u103A a $low ʊ\u032Fɴ;
$vs_e $vs_aa \u1037 ɔ $creaky;
$vs_e $vs_aa ɔ $high; # redundant high tone; this does not usually occur
$vs_e $vs_aa \u103A ɔ $low;
$vs_e $vs_aa ɔ $high;
$vs_e \u1037 e $creaky;
$vs_e e $high;
$vs_e e $low;
# Rhymes written with VOWEL SIGN AI.
$vs_ai \u1037 ɛ $creaky;
$vs_ai ɛ $high; # redundant high tone; this does not usually occur
$vs_ai ɛ $high;
# Rhymes written with ANUSVARA only.
$anusvara \u1037 a $creaky ɴ;
$anusvara a $high ɴ;
$anusvara a $low ɴ;
# Rhymes written with MEDIAL WA plus a final consonant.
$med_w \u103A ʊʔ;
$med_w \u1037\u103A ʊ $creaky ɴ;
$med_w \u103A ʊ $high ɴ;
$med_w \u103A ʊ $low ɴ;
$med_w \u103A ʊʔ;
$med_w \u1037\u103A ʊ $creaky ɴ;
$med_w \u103A ʊ $high ɴ;
$med_w \u103A ʊ $low ɴ;
#
# Medials
#
# NOTE(review): the rule operator (→) was missing from every rule in this
# section and has been restored. Letters that had also been lost are called
# out next to the rules concerned; please confirm them against the upstream
# CLDR transform.
::Null;
# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.
ကျ → t\u0361ɕ;
ချ → t\u0361ɕʰ;
ဂျ → d\u0361ʑ;
ဃျ → d\u0361ʑ;
ကြ → t\u0361ɕ;
ခြ → t\u0361ɕʰ;
ဂြ → d\u0361ʑ;
ဃြ → d\u0361ʑ;
# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
# (The left context ယ, U+101A LETTER YA, is restored from the comment above.)
ယ { [$med_y $med_r] → ;
# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials.
# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;
# Now MEDIAL WA comes last.
# Produce the palatal ʃ from (SA|LA)+YA+HA.
သျ\u103E → ʃ;
လျ\u103E → ʃ;
# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;
# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;
# Consume MEDIAL HA and apply devoicing.
# After the reordering above, MEDIAL HA directly follows its base letter.
# NOTE(review): the base letters below were lost and are restored on the
# assumption that the rules follow code-chart order, which lines up with the
# eleven outputs: NGA, NYA, NNYA, NNA, NA, MA, YA, RA, LA, WA, LLA.
င\u103E → ŋ\u030A;
ဉ\u103E → ɲ\u0325;
ည\u103E → ɲ\u0325;
ဏ\u103E → n\u0325;
န\u103E → n\u0325;
မ\u103E → m\u0325;
ယ\u103E → ʃ;
ရ\u103E → ʃ;
လ\u103E → l\u0325;
ဝ\u103E → w\u0325;
ဠ\u103E → l\u0325;
# Drop any remaining U+103E MEDIAL HA.
\u103E → ;
# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;
# Any medial still present maps to its plain glide.
\u103B → j;
\u103C → j;
\u103D → w;
#
# Initials
#
# These rules continue the pass opened before "Consume MEDIAL HA", so a
# letter that has already been rewritten there is not seen again here.
#
# NOTE(review): the rule operator (→) and almost all left-hand letters were
# missing from this section. They are restored on the assumption that each
# group lists its letters in code-chart order (U+1000..U+1021), which agrees
# with the group comments and with the one consonant rule that survived
# (က → k). Please confirm against the upstream CLDR transform.
#
# Velars
က → k;
ခ → kʰ;
ဂ → ɡ;
ဃ → ɡ;
င → ŋ;
# Historic palatals
စ → s;
ဆ → sʰ;
ဇ → z;
ဈ → z;
ဉ → ɲ;
ည → ɲ;
# Alveolars
ဋ → t;
ဌ → tʰ;
ဍ → d;
ဎ → d;
ဏ → n;
# Historic dentals ==> alveolars
တ → t;
ထ → tʰ;
ဒ → d;
ဓ → d;
န → n;
# Labials
ပ → p;
ဖ → pʰ;
ဗ → b;
ဘ → b;
မ → m;
# Other letters
ယ → j;
ရ → j; # historic /r/
# NOTE(review): the letter of the next rule was lost. လ is assumed because
# the rule sits directly before the plain လ rule; a longer rule for ရ placed
# after "ရ → j" would be masked by it. Confirm upstream.
လ\u103A → ; # final, typically not pronounced in native words
လ → l;
ဝ → w;
သ → θ; # historic /s/ ==> modern dental
ဟ → h;
ဠ → l;
အ → ʔ;
# Independent vowels
# For each letter the spellings with U+1037 DOT BELOW and with VISARGA come
# first so that the bare letter does not mask them. The base letter of each
# group is taken from its surviving VISARGA rule.
ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
ဣး → ʔí; # this does not usually occur
ဣ → ʔḭ;
ဤ\u1037 → ʔḭ; # this does not usually occur
ဤး → ʔí; # this does not usually occur
ဤ → ʔì;
ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
ဥး → ʔú; # this does not usually occur
ဥ → ʔṵ;
ဦ\u1037 → ʔṵ; # this does not usually occur
ဦး → ʔú;
ဦ → ʔù;
ဧ\u1037 → ʔḛ; # this does not usually occur
ဧး → ʔé;
ဧ → ʔè;
ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
ဩ → ʔɔ\u0301;
ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
ဪး → ʔɔ\u0301; # this does not usually occur
ဪ → ʔɔ\u0300;
# Various signs
# NOTE(review): the three symbols are assumed to be U+104C, U+104D and
# U+104F, i.e. the letter-like symbols around U+104E (၎); confirm upstream.
၌ → n\u0325aɪ\u032Fʔ;
၍ → jwḛ;
# ၎င\u103Aး was handled earlier.
၏ → ʔḭ;
#
# Postprocessing
#
# Both rules below have an empty replacement, i.e. they delete their match.
# The rule operator (→) was missing from them and has been restored.
#
# Delete any remaining U+103A ASAT.
$asat → ;
# Delete zero-width space, non-joiner, joiner.
[\u200B-\u200D] → ;
# Recompose the output (base letters plus the combining tone diacritics).
::NFC;