# (source-viewer residue, not part of the rules) blob: b1c6c8ac28fb8d159b5618cc0c4e86d4c5e455a5 [file] [log] [blame]
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: si_si_FONIPA.txt
# Generated from CLDR
#
# Sinhala pronunciation rules
#
# Output
# k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s h f
# ə əː a aː æ æː i iː u uː e eː o oː
#
# References
# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
# Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
# Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
# pages 890–897. http://www.aclweb.org/anthology/P06-2114
# Simplify ya + yansaya to plain ya after a consonant.
# Yansaya is the sequence virama + ZWJ + ya. Text before "{" is context: it
# must match but is not rewritten. The ZWJ is written as an escape so that it
# stays visible in editors.
# NOTE(review): the "→" operator and the leading ය of the key were missing from
# this copy; both are restored from the comment above. Confirm against upstream.
[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය \u0DCA \u200D ය → ය;
# Delete ZWNJ and ZWJ to simplify further processing.
\u200C → ;
\u200D → ;
# Insert a schwa after every consonant that is not followed by a dependent vowel
# or virama.
# The "}" ends the key; the negated set after it is look-ahead context only,
# so the following character is kept as is.
::Null;
([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə;
# Pronunciation rules proper.
# "::Null;" starts a new pass, so the rules below see the output of the
# schwa-insertion pass.
::Null;
# fප is an alternative spelling of ෆ.
# This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
# NOTE(review): the "→" operator was missing from this copy and is restored.
[Ff]ප → f;
# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
# or in zස\u0DD3බ\u0DCA‍රා (zebra) [see https://si.wikipedia.org/wiki/‍zස\u0DD3බ\u0DCA‍රා].
[Zz]ස → z;
# NOTE(review): from here to the end of the independent vowels, this copy had
# lost the "→" operators and the Sinhala source characters. The source
# characters are restored as escapes in Unicode code-point order (the 18 vowel
# outputs line up one-to-one with U+0D85..U+0D96). Confirm against upstream.
# Signs.
\u0D82 → ŋ;      # ANUSVARAYA
o → ŋ; # common substitution for anusvaraya
# The "|" puts the cursor before the output, so the doubled consonant is
# processed again by the consonant rules further down.
# NOTE(review): the leading visarga is inferred from position (this rule sits
# directly before the rule producing h); without a leading key character the
# rule would re-match its own output. Lowest-confidence reconstruction here.
\u0D83 ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # TODO: check which consonants geminate
\u0D83 → h;      # VISARGAYA
# Independent vowels.
\u0D85 → a;      # AYANNA
\u0D86 → aː;     # AAYANNA
\u0D87 → æ;      # AEYANNA
\u0D88 → æː;     # AEEYANNA
\u0D89 → i;      # IYANNA
\u0D8A → iː;     # IIYANNA
\u0D8B → u;      # UYANNA
\u0D8C → uː;     # UUYANNA
\u0D8D → ri;     # IRUYANNA
\u0D8E → ruː;    # IRUUYANNA
\u0D8F → ilu;    # ILUYANNA
\u0D90 → iluː;   # ILUUYANNA
\u0D91 → e;      # EYANNA
\u0D92 → eː;     # EEYANNA
\u0D93 → aj;     # AIYANNA
\u0D94 → o;      # OYANNA
\u0D95 → oː;     # OOYANNA
\u0D96 → aw; # TODO: check if this is correct
# Consonants.
# NOTE(review): this copy had lost the "→" operators and the Sinhala source
# characters. The source characters are restored as escapes in Unicode
# code-point order: the 41 outputs line up one-to-one with the assigned code
# points in U+0D9A..U+0DC6. Confirm against upstream.
\u0D9A → k;      # ALPAPRAANA KAYANNA
\u0D9B → k;      # MAHAAPRAANA KAYANNA
\u0D9C → ɡ;      # ALPAPRAANA GAYANNA
\u0D9D → ɡ;      # MAHAAPRAANA GAYANNA
\u0D9E → ŋ;      # KANTAJA NAASIKYAYA
\u0D9F → ᵑɡ;     # SANYAKA GAYANNA
\u0DA0 → c;      # ALPAPRAANA CAYANNA
\u0DA1 → c;      # MAHAAPRAANA CAYANNA
\u0DA2 → ɟ;      # ALPAPRAANA JAYANNA
\u0DA3 → ɟ;      # MAHAAPRAANA JAYANNA
\u0DA4 → ɲ;      # TAALUJA NAASIKYAYA
\u0DA5 → kɲ; # TODO: double-check
\u0DA6 → ɟ;      # SANYAKA JAYANNA
\u0DA7 → ʈ;      # ALPAPRAANA TTAYANNA
\u0DA8 → ʈ;      # MAHAAPRAANA TTAYANNA
\u0DA9 → ɖ;      # ALPAPRAANA DDAYANNA
\u0DAA → ɖ;      # MAHAAPRAANA DDAYANNA
\u0DAB → n;      # MUURDHAJA NAYANNA
\u0DAC → ⁿɖ;     # SANYAKA DDAYANNA
\u0DAD → t;      # ALPAPRAANA TAYANNA
\u0DAE → t;      # MAHAAPRAANA TAYANNA
\u0DAF → d;      # ALPAPRAANA DAYANNA
\u0DB0 → d;      # MAHAAPRAANA DAYANNA
\u0DB1 → n;      # DANTAJA NAYANNA
# NOTE(review): the file header and $c list ⁿd as an output, but no rule
# produces it; this prenasalized letter maps to plain d. Output kept unchanged.
\u0DB3 → d;      # SANYAKA DAYANNA
\u0DB4 → p;      # ALPAPRAANA PAYANNA
\u0DB5 → p;      # MAHAAPRAANA PAYANNA
\u0DB6 → b;      # ALPAPRAANA BAYANNA
\u0DB7 → b;      # MAHAAPRAANA BAYANNA
\u0DB8 → m;      # MAYANNA
# NOTE(review): same question as for U+0DB3: the header and $c list ᵐb, but
# the output here is plain b. Output kept unchanged.
\u0DB9 → b;      # AMBA BAYANNA
\u0DBA → j;      # YAYANNA
\u0DBB → r;      # RAYANNA
\u0DBD → l;      # DANTAJA LAYANNA
\u0DC0 → w;      # VAYANNA
\u0DC1 → ʃ;      # TAALUJA SAYANNA
\u0DC2 → ʃ;      # MUURDHAJA SAYANNA
\u0DC3 → s;      # DANTAJA SAYANNA
\u0DC4 → h;      # HAYANNA
\u0DC5 → l;      # MUURDHAJA LAYANNA
\u0DC6 → f;      # FAYANNA
\u0DCA → ; # delete virama
# Dependent vowel signs.
# NOTE(review): this copy had lost the "→" operators and every source
# character that was not already written as an escape. The missing ones are
# restored in Unicode code-point order around the four surviving escapes
# (U+0DD2, U+0DD3, U+0DD4, U+0DD6). Confirm against upstream.
\u0DCF → aː;     # AELA-PILLA
\u0DD0 → æ;      # KETTI AEDA-PILLA
\u0DD1 → æː;     # DIGA AEDA-PILLA
\u0DD2 → i;
\u0DD3 → iː;
\u0DD4 → u;
\u0DD6 → uː;
\u0DD8 → ru;     # GAETTA-PILLA
\u0DD9 → e;      # KOMBUVA
\u0DDA → eː;     # DIGA KOMBUVA
\u0DDB → aj;     # KOMBU DEKA
\u0DDC → o;      # KOMBUVA HAA AELA-PILLA
\u0DDD → oː;     # KOMBUVA HAA DIGA AELA-PILLA
\u0DDE → aw; # TODO: check if this is correct
\u0DDF → lu;     # GAYANUKITTA
\u0DF2 → ruː;    # DIGA GAETTA-PILLA
\u0DF3 → luː;    # DIGA GAYANUKITTA
# Heuristics for turning /ə/ into /a/. Based on [1].
# Notation: in "before { key } after → result" only the key is rewritten; the
# text on either side is context that must match but is left unchanged.
# NOTE(review): the "→" operators were missing from this copy and are restored
# in front of each replacement text.
# $c: the consonant phonemes; braces make each multi-character prenasalized
# stop a single member of the set.
$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f];
# $s: any character that is not a letter; used below as word-boundary context.
$s=[:^L:];
# Rule #1
# The exceptions come first because the first matching rule in a pass wins.
::Null;
$s sv { ə → ə; # exception (a)
$s k { ə } r → ə; # exception (b)
$s $c { ə } $s → ə; # exception (c)
$s $c $c { ə → a;
$s $c { ə → a;
# Rule #2
::Null;
$c r { ə } $c → a; # clause (a) and (b)
$c r { a } h → a; # clause (d), exception
$c r { a } $c → ə; # clause (c)
# Rule #3
# The paper is unclear about what this rule means. The interpretation here
# assumes that "preceded" in the paper is a typo and should be read "followed".
# NOTE(review): throughout Rules #3 to #8 the "→" operators were missing from
# this copy and are restored in front of each replacement text.
::Null;
[a e æ o ə] h { ə → a;
# Rules #4 through #7
# Here the key comes first and everything after "}" is look-ahead context.
::Null;
ə } $c $c → a; # Rule #4
ə } [rbɖʈ] $s → ə; # Rule #5 exception
ə } $c $s → a; # Rule #5
ə } ji $s → a; # Rule #6
k { ə } [rl] u → a; # Rule #7
# Rule #8
# Note that the paper doesn't say explicitly that this rule should be
# anchored at the beginning of a word, but the remarks before the rules
# seem to imply this.
::Null;
# NOTE(review): the set below was unterminated in this copy ("l[aeoj"); the
# closing bracket is restored together with the operator.
$s k { a } l[aeoj] → ə; # Typo in paper: /j/ was /y/.
$s k { a } le[mh][ui] → ə;
$s k { alə } h[ui] → əle;
$s k { a } lə → ə;
# Diphthongs
# Glide + vowel sequences are reduced to a bare glide after a vowel; the vowel
# set before "{" is context only and is not rewritten.
# NOTE(review): the "→" operators were missing from this copy and are restored
# in front of each replacement text.
::Null;
www+ → ww; # යෞව\u0DCAවන
[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w;
əji → aj;
iji → iː; # perhaps: ij
[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j;