| # © 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Generated using tools/cldr/cldr-to-icu/build-icu-data.xml |
| # |
| # File: si_si_FONIPA.txt |
| # Generated from CLDR |
| # |
| |
| # Sinhala pronunciation rules |
| # |
| # Output |
| # k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f |
| # ə əː a aː æ æː i iː u uː e eː o oː |
| # |
| # References |
| # [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage: |
| # Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis. |
| # Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions, |
| # pages 890–897. http://www.aclweb.org/anthology/P06-2114 |
| # Normalization of the raw input, before any phoneme is produced. |
| # Simplify ya + yansaya to plain ya after a consonant. |
| # Everything before "{" is context only: a consonant (U+0D9A..U+0DC6), a |
| # virama (U+0DCA) and an optional ZWJ must precede the match but are not |
| # rewritten. |
| # NOTE(review): the matched text ය\u0DCAය has no ZWJ between the virama and |
| # the second ය, while a yansaya is normally encoded with one; confirm that |
| # this pattern is what was intended. |
| [\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCAය → ය; |
| # Delete ZWNJ and ZWJ to simplify further processing. |
| \u200C → ; |
| \u200D → ; |
| # Insert a schwa after every consonant that is not followed by a dependent vowel |
| # or virama. |
| # "::Null;" starts a new pass, so this rule only ever sees text from which |
| # ZWNJ and ZWJ have already been removed. |
| ::Null; |
| # $1 is the captured consonant. The set after "}" is look-ahead context and is |
| # not consumed; it excludes U+0DCA..U+0DDF plus U+0DF2 and U+0DF3. |
| # NOTE(review): confirm that a consonant at the very end of the input also |
| # gets its schwa, i.e. that the negated set matches the text boundary. |
| ([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə; |
| # Pronunciation rules proper. |
| # New pass: from here on every Sinhala letter and sign is replaced by its |
| # phoneme. Rules are tried from top to bottom, so the multi-character |
| # patterns below are listed ahead of the single-letter mappings. |
| ::Null; |
| # fප is an alternative spelling of ෆ. |
| # This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield) |
| # [see http://bradshawofthefuture.blogspot.com/2013/02/f.html]. |
| [Ff]ප → f; |
| # zස is seemingly the only way to unambiguously indicate a voiced /z/ sound. |
| # This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease) |
| # [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය] |
| # or in zස\u0DD3බ\u0DCAරා (zebra) [see https://si.wikipedia.org/wiki/zස\u0DD3බ\u0DCAරා]. |
| [Zz]ස → z; |
| ං → ŋ; |
| # NOTE(review): this looks like the plain Latin letter o, so any Latin "o" in |
| # the input would also become ŋ; confirm that this is acceptable. |
| o → ŋ; # common substitution for anusvaraya |
| # Visarga before a consonant doubles that consonant: the replacement is |
| # consonant + virama + consonant, and the "|" puts the cursor in front of it so |
| # that the new text is converted by the consonant and virama rules of this |
| # pass. Must stay above the plain ඃ → h rule, which handles every other visarga. |
| ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1; # TODO: check which consonants geminate |
| ඃ → h; |
| # Independent vowel letters. Each letter is replaced unconditionally; long |
| # vowels are written with ː. |
| අ → a; |
| ආ → aː; |
| ඇ → æ; |
| ඈ → æː; |
| ඉ → i; |
| ඊ → iː; |
| උ → u; |
| ඌ → uː; |
| # NOTE(review): ඍ gives ri, while its long form ඎ gives ruː and the dependent |
| # sign ෘ gives ru; confirm that ri is intended here. |
| ඍ → ri; |
| ඎ → ruː; |
| ඏ → ilu; |
| ඐ → iluː; |
| එ → e; |
| ඒ → eː; |
| ඓ → aj; |
| ඔ → o; |
| ඕ → oː; |
| ඖ → aw; # TODO: check if this is correct |
| # Consonant letters. Only the consonant itself is emitted here; a following ə, |
| # if any, was already inserted by the schwa rule in the earlier pass. |
| # Several distinct letters share one output (e.g. ක and ඛ both give k). |
| ක → k; |
| ඛ → k; |
| ග → ɡ; |
| ඝ → ɡ; |
| ඞ → ŋ; |
| ඟ → ᵑɡ; |
| ච → c; |
| ඡ → c; |
| ජ → ɟ; |
| ඣ → ɟ; |
| ඤ → ɲ; |
| ඥ → kɲ; # TODO: double-check |
| # NOTE(review): ඟ, ඬ, ඳ and ඹ get a prenasalized output (ᵑɡ, ⁿɖ, ⁿd, ᵐb) but |
| # ඦ is mapped to plain ɟ; confirm that this is intended. |
| ඦ → ɟ; |
| ට → ʈ; |
| ඨ → ʈ; |
| ඩ → ɖ; |
| ඪ → ɖ; |
| ණ → n; |
| ඬ → ⁿɖ; |
| ත → t; |
| ථ → t; |
| ද → d; |
| ධ → d; |
| න → n; |
| ඳ → ⁿd; |
| ප → p; |
| ඵ → p; |
| බ → b; |
| භ → b; |
| ම → m; |
| ඹ → ᵐb; |
| ය → j; |
| ර → r; |
| ල → l; |
| ව → w; |
| ශ → ʃ; |
| ෂ → ʃ; |
| ස → s; |
| හ → h; |
| ළ → l; |
| ෆ → f; |
| # Virama and dependent vowel signs. The virama produces no output; each vowel |
| # sign is replaced by its vowel. Consonants followed by one of these marks did |
| # not receive a schwa in the earlier pass. |
| \u0DCA → ; # delete virama |
| ා → aː; |
| ැ → æ; |
| ෑ → æː; |
| \u0DD2 → i; |
| \u0DD3 → iː; |
| \u0DD4 → u; |
| \u0DD6 → uː; |
| ෘ → ru; |
| ෙ → e; |
| ේ → eː; |
| ෛ → aj; |
| ො → o; |
| ෝ → oː; |
| ෞ → aw; # TODO: check if this is correct |
| ෟ → lu; |
| ෲ → ruː; |
| ෳ → luː; |
| # Heuristics for turning /ə/ into /a/. Based on [1]. |
| # $c: any single consonant phoneme written by the pronunciation rules above. |
| # Entries in braces (e.g. {ᵑɡ}) are multi-character strings that match as one |
| # unit. |
| $c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f]; |
| # $s: any character that is not a letter (general category L). The rules |
| # below use it to mark the beginning or end of a word. |
| $s=[:^L:]; |
| # Rule #1 |
| # A schwa that follows the one or two consonants at the start of a word |
| # becomes a, except in the three cases listed first. The exception rules |
| # rewrite ə to itself only to stop the two general rules at the bottom from |
| # matching at that position; rules are tried from top to bottom. |
| ::Null; |
| # The paper [1] writes this onset as /sv/, but ව is transcribed as w in this |
| # file and v is not part of $c, so the onset has to be spelled sw in order to |
| # match anything. |
| $s sw { ə → ə; # exception (a) |
| # Word-initial k + ə that is followed by r. |
| $s k { ə } r → ə; # exception (b) |
| # A word that consists of one consonant plus ə only. |
| $s $c { ə } $s → ə; # exception (c) |
| $s $c $c { ə → a; |
| $s $c { ə → a; |
| # Rule #2 |
| # Vowel that follows a consonant + r cluster and precedes another consonant. |
| # Rules are tried from top to bottom: the second rule rewrites a to itself |
| # before h so that the third rule does not turn that a into ə. |
| ::Null; |
| $c r { ə } $c → a; # clause (a) and (b) |
| $c r { a } h → a; # clause (d), exception |
| $c r { a } $c → ə; # clause (c) |
| # Rule #3 |
| # The paper is unclear about what this rule means. The interpretation here |
| # assumes that "preceded" in the paper is a typo and should be read "followed". |
| # As implemented: a ə directly after h becomes a when that h itself comes |
| # directly after one of a, e, æ, o or ə. |
| ::Null; |
| [a e æ o ə] h { ə → a; |
| # Rules #4 through #7 |
| # All of these decide on a ə from what follows it; the text after "}" is |
| # look-ahead context and is not consumed. Rules are tried from top to bottom, |
| # so the Rule #5 exception has to stay above the general Rule #5. |
| ::Null; |
| ə } $c $c → a; # Rule #4 |
| ə } [rbɖʈ] $s → ə; # Rule #5 exception |
| ə } $c $s → a; # Rule #5 |
| ə } ji $s → a; # Rule #6 |
| k { ə } [rl] u → a; # Rule #7 |
| # Rule #8 |
| # Note that the paper doesn't say explicitly that this rule should be |
| # anchored at the beginning of a word, but the remarks before the rules |
| # seem to imply this. |
| # Each rule below applies to a word-initial ka ($s k before the "{") and turns |
| # its a back into ə when the word continues with the listed sequence. |
| ::Null; |
| $s k { a } l[aeo]ːj → ə; # Typo in paper: /j/ was /y/. |
| $s k { a } le[mh][ui] → ə; |
| # This rule rewrites three characters at once: alə becomes əle before hu or hi. |
| $s k { alə } h[ui] → əle; |
| $s k { a } lə → ə; |
| # Diphthongs |
| # Final clean-up pass over the phoneme string. |
| ::Null; |
| # Three or more consecutive w are reduced to two. |
| www+ → ww; # යෞව\u0DCAවන |
| # After any of the listed vowels, wu is reduced to w. |
| [i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w; |
| əji → aj; |
| iji → iː; # perhaps: ij |
| # After any of the listed vowels, ji is reduced to j. |
| [u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j; |
| |