| # © 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Generated using tools/cldr/cldr-to-icu/build-icu-data.xml |
| # |
| # File: und_FONIPA_fa.txt |
| # Generated from CLDR |
| # |
| |
| # Vowels |
| # ------ |
| # In these rules, we produce ی و ا both for short and for long vowels. |
| # This would be wrong for writing Farsi or Arabic, but when transliterating |
| # foreign words and names, it is strongly preferred to vowel marks. |
| # Short schwa [ə] and a few other schwa-like vowels get omitted entirely |
| # unless at the end of the word, in which case we emit ه whose Farsi |
| # word-final pronunciation comes close to [ə]. At the beginning of words, |
| # Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding |
| # dark vowels; note that this use of آ is quite different from Arabic. |
| # Character classes used by the rules below. Braces wrap a string that is |
| # matched as one set element. |
| # i-like vowels: written ی, or ای at the beginning of a word. |
| $IVowel = [i ɪ e {e\u031E}]; |
| # u/o-like vowels: written و, or او at the beginning of a word. The set also |
| # lists w, {w\u0325}, ʍ and ʷ, so those are spelled و as well. |
| $UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ]; |
| # a-like vowels that are written ا in every position. |
| $AVowel = [ɛ œ ɜ æ ɶ]; |
| $DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}]; # آ instead of ا at beginning of words |
| # Schwa-like vowels: ای at the beginning of a word, ی when long, ه at the |
| # end of a word, dropped everywhere else. |
| $SchwaVowel = [ɘ ɵ ə {ɵ\u031E}]; |
| # Letters handled by the click rules, which spell them with چ. |
| $Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ]; |
| # Any character that is not a letter, mark or number. Used as left context |
| # to recognise the beginning of a word. |
| # NOTE(review): presumably this also matches at the very start of the text; |
| # confirm against ICU's handling of text limits. |
| $Boundary = [^[:L:][:M:][:N:]]; |
| # First pass: work on decomposed text so that diacritics are separate |
| # characters that can be matched and deleted individually. |
| ::NFD; |
| # Fold the tie-barred affricate t\u0361ʃ (or t\u035Cʃ) into the ligature ʧ |
| # before the tie bars are deleted by the next rule. Without this, the later |
| # affricate rule never sees a tie bar, and the sequence is spelled as separate |
| # [t] and [ʃ] (تش) instead of چ. |
| t [\u0361 \u035C] ʃ → ʧ; |
| # Delete modifier letters, tone marks and tone letters, tie bars and other |
| # diacritics that have no counterpart in the Persian spelling. |
| [ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ; |
| # Replace superscript modifier letters by their full-size letters so that the |
| # ordinary rules further down apply to them. |
| ʲ → j; |
| ᵐ → m; |
| ⁿ → n; |
| ᵑ → ŋ; |
| # Recompose before the rule passes that follow. |
| ::NFC; |
| # TODO: Diphthongs probably need more work. |
| # Vowel-sequence repairs. The replacements are still IPA letters; the vowel |
| # rules of the following pass turn them into Persian letters. |
| # Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک |
| # A u-like vowel, then j or an i-like vowel, then e or a schwa-like vowel, |
| # is rewritten to the literal sequence "uia". |
| $UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia; |
| # Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز |
| yʉ → iu; |
| # End of this pass; the rules below run over its output. |
| ::NULL; |
| # Vowels |
| # Each vowel class has a word-initial rule (left context $Boundary) followed |
| # by a general rule. Rule order matters: the first matching rule wins. |
| # Schwa-like vowels: ای at the beginning of a word, ی when long. |
| $Boundary {$SchwaVowel ː?} → ای; |
| $SchwaVowel ː → ی; |
| # A schwa-like vowel or e at the end of a word becomes ه. The syllable dot is |
| # excluded from the right context, so a following "." does not count as the |
| # end of the word. |
| {[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه; |
| # Any remaining schwa-like vowel is dropped. |
| $SchwaVowel → ; |
| # i-like vowels. In the general rule an optional length mark and a directly |
| # following j are absorbed into the same ی. |
| $Boundary {$IVowel ː?} → ای; |
| $IVowel ː? j? → ی; |
| # u/o-like vowels, with an optional length mark. |
| $Boundary {$UVowel ː?} → او; |
| $UVowel ː? → و; |
| # a-like vowels: ا in both positions. |
| $Boundary {$AVowel ː?} → ا; |
| $AVowel ː? → ا; |
| # Dark a-like vowels: آ at the beginning of a word, ا elsewhere. |
| $Boundary {$DarkAVowel ː?} → آ; |
| $DarkAVowel ː? → ا; |
| # Shadda for long (geminated) consonants |
| # The vowel rules earlier in this pass consume a length mark together with |
| # the vowel, so a ː that reaches this rule normally follows a consonant. |
| ː → \u0651; |
| # Affricates |
| # NOTE(review): the first pass deletes U+0361, so the {t\u0361ʃ} alternative |
| # looks unreachable here and only ʧ can match; confirm. |
| [{t\u0361ʃ} ʧ] → چ; |
| # Clicks |
| # A click after ɡ, g, ɠ or k is spelled کچ. Any other click, with or without |
| # a preceding n or ɲ, is spelled نچ. |
| [ɡ g ɠ k] $Click → کچ; |
| [n ɲ]? $Click → نچ; |
| # Nasal stops |
| [{m\u0325} m ɱ] → م; |
| [{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن; |
| # ŋ or ɴ followed by k is spelled نک. Otherwise it is spelled نگ, absorbing a |
| # directly following ɡ or g so that the گ is not written twice. |
| [{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک; |
| [{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ; |
| # Non-nasal stops |
| [p {p\u032A}] → پ; |
| [b {b\u032A} ɓ] → ب; |
| [{d\u033C} d ɗ ᶑ] → د; |
| [{t\u033C} t] → ت; |
| [ʈ] → ط; |
| [ɖ] → ض; |
| c → چ; |
| ɟ → دج; |
| k → ک; |
| [ɡ g ɠ] → گ; |
| [q ɢ ʡ ʛ] → ق; |
| # The glottal stop is not written. |
| ʔ → ; |
| # Sibilant fricatives |
| s → س; |
| z → ز; |
| # NOTE(review): ʄ is also a member of $Click, and the click rules come |
| # earlier in this pass, so the ʄ alternative here looks unreachable; confirm. |
| [ʃ ʂ ɕ ʄ] → ش; |
| [ʒ ʐ ʑ] → ژ; |
| # Non-sibilant fricatives |
| [ɸ f] → ف; |
| [β v] → و; |
| [{θ\u033C} θ {θ\u0331}] → ث; |
| [{ð\u033C} ð {ð\u0320}] → ذ; |
| ç → ش; |
| # ʝ absorbs a following i-like vowel and length mark into a single ی. |
| ʝ $IVowel? ː? → ی; |
| [x χ] → خ; |
| [ɣ ʁ] → غ; |
| ħ → ح; |
| ʕ → ع; |
| [h ɦ {ʔ\u031E}] → ه; |
| # Approximants, trills, flaps |
| ʋ → و; |
| ʙ → بر; |
| {r\u031D} → رژ; |
| [{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر; |
| [{ʀ\u0325} ʀ] → غ; |
| ʜ → ح; |
| ʢ → ع; |
| # j absorbs a following i-like vowel and length mark into a single ی. |
| j $IVowel? ː? → ی; |
| # Laterals |
| ɬ → شل; |
| ɮ → ژل; |
| # ʎ is spelled لی unless an i-like vowel, j or ʝ follows, since that sound |
| # supplies the ی itself; in that case the next rule spells it as plain ل. |
| {[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی; |
| [{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل; |
| [ʟ {ʟ\u0320}] → غ; |
| # Independent pass for misc cleanup. It runs over the Persian output of the |
| # rules above. |
| ::NULL; |
| # Strip off syllable markers. They are kept until this pass because the |
| # word-final vowel rule above inspects them. |
| \. → ; |
| # Sequences of three or more ووو look very confusing; we shorten them to two. |
| # Polish Darłowo [darwɔvɔ] → داروو not داروووو |
| ووو+ → وو; |
| |