| # © 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Generated using tools/cldr/cldr-to-icu/build-icu-data.xml |
| # |
| # File: Grek_Latn_UNGEGN.txt |
| # Generated from CLDR |
| # |
| |
| # For modern Greek, based on UNGEGN rules. |
| # Rules are predicated on running NFD first, and NFC afterwards: the rules see decomposed text (::NFD on the way in, ::NFC at the end of the file); the parenthesized forms are the ones used for the reverse (Latin to Greek) direction. |
| # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN (the active filter two lines below restricts the forward transform to Greek-script characters, Mn/Me combining marks and the listed punctuation) |
| # WARNING: need to add accents to both filters ### |
| # :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ; |
| :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; |
| ::NFD (NFC) ; |
| # Useful variables: character classes and combining marks shared by the rules below. $under (\u0331) is the mark appended to a Latin letter to keep a mapping reversible (e.g. η ↔ i $under versus ι ↔ i). NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are not referenced by any rule visible in this file; confirm before removing. |
| $lower = [[:latin:][:greek:] & [:Ll:]] ; |
| $upper = [[:latin:][:greek:] & [:Lu:]] ; |
| $accent = [[:Mn:][:Me:]] ; |
| $macron = \u0304 ; |
| $ddot = \u0308 ; |
| $lcgvowel = [αεηιουω] ; |
| $ucgvowel = [ΑΕΗΙΟΥΩ] ; |
| $gvowel = [$lcgvowel $ucgvowel] ; |
| $lcgvowelC = [$lcgvowel $accent] ; |
| $evowel = [aeiouyAEIOUY]; |
| $vowel = [ $evowel $gvowel] ; |
| $beforeLower = $accent * $lower ; |
| $gammaLike = [ΓΚΞΧγκξχϰ] ; |
| $egammaLike = [GKXCgkxc] ; |
| $smooth = \u0313 ; |
| $rough = \u0314 ; |
| $iotasub = \u0345 ; |
| $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; |
| $under = \u0331; |
| $caron = \u030C; |
| $afterLetter = [:L:] [\'$accent]* ; |
| $beforeLetter = [\'$accent]* [:L:] ; |
| # Fix punctuation: the Greek question mark ; becomes ? and the raised dot · becomes : (and back again in the reverse direction). |
| # preserve original: a literal : or ? already present in the Greek text gets $under appended, so the reverse transform can tell it apart from a : or ? produced from · or ; (these two rules must stay ahead of the plain ones). |
| \: ↔ \: $under ; |
| \? ↔ \? $under ; |
| \; ↔ \? ; |
| · ↔ \: ; |
| # Fix any ancient characters that creep in (forward direction only): \u0342, \u0302 and \u0300 are all replaced by the acute accent \u0301; the $smooth and $rough breathing marks, $iotasub and the spacing ͺ are deleted. |
| \u0342 → \u0301 ; |
| \u0302 → \u0301 ; |
| \u0300 → \u0301 ; |
| $smooth → ; |
| $rough → ; |
| $iotasub → ; |
| ͺ → ; |
| # need to have these up here so the rules don't mask: η/Η and ω/Ω map to i/I and o/O plus $under (keeping them distinct from ι ↔ i and ο ↔ o), and Ψ/ψ map to the digraph ps (Ps when followed by optional accents and a lowercase letter, otherwise PS); presumably the concern is that the plain i, o and p rules further down would otherwise match first in the reverse direction. |
| η ↔ i $under ; |
| Η ↔ I $under ; |
| Ψ } $beforeLower ↔ Ps ; |
| Ψ ↔ PS ; |
| ψ ↔ ps ; |
| ω ↔ o $under ; |
| Ω ↔ O $under; |
| # at beginning or end of word (i.e. next to a character that is neither a letter nor an accent), convert μπ to b; the same pair in any other case combination becomes B. In reverse, b always expands to μπ, and B to Μπ before a lowercase letter, otherwise ΜΠ. |
| [^[:L:]$accent] { μπ → b ; |
| μπ } [^[:L:]$accent] → b ; |
| [^[:L:]$accent] { [Μμ][Ππ] → B ; |
| [Μμ][Ππ] } [^[:L:]$accent] → B ; |
| μπ ← b ; |
| Μπ ← B } $beforeLower ; |
| ΜΠ ← B ; |
| # handle diphthongs ending with upsilon: ου ↔ ou in every case combination; after $fmaker (an already-romanized a, e or i, optionally followed by $under) υ becomes v $under before a $softener and f $under otherwise, with any captured marks on the υ emitted in front of the new letter; Υ in that position becomes V $under before a $softener, else U $under; any remaining υ/Υ ↔ y/Y. |
| ου ↔ ou ; |
| ΟΥ ↔ OU ; |
| Ου ↔ Ou ; |
| οΥ ↔ oU ; |
| $fmaker = [aeiAEI] $under ? ; |
| $shiftForwardVowels = [[:Mn:]-[\u0308]]; # any nonspacing mark except the diaeresis; note: a diaeresis keeps the items separate. NOTE(review): the υ → f rule below has no trailing context, so confirm that υ followed by \u0308 is really left for the plain υ ↔ y rule |
| $fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; |
| υ $1 ← ( $shiftForwardVowels )* v $under ; |
| $fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; |
| υ $1 ← ( $shiftForwardVowels )* f $under ; |
| $fmaker { Υ } $softener ↔ V $under ; |
| $fmaker { Υ ↔ U $under ; |
| υ ↔ y ; |
| Υ ↔ Y ; |
| # NORMAL: letter-by-letter mappings for α through ρ. Context-dependent cases: γ/Γ before a $gammaLike letter ↔ n/N (in reverse, n/N before an $egammaLike letter); θ/Θ ↔ th, Th (before a lowercase letter) or TH; ν/Ν before a $gammaLike letter get a trailing ' (n', N') so they are not read back as γ. NOTE(review): the lowercase ν rule is forward-only (→) while the uppercase Ν rule is bidirectional (↔); confirm this asymmetry is intended. |
| α ↔ a ; |
| Α ↔ A ; |
| β ↔ v ; |
| Β ↔ V ; |
| γ } $gammaLike ↔ n } $egammaLike ; |
| γ ↔ g ; |
| Γ } $gammaLike ↔ N } $egammaLike ; |
| Γ ↔ G ; |
| δ ↔ d ; |
| Δ ↔ D ; |
| ε ↔ e ; |
| Ε ↔ E ; |
| ζ ↔ z ; |
| Ζ ↔ Z ; |
| θ ↔ th ; |
| Θ } $beforeLower ↔ Th ; |
| Θ ↔ TH ; |
| ι ↔ i ; |
| Ι ↔ I ; |
| κ ↔ k ; |
| Κ ↔ K ; |
| λ ↔ l ; |
| Λ ↔ L ; |
| μ ↔ m ; |
| Μ ↔ M ; |
| ν } $gammaLike → n\' ; |
| ν ↔ n ; |
| Ν } $gammaLike ↔ N\' ; |
| Ν ↔ N ; |
| ξ ↔ x ; |
| Ξ ↔ X ; |
| ο ↔ o ; |
| Ο ↔ O ; |
| π ↔ p ; |
| Π ↔ P ; |
| ρ ↔ r ; |
| Ρ ↔ R ; |
| # insert separator ' between an already-romanized P/p and any following sigma-like letter (forward only), so the output is not read back as the digraph ps = ψ |
| [Pp] { } [ςσΣϷϸϺϻ] → \' ; |
| # special S variants: sho maps to S/s plus \u030C, san to S/s plus \u0302 |
| Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L |
| ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L |
| Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L |
| ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L |
| # Caron means exception. NOTE(review): the rules below mark the unexpected sigma form with $under, not $caron; this comment looks stale, confirm. |
| # before a letter, initial (or medial): σ is the expected form here and maps to plain s; ς in this position is the exception and maps to s $under |
| ς } $beforeLetter ↔ s $under } $beforeLetter; |
| σ } $beforeLetter ↔ s } $beforeLetter; |
| # otherwise, after a letter = final: ς is the expected form here and maps to plain s; σ in this position is the exception and maps to s $under |
| $afterLetter { σ ↔ $afterLetter { s $under; |
| $afterLetter { ς ↔ $afterLetter { s ; |
| # otherwise (isolated) = initial: treated like the initial case, so σ ↔ s and ς ↔ s $under |
| ς ↔ s $under; |
| σ ↔ s ; |
| # [Pp] { Σ ↔ \'S ; |
| Σ ↔ S ; |
| # Remaining letters τ, φ, χ: χ/Χ map to the digraph ch (Ch when followed by optional accents and a lowercase letter, otherwise CH). |
| τ ↔ t ; |
| Τ ↔ T ; |
| φ ↔ f ; |
| Φ ↔ F ; |
| χ ↔ ch ; |
| Χ } $beforeLower ↔ Ch ; |
| Χ ↔ CH ; |
| # Completeness for ASCII (reverse direction only): Latin letters that have no rule of their own above (h, c, j, q, u, w and their capitals) are rewritten to a letter or digraph that does; the leading | places the cursor before the replacement so the rules above then convert it to Greek. u/w (U/W) become b (B) before a $vowel and y (Y) otherwise. |
| # $ignore = [[:Mark:]''] * ; |
| | ch ← h ; |
| | k ← c ; |
| | i ← j ; |
| | k ← q ; |
| | b ← u } $vowel ; |
| | b ← w } $vowel ; |
| | y ← u ; |
| | y ← w ; |
| | Ch ← H ; |
| | K ← C ; |
| | I ← J ; |
| | K ← Q ; |
| | B ← W } $vowel ; |
| | B ← U } $vowel ; |
| | Y ← W ; |
| | Y ← U ; |
| # Completeness for Greek (forward direction only): symbol and variant letter forms are rewritten to the ordinary Greek letter with the cursor (|) left in front of it, so the regular rules above then romanize it; ϳ is mapped directly to j. |
| ϐ → | β ; |
| ϑ → | θ ; |
| ϒ → | Υ ; |
| ϕ → | φ ; |
| ϖ → | π ; |
| ϰ → | κ ; |
| ϱ → | ρ ; |
| ϲ → | σ ; |
| Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL |
| ϳ → j ; |
| ϴ → | Θ ; |
| ϵ → | ε ; |
| µ → | μ ; |
| # delete any trailing ' marks used for roundtripping (reverse direction only): the ' is dropped when it sits between an already-converted Π/π and a following S/s, or between Ν/ν and a following $egammaLike letter. |
| ← [Ππ] { \' } [Ss] ; |
| ← [Νν] { \' } $egammaLike ; |
| ::NFC (NFD) ; |
| # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD (the parenthesized filter below applies to the reverse transform: Latin-script characters, Mn/Me combining marks, and the characters ' : ?) |
| :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; |
| |