| #-------------------------------------------------------------------- |
| # Copyright (c) 1999-2004, International Business Machines |
| # Corporation and others. All Rights Reserved. |
| #-------------------------------------------------------------------- |
| # For modern Greek, based on UNGEGN rules. |
| |
| # Rules are predicated on running NFD first, and NFC afterwards |
| # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN |
| # WARNING: need to add accents to both filters ### |
| # :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ; |
| |
# Global filter for the forward (Greek-to-Latin) direction: only Greek-script
# characters, nonspacing/enclosing marks (Mn, Me), and the punctuation listed
# in the second set are offered to the rules below.
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;
# Decompose with NFD before the rules run going forward; the parenthesised NFC
# is the counterpart applied in the reverse direction.
::NFD (NFC) ;
| |
# Useful variables

# Lowercase letters (Ll) of the Latin and Greek scripts; used via $beforeLower.
$lower = [[:latin:][:greek:] & [:Ll:]] ;
# Uppercase letters (Lu) of the Latin and Greek scripts.
# NOTE(review): $upper is not referenced by any rule visible in this file.
$upper = [[:latin:][:greek:] & [:Lu:]] ;
# Any nonspacing (Mn) or enclosing (Me) mark.
$accent = [[:Mn:][:Me:]] ;

# Named single combining marks.
# NOTE(review): $macron and $ddot are not referenced by any rule visible in this file.
$macron = ̄ ;
$ddot = ̈ ;

# Greek vowels, lowercase / uppercase / both.
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
# Lowercase Greek vowels plus any accent mark.
# NOTE(review): $lcgvowelC is not referenced by any rule visible in this file.
$lcgvowelC = [$lcgvowel $accent] ;

# Latin vowels (including y), and the union of Latin and Greek vowels.
# $vowel is used as a context in the reverse-only "Completeness for ASCII" rules.
$evowel = [aeiouyAEIOUY];
$vowel = [ $evowel $gvowel] ;

# Zero or more accents followed by a lowercase letter. Used as a following
# context to choose title-case output (Ps, Th, Ch) over all-caps (PS, TH, CH).
$beforeLower = $accent * $lower ;

# Greek letters before which gamma is written as n (see the gamma rules),
# and the Latin letters used as the matching context in the reverse direction.
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
# Ancient Greek breathing marks and iota subscript; the forward rules delete them.
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;

# Letters (selected consonants plus every Greek vowel) that, when they follow
# an upsilon diphthong, make the upsilon come out as v rather than f.
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;

# Combining mark appended to a Latin letter or punctuation mark to tag a
# variant, so that the reverse direction can recover the original Greek.
$under = ̱;

# NOTE(review): $caron is not referenced by any rule visible in this file.
$caron = ̌;

# A letter followed by any run of apostrophes/accents (used as a preceding
# context), and the mirror image: apostrophes/accents followed by a letter
# (used as a following context). Both serve the sigma rules.
$afterLetter = [:L:] [\'$accent]* ;
$beforeLetter = [\'$accent]* [:L:] ;
| |
# Fix punctuation

# preserve original
# A literal ':' or '?' in the Greek text is tagged with $under, because the
# untagged Latin ':' and '?' are produced below from the Greek punctuation
# marks and must map back to those marks in the reverse direction.
\: <> \: $under ;
\? <> \? $under ;

# ';' in Greek text corresponds to Latin '?', and the middle dot to Latin ':'.
\; <> \? ;
· <> \: ;
| |
# Fix any ancient characters that creep in

# Forward-only rules (">"), so none of these is restored in the reverse direction.
# The three combining accents on the left are each replaced by the single
# combining mark on the right.
͂ > ́ ;
̂ > ́ ;
̀ > ́ ;
# Breathing marks, the combining iota subscript, and the spacing character
# on the last line are deleted (empty right-hand side).
$smooth > ;
$rough > ;
$iotasub > ;
ͺ > ;
| |
# need to have these up here so the rules don't mask

# Eta shares the Latin letter i with iota; the $under mark keeps the two
# distinguishable so the mapping stays reversible.
η <> i $under ;
Η <> I $under ;

# Capital psi becomes title-case "Ps" when a lowercase letter follows
# (optionally after accents), otherwise all-caps "PS".
Ψ } $beforeLower <> Ps ;
Ψ <> PS ;
ψ <> ps ;

# Omega shares the Latin letter o with omicron; tagged with $under like eta.
ω <> o $under ;
Ω <> O $under;
| |
# at beginning or end of word, convert mp to b

# Forward-only rules. The context set [^[:L:]$accent] matches a character that
# is neither a letter nor an accent, i.e. the neighbour on the word-boundary side.
# The first pair handles all-lowercase mu-pi; the second pair handles the
# remaining case combinations and yields capital B.
[^[:L:]$accent] { μπ > b ;
μπ } [^[:L:]$accent] > b ;
[^[:L:]$accent] { [Μμ][Ππ] > B ;
[Μμ][Ππ] } [^[:L:]$accent] > B ;

# Reverse-only rules: Latin b/B always becomes mu-pi. Capital B becomes
# title-case when a lowercase letter follows, otherwise all-caps.
μπ < b ;
Μπ < B } $beforeLower ;
ΜΠ < B ;
| |
# handle diphthongs ending with upsilon

# omicron + upsilon maps to "ou" in every case combination.
ου <> ou ;
ΟΥ <> OU ;
Οu <> Ou ;
οΥ <> oU ;

# Preceding context for the v/f rules: a Latin a, e or i (either case),
# optionally followed by $under (the form eta takes above).
# NOTE(review): presumably this matches the already-transliterated vowel that
# precedes the upsilon -- confirm against the ICU context-matching rules.
$fmaker = [aeiAEI] $under ? ;
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate

# Lowercase upsilon after $fmaker and before a $softener letter becomes
# v + $under. Any marks captured after the upsilon ($1) are emitted before the
# v; the reverse rule moves them back after the upsilon.
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
υ $1 < ( $shiftForwardVowels )* v $under ;

# Otherwise lowercase upsilon after $fmaker becomes f + $under, with the same
# handling of the captured marks.
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
υ $1 < ( $shiftForwardVowels )* f $under ;

# Uppercase counterparts.
# NOTE(review): the non-softener uppercase case yields "U" + $under whereas
# the lowercase rule above yields "f" + $under -- confirm this asymmetry is
# intentional. These rules also do not capture trailing marks.
$fmaker { Υ } $softener <> V $under ;
$fmaker { Υ <> U $under ;

# Any other upsilon maps to y.
υ <> y ;
Υ <> Y ;
| |
# NORMAL

# One-to-one letter mappings, bidirectional unless noted.

α <> a ;
Α <> A ;

# Beta maps to v. The reverse direction also produces beta from a plain v,
# while v + $under is consumed by the upsilon rules earlier in the file.
β <> v ;
Β <> V ;

# Gamma before a $gammaLike letter is written n; in the reverse direction an
# n before an $egammaLike Latin letter becomes gamma. Otherwise gamma is g.
γ } $gammaLike <> n } $egammaLike ;
γ <> g ;
Γ } $gammaLike <> N } $egammaLike ;
Γ <> G ;

δ <> d ;
Δ <> D ;

ε <> e ;
Ε <> E ;

ζ <> z ;
Ζ <> Z ;

# Capital theta is title-case "Th" before a lowercase letter, else "TH".
θ <> th ;
Θ } $beforeLower <> Th ;
Θ <> TH ;

ι <> i ;
Ι <> I ;

κ <> k ;
Κ <> K ;

λ <> l ;
Λ <> L ;

μ <> m ;
Μ <> M ;

# Nu before a $gammaLike letter gets a trailing apostrophe so that it is not
# confused with the n produced from gamma above.
# NOTE(review): the lowercase rule is forward-only (">") while the uppercase
# rule is bidirectional ("<>"); the lowercase apostrophe is removed by a
# reverse-only rule near the end of the file. Confirm the asymmetry is intended.
ν } $gammaLike > n\' ;
ν <> n ;
Ν } $gammaLike <> N\' ;
Ν <> N ;

ξ <> x ;
Ξ <> X ;

ο <> o ;
Ο <> O ;

π <> p ;
Π <> P ;

ρ <> r ;
Ρ <> R ;
| |
# insert separator before things that turn into s
# Forward-only: after a Latin P/p, an apostrophe is inserted in front of any
# sigma-like letter, so that pi + sigma does not come out as the "ps" that
# psi produces.
[Pp] { } [ςσΣϷϸϺϻ] > \' ;

# special S variants

Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L

# Caron means exception
# NOTE(review): the heading above looks stale -- the rules below mark the
# exceptional sigma form with $under, and $caron is not used by any rule
# visible in this file.
# In each position the expected sigma form maps to plain s, and the
# unexpected form maps to s + $under so that it can be restored.

# before a letter, initial
ς } $beforeLetter <> s $under } $beforeLetter;
σ } $beforeLetter <> s } $beforeLetter;

# otherwise, after a letter = final
$afterLetter { σ <> $afterLetter { s $under;
$afterLetter { ς <> $afterLetter { s ;

# otherwise (isolated) = initial
ς <> s $under;
σ <> s ;

# (disabled alternative for the separator before capital sigma)
# [Pp] { Σ <> \'S ;
Σ <> S ;
| |
# Remaining one-to-one letters.
τ <> t ;
Τ <> T ;

φ <> f ;
Φ <> F ;

# Capital chi is title-case "Ch" before a lowercase letter, else "CH".
χ <> ch ;
Χ } $beforeLower <> Ch ;
Χ <> CH ;
| |
# Completeness for ASCII

# Reverse-only rules ("<") for Latin letters that no forward rule produces.
# Each one rewrites the letter to Latin text that the rules above do handle.
# The leading "|" is the ICU cursor marker; presumably it leaves the cursor
# before the substituted text so that it is re-scanned and converted to Greek.
# NOTE(review): u and w before a vowel are rewritten to b, which the forward
# rules only produce from mu + pi -- confirm that is the intended target.

# $ignore = [[:Mark:]''] * ;

| ch < h ;
| k < c ;
| i < j ;
| k < q ;
| b < u } $vowel ;
| b < w } $vowel ;
| y < u ;
| y < w ;

# Uppercase counterparts of the rules above.
| Ch < H ;
| K < C ;
| I < J ;
| K < Q ;
| B < W } $vowel ;
| B < U } $vowel ;
| Y < W ;
| Y < U ;
| |
# Completeness for Greek

# Forward-only rules that fold Greek symbol/variant letters onto the ordinary
# letter. The "|" before the replacement is the ICU cursor marker; presumably
# it lets the ordinary letter be re-scanned by the rules above.
ϐ > | β ;
ϑ > | θ ;
ϒ > | Υ ;
ϕ > | φ ;
ϖ > | π ;

ϰ > | κ ;
ϱ > | ρ ;
ϲ > | σ ;
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
# This one maps straight to Latin j, with no cursor marker.
ϳ > j ;
ϴ > | Θ ;
ϵ > | ε ;
# Left side is the micro sign as shown in the source; right side is Greek mu.
µ > | μ ;
| |
# delete any trailing ' marks used for roundtripping

# Reverse-only rules with an empty result: remove the apostrophe that sits
# between an already-restored pi and a Latin S/s, or between an already-restored
# nu and an $egammaLike Latin letter. These are the separators inserted by the
# forward pi+sigma and nu+$gammaLike rules.
< [Ππ] { \' } [Ss] ;
< [Νν] { \' } $egammaLike ;

# Recompose with NFC after the rules going forward; the parenthesised NFD is
# the counterpart applied in the reverse direction.
::NFC (NFD) ;

# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
# Parenthesised, so it applies to the reverse (Latin-to-Greek) direction:
# Latin-script characters, Mn/Me marks, and the punctuation in the second set.
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;