| // -*- Coding: utf-8; -*- |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2002, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // THIS IS A MACHINE-GENERATED FILE |
| // Tool: dumpicurules.bat |
| // Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt |
| // Date: Sat Jul 27 10:31:01 2002 |
| //-------------------------------------------------------------------- |
| |
| // Greek_Latin_UNGEGN |
| |
| t_Grek_Latn_UNGEGN { |
| Rule { |
| //-------------------------------------------------------------------- |
| //-------------------------------------------------------------------- |
| //-------------------------------------------------------------------- |
| // For modern Greek, based on UNGEGN rules. |
| |
| // Rules are predicated on running NFD first, and NFC afterwards |
| // MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN |
| // WARNING: need to add accents to both filters ### |
| // :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ; |
| |
| // Forward global filter: the set lists Greek-script characters, nonspacing and |
| // enclosing marks, and punctuation (colon through semicolon, '?', U+00B7, |
| // U+037E, U+0387). Characters outside the set are left untouched. |
| ":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;" |
| // Decompose (NFD) before the forward rules run; the parenthesized NFC is the |
| // step used at this position when the rule set is run in reverse. |
| "::NFD (NFC) ;" |
| |
| // Useful variables |
| // Named sets and literal marks that the rules further down refer to. |
| |
| // Lowercase / uppercase letters of the Latin and Greek scripts. |
| // ($upper is not referenced by any rule visible in this file.) |
| "$lower = [[:latin:][:greek:] & [:Ll:]] ;" |
| "$upper = [[:latin:][:greek:] & [:Lu:]] ;" |
| // Any nonspacing or enclosing combining mark. |
| "$accent = [[:Mn:][:Me:]] ;" |
| |
| // Literal combining marks, stored as raw characters. |
| // (Neither $macron nor $ddot is referenced by a rule visible in this file.) |
| "$macron = ̄ ;" |
| "$ddot = ̈ ;" |
| |
| // Greek vowels: lowercase, uppercase, both. $lcgvowelC additionally admits |
| // combining marks (it is not referenced by any rule visible in this file). |
| "$lcgvowel = [αεηιουω] ;" |
| "$ucgvowel = [ΑΕΗΙΟΥΩ] ;" |
| "$gvowel = [$lcgvowel $ucgvowel] ;" |
| "$lcgvowelC = [$lcgvowel $accent] ;" |
| |
| // Latin vowels (including y), and the union of Latin and Greek vowels. |
| "$evowel = [aeiouyAEIOUY];" |
| "$vowel = [ $evowel $gvowel] ;" |
| |
| // Lookahead used to pick title-case output (e.g. Ps / Th / Ch): zero or more |
| // marks followed by a lowercase letter. |
| "$beforeLower = $accent * $lower ;" |
| |
| // Greek letters before which gamma is written as n, and the Latin letters |
| // used for the same test in the reverse direction. |
| "$gammaLike = [ΓΚΞΧγκξχϰ] ;" |
| "$egammaLike = [GKXCgkxc] ;" |
| // Breathing marks and iota subscript (named per the variables; removed by |
| // the "ancient characters" rules). |
| "$smooth = ̓ ;" |
| "$rough = ̔ ;" |
| "$iotasub = ͅ ;" |
| |
| // Right-hand context that makes a diphthong upsilon come out as v rather |
| // than f: the listed consonants and any Greek vowel. |
| "$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;" |
| |
| // Combining mark appended to Latin output to tag a non-default mapping |
| // (e.g. i+$under for eta, o+$under for omega) so the reverse rules can |
| // recover the original Greek letter. |
| "$under = ̱;" |
| |
| // Defined but not referenced by any rule visible in this file. |
| "$caron = ̌;" |
| |
| // A letter followed by any apostrophes/marks, and any apostrophes/marks |
| // followed by a letter. Used as context by the sigma rules. |
| "$afterLetter = [:L:] [\\\'$accent]* ;" |
| "$beforeLetter = [\\\'$accent]* [:L:] ;" |
| |
| // Fix punctuation |
| |
| // Preserve the original: a literal ':' or '?' in the Greek text is tagged with |
| // $under on the Latin side, because the untagged ':' and '?' are used below |
| // as the images of the middle dot and of ';'. These two rules must stay ahead |
| // of the next two so that the tagged forms are matched first in reverse. |
| "\\\: <> \\\: $under ;" |
| "\\\? <> \\\? $under ;" |
| |
| // ';' on the Greek side <-> '?' on the Latin side; middle dot <-> ':'. |
| "\\\; <> \\\? ;" |
| "· <> \\\: ;" |
| |
| // Fix any ancient characters that creep in |
| // All rules in this group are forward-only ('>'), so they have no effect |
| // when the rule set is run in reverse. |
| |
| // Fold three other accent marks into the single mark on the right-hand side |
| // (presumably perispomeni, circumflex and grave -> acute; confirm code points). |
| "͂ > ́ ;" |
| "̂ > ́ ;" |
| "̀ > ́ ;" |
| // Delete breathing marks and iota subscript (empty right-hand side). |
| "$smooth > ;" |
| "$rough > ;" |
| "$iotasub > ;" |
| // Delete the spacing character on the left as well (presumably the Greek |
| // ypogegrammeni U+037A; confirm). |
| "ͺ > ;" |
| |
| // Need to have these up here so the rules don't mask: rules are tried in |
| // order, so the longer Latin forms below (i+$under, ps, o+$under) must come |
| // before the plain i, p and o rules in the NORMAL section, or the reverse |
| // direction would never reach them. |
| |
| // eta <-> i tagged with $under (plain i is reserved for iota). |
| "η <> i $under ;" |
| "Η <> I $under ;" |
| |
| // psi <-> ps. Capital psi gives title-case "Ps" when followed by optional |
| // marks and a lowercase letter ($beforeLower), otherwise "PS". |
| "Ψ } $beforeLower <> Ps ;" |
| "Ψ <> PS ;" |
| "ψ <> ps ;" |
| |
| // omega <-> o tagged with $under (plain o is reserved for omicron). |
| "ω <> o $under ;" |
| "Ω <> O $under;" |
| |
| // At the beginning or end of a word, convert mp to b |
| |
| // Forward only. The context [^[:L:]$accent] is a character that is neither a |
| // letter nor a combining mark, i.e. the mu-pi pair sits at a word edge. |
| // The first two rules handle the all-lowercase pair, the next two any pair |
| // containing an uppercase letter (the lowercase pair was already consumed). |
| "[^[:L:]$accent] { μπ > b ;" |
| "μπ } [^[:L:]$accent] > b ;" |
| "[^[:L:]$accent] { [Μμ][Ππ] > B ;" |
| "[Μμ][Ππ] } [^[:L:]$accent] > B ;" |
| |
| // Reverse only: every Latin b/B becomes mu-pi. Capital B gives title-case |
| // when followed by optional marks and a lowercase letter, else all caps. |
| // Word-internal mu-pi is not matched above and goes through the ordinary |
| // mu -> m and pi -> p rules in the forward direction. |
| "μπ < b ;" |
| "Μπ < B } $beforeLower ;" |
| "ΜΠ < B ;" |
| |
| // handle diphthongs ending with upsilon |
| |
| // omicron + upsilon <-> ou, in all four case combinations. |
| "ου <> ou ;" |
| "ΟΥ <> OU ;" |
| "Ου <> Ou ;" |
| "οΥ <> oU ;" |
| |
| // $fmaker: a Latin a, e or i (either case), optionally tagged with $under. |
| // Presumably Latin because the preceding vowel has already been converted |
| // by the time the upsilon is examined -- confirm. |
| // $shiftForwardVowels: any nonspacing mark except the diaeresis. |
| "$fmaker = [aeiAEI] $under ? ;" |
| "$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate |
| |
| // After $fmaker and before a $softener, lowercase upsilon becomes v+$under. |
| // Marks that followed the upsilon are captured as $1 and emitted before the v. |
| // The second rule is the reverse: marks + v+$under -> upsilon + marks. |
| "$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;" |
| "υ $1 < ( $shiftForwardVowels )* v $under ;" |
| |
| // Same, without a following $softener: upsilon becomes f+$under. |
| "$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;" |
| "υ $1 < ( $shiftForwardVowels )* f $under ;" |
| |
| // Uppercase upsilon after $fmaker (no mark shifting here). |
| // NOTE(review): the fallback emits U+$under, whereas the lowercase fallback |
| // above emits f+$under -- confirm this asymmetry is intended. |
| "$fmaker { Υ } $softener <> V $under ;" |
| "$fmaker { Υ <> U $under ;" |
| |
| // Any other upsilon <-> y. |
| "υ <> y ;" |
| "Υ <> Y ;" |
| |
| // NORMAL |
| // One-to-one letter mappings, alpha through rho. Rules are tried in order, |
| // so within each letter the contextual rule precedes the plain one. |
| |
| "α <> a ;" |
| "Α <> A ;" |
| |
| "β <> v ;" |
| "Β <> V ;" |
| |
| // gamma before a $gammaLike letter <-> n before a $egammaLike letter; |
| // otherwise gamma <-> g. |
| "γ } $gammaLike <> n } $egammaLike ;" |
| "γ <> g ;" |
| "Γ } $gammaLike <> N } $egammaLike ;" |
| "Γ <> G ;" |
| |
| "δ <> d ;" |
| "Δ <> D ;" |
| |
| "ε <> e ;" |
| "Ε <> E ;" |
| |
| "ζ <> z ;" |
| "Ζ <> Z ;" |
| |
| // theta <-> th; capital theta gives "Th" before optional marks and a |
| // lowercase letter, otherwise "TH". |
| "θ <> th ;" |
| "Θ } $beforeLower <> Th ;" |
| "Θ <> TH ;" |
| |
| "ι <> i ;" |
| "Ι <> I ;" |
| |
| "κ <> k ;" |
| "Κ <> K ;" |
| |
| "λ <> l ;" |
| "Λ <> L ;" |
| |
| "μ <> m ;" |
| "Μ <> M ;" |
| |
| // nu before a $gammaLike letter is written n + apostrophe, which keeps it |
| // distinct from the n produced by the gamma rules above. |
| // NOTE(review): the lowercase rule is forward-only ('>') while the uppercase |
| // rule is bidirectional ('<>') -- confirm the difference is intended. |
| "ν } $gammaLike > n\\\' ;" |
| "ν <> n ;" |
| "Ν } $gammaLike <> N\\\' ;" |
| "Ν <> N ;" |
| |
| "ξ <> x ;" |
| "Ξ <> X ;" |
| |
| "ο <> o ;" |
| "Ο <> O ;" |
| |
| "π <> p ;" |
| "Π <> P ;" |
| |
| "ρ <> r ;" |
| "Ρ <> R ;" |
| |
| // Forward only: insert an apostrophe between an already-romanized P/p and a |
| // following sigma (the match itself is empty; only context is tested), so |
| // that the result is not read back as "ps" (psi) in reverse. |
| "[Pp] { } ς > \\\' ;" |
| "[Pp] { } σ > \\\' ;" |
| |
| // Caron means exception |
| // NOTE(review): the rules below tag the exceptional form with $under; |
| // $caron is not used anywhere in the visible rules. |
| |
| // before a letter, initial: |
| // the medial form is the expected one here (plain s); a final-form sigma |
| // in this position is the exception and is tagged with $under. |
| "ς } $beforeLetter <> s $under } $beforeLetter;" |
| "σ } $beforeLetter <> s } $beforeLetter;" |
| |
| // otherwise, after a letter = final: |
| // the final form is the expected one here (plain s); a medial-form sigma |
| // in this position is the exception and is tagged with $under. |
| "$afterLetter { σ <> $afterLetter { s $under;" |
| "$afterLetter { ς <> $afterLetter { s ;" |
| |
| // otherwise (isolated) = initial |
| "ς <> s $under;" |
| "σ <> s ;" |
| |
| // Capital sigma: after P/p it is written with a leading apostrophe |
| // (same psi-avoidance as above); otherwise Sigma <-> S. |
| "[Pp] { Σ <> \\\'S ;" |
| "Σ <> S ;" |
| |
| // Remaining one-to-one letters: tau, phi, chi. |
| "τ <> t ;" |
| "Τ <> T ;" |
| |
| "φ <> f ;" |
| "Φ <> F ;" |
| |
| // chi <-> ch; capital chi gives "Ch" before optional marks and a lowercase |
| // letter, otherwise "CH". |
| "χ <> ch ;" |
| "Χ } $beforeLower <> Ch ;" |
| "Χ <> CH ;" |
| |
| // Completeness for ASCII |
| // Reverse-only rules ('<') for Latin letters that no rule above produces. |
| // Each one rewrites the letter to another Latin spelling and leaves the |
| // cursor (the leading bar inside the rule string) in front of the |
| // replacement, so the replacement is transliterated again by the rules above. |
| |
| // $ignore = [[:Mark:]''] * ; |
| |
| // Lowercase: h -> ch, c -> k, j -> i, q -> k; u and w become b before a |
| // vowel and y otherwise. |
| // NOTE(review): b reverses to mu-pi (see the "mp to b" rules), so u/w before |
| // a vowel end up as mu-pi -- confirm that b rather than v is intended. |
| "| ch < h ;" |
| "| k < c ;" |
| "| i < j ;" |
| "| k < q ;" |
| "| b < u } $vowel ;" |
| "| b < w } $vowel ;" |
| "| y < u ;" |
| "| y < w ;" |
| |
| // Uppercase counterparts of the rules above. |
| "| Ch < H ;" |
| "| K < C ;" |
| "| I < J ;" |
| "| K < Q ;" |
| "| B < W } $vowel ;" |
| "| B < U } $vowel ;" |
| "| Y < W ;" |
| "| Y < U ;" |
| |
| // Completeness for Greek |
| // Forward-only rules ('>') that replace variant/symbol forms of Greek letters |
| // with the ordinary letter. The cursor is left in front of the replacement |
| // so the ordinary-letter rules above then romanize it. |
| |
| "ϐ > | β ;" |
| "ϑ > | θ ;" |
| "ϒ > | Υ ;" |
| "ϕ > | φ ;" |
| "ϖ > | π ;" |
| |
| "ϰ > | κ ;" |
| "ϱ > | ρ ;" |
| "ϲ > | σ ;" |
| // This one maps straight to Latin j with no cursor reset. |
| "ϳ > j ;" |
| "ϴ > | Θ ;" |
| "ϵ > | ε ;" |
| // Left-hand side is presumably the micro sign (U+00B5), folded to Greek mu; |
| // confirm code point. |
| "µ > | μ ;" |
| |
| // delete any trailing ' marks used for roundtripping |
| // Reverse-only rules with an empty Greek side: an apostrophe that sits |
| // between pi and S/s, or between nu and a $egammaLike letter, is removed. |
| // These are the apostrophes the forward sigma and nu rules insert. |
| |
| "< [Ππ] { \\\' } [Ss] ;" |
| "< [Νν] { \\\' } $egammaLike ;" |
| |
| // Recompose (NFC) after the forward rules; the parenthesized NFD is the |
| // step used at this position when the rule set is run in reverse. |
| "::NFC (NFD) ;" |
| |
| // MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD |
| // Parenthesized, so it applies to the reverse direction only: the set lists |
| // Latin-script characters, nonspacing/enclosing marks, apostrophe, ':' and '?'. |
| ":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;" |
| } |
| } |