| // -*- Coding: utf-8; -*- |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2001, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // THIS IS A MACHINE-GENERATED FILE |
| // Tool: dumpICUrules.bat |
| // Source: ../../text/resources/Transliterator_Greek_Latin.txt |
| // Date: Wed Nov 7 09:34:04 2001 |
| //-------------------------------------------------------------------- |
| |
| // Greek_Latin |
| |
| translit_Greek_Latin { |
| Rule { |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2001, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Greek_Latin.txt,v $ |
| // $Date: 2001/11/07 18:50:25 $ |
| // $Revision: 1.3 $ |
| //-------------------------------------------------------------------- |
| |
| // Rules are predicated on running NFD first, and NFC afterwards: the rule set opens with ::NFD and closes with ::NFC; the parenthesized names are the transforms applied at that point when the rules run in the reverse (Latin to Greek) direction |
| "::NFD (NFC) ;" |
| |
| // TEST CASES |
| |
| // Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος |
| // ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ |
| // ᾳ ῃ ῳ ὃ ὄ |
| // ὠς ὡς ὢς ὣς |
| // Ὠς Ὡς Ὢς Ὣς |
| // ὨΣ ὩΣ ὪΣ ὫΣ |
| // Ạ, ạ, Ẹ, ẹ, Ọ, ọ |
| |
| // Useful variables |
| |
| // Unicode general-category classes: lowercase letters, uppercase letters, all marks. |
| "$lower = [:Ll:] ;" |
| "$upper = [:Lu:] ;" |
| "$accent = [:M:] ;" |
| |
| // Combining macron and combining diaeresis, plus a set containing both. |
| "$macron = \u0304 ;" |
| "$ddot = \u0308 ;" |
| "$ddotmac = [$ddot$macron];" |
| |
| // Greek vowels by case; $lcgvowelC additionally admits any combining mark. |
| "$lcgvowel = [αεηιουω] ;" |
| "$ucgvowel = [ΑΕΗΙΟΥΩ] ;" |
| "$gvowel = [$lcgvowel $ucgvowel] ;" |
| "$lcgvowelC = [$lcgvowel $accent] ;" |
| |
| // Latin vowels (including y), and the union of Latin and Greek vowels. |
| "$evowel = [aeiouyAEIOUY];" |
| "$vowel = [ $evowel $gvowel] ;" |
| |
| // Context 'any marks, then a lowercase letter'; the rules use it to choose titlecase output. |
| "$beforeLower = $accent * $lower ;" |
| |
| // Greek letters before which gamma is written n, and the Latin letters they map to. |
| "$gammaLike = [ΓΚΞΧγκξχϰ] ;" |
| "$egammaLike = [GKXCgkxc] ;" |
| // The next three values are literal combining marks: U+0313 (smooth breathing), |
| // U+0314 (rough breathing) and U+0345 (iota subscript). |
| "$smooth = ̓ ;" |
| "$rough = ̔ ;" |
| "$iotasub = ͅ ;" |
| |
| // Latin vowels other than i/I. |
| "$evowel_i = [$evowel-[iI]] ;" |
| |
| // Combining caron; the sigma rules attach it to s to flag an unexpected sigma form. |
| "$caron = \u030C;" |
| |
| // Letter-adjacency contexts for the sigma rules: a letter immediately before |
| // ($afterLetter) or after ($beforeLetter) the sigma, optionally separated from it |
| // by apostrophes and combining marks. These must match letters ([:L:]); the |
| // negated class [:^L:] would invert both contexts, so that a sigma next to a |
| // space or punctuation would be treated as if it were next to a letter. |
| "$afterLetter = [:L:] [\\\'[:M:]]* ;" |
| "$beforeLetter = [\\\'[:M:]]* [:L:] ;" |
| |
| // Fix punctuation: ';' (which serves as the Greek question mark) <> '?', and the raised dot '·' <> ':' |
| |
| "\\\; <> \\\? ;" |
| "· <> \\\: ;" |
| |
| // CIRCUMFLEX: convert the Greek combining circumflex (U+0342) to the normal combining circumflex (U+0302), and back. Could use tilde or inverted breve instead |
| |
| "\u0342 <> \u0302 ;" |
| |
| // IOTA: convert the combining iota subscript ($iotasub) to a full letter i/I |
| // First make a preceding alpha long: insert $macron after Α/α when only marks from $accent_minus stand between it and the iota subscript. $accent_minus excludes $macron, so the rule cannot match again once the macron has been inserted |
| |
| "$accent_minus = [[$accent]-[$iotasub$macron]];" |
| |
| "Α } $accent_minus * $iotasub > | Α $macron ;" |
| "α } $accent_minus * $iotasub > | α $macron ;" |
| |
| // Now convert the iota subscript to uppercase I if it comes after an uppercase letter (plus any marks), otherwise to lowercase i. The two reverse rules turn i/I back into $iotasub when it follows a letter carrying a macron |
| |
| "$upper $accent * { $iotasub > I ;" |
| "$iotasub > i ;" |
| |
| "| $1 $iotasub < ([:L:] $macron [:M:]*) i ;" |
| "| $1 $iotasub < ([:L:] $macron [:M:]*) I ;" |
| |
| // BREATHING: rough breathing ($rough) is handled first; smooth breathing follows further down |
| |
| // Convert rough breathing to h/H, and move it before the vowel letters that carry it. |
| |
| // Make A ` x => H a x: an uppercase vowel with rough breathing, followed (after any marks) by a lowercase letter, becomes H plus the lowercased vowel; the vowel is emitted after the cursor '|' so it is still processed by later rules. NOTE(review): $ddot is accepted after Ι and Ω but not Υ, here and in the next group -- confirm this is intended |
| |
| "Α ($macron?) $rough } $beforeLower > H | α $1;" |
| "Ε $rough } $beforeLower > H | ε;" |
| "Η $rough } $beforeLower > H | η ;" |
| "Ι ($ddot?) $rough } $beforeLower > H | ι $1;" |
| "Ο $rough } $beforeLower > H | ο ;" |
| "Υ $rough } $beforeLower > H | υ ;" |
| "Ω ($ddot?) $rough } $beforeLower > H | ω $1;" |
| |
| // Make A x ` => H a x: as above, but the rough breathing sits after the lowercase letter that follows the capital; that letter is captured as $1 and re-emitted after the lowercased vowel |
| |
| "Α ($lower $macron?) $rough > H | α $1 ;" |
| "Ε ($lower) $rough > H | ε $1 ;" |
| "Η ($lower) $rough > H | η $1 ;" |
| "Ι ($lower $ddot?) $rough > H | ι $1 ;" |
| "Ο ($lower) $rough > H | ο $1 ;" |
| "Υ ($lower) $rough > H | υ $1 ;" |
| "Ω ($lower $ddot?) $rough > H | ω $1 ;" |
| |
| // Otherwise, make x ` into h x and X ` into H X: a run of lowercase Greek vowels gets h, any other run of Greek vowels gets H |
| |
| "($lcgvowel + $ddotmac? ) $rough > h | $1 ;" |
| "($gvowel + $ddotmac? ) $rough > H | $1 ;" |
| |
| // Go backwards with H (Latin to Greek): h/H in front of a Latin vowel sequence becomes $rough placed after that sequence. Longer sequences are listed first: vowel + macron + second vowel (not i/I), then two vowels, then a single vowel |
| |
| "| $1 $rough < h ($evowel $macron $ddot? $evowel_i $macron?) ;" |
| "| $1 $rough < h ($evowel $ddot? $evowel $macron?) ;" |
| "| $1 $rough < h ($evowel $macron? $ddot?) ;" |
| |
| "| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel_i $macron?) ;" |
| "| $1 $rough < H ([AEIOUY] $ddot? $evowel $macron?) ;" |
| "| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;" |
| |
| // Titlecase (H followed by a lowercase vowel): the vowel is uppercased in the output; this has to be fixed for each vowel individually |
| // In the future, we should add &uppercase() to make this easier |
| |
| "| A $1 $rough < H a ($macron $ddot? $evowel_i $macron?) ;" |
| "| E $1 $rough < H e ($macron $ddot? $evowel_i $macron?) ;" |
| "| I $1 $rough < H i ($macron $ddot? $evowel_i $macron?) ;" |
| "| O $1 $rough < H o ($macron $ddot? $evowel_i $macron?) ;" |
| "| U $1 $rough < H u ($macron $ddot? $evowel_i $macron?) ;" |
| "| Y $1 $rough < H y ($macron $ddot? $evowel_i $macron?) ;" |
| |
| "| A $1 $rough < H a ($ddot? $evowel $macron?) ;" |
| "| E $1 $rough < H e ($ddot? $evowel $macron?) ;" |
| "| I $1 $rough < H i ($ddot? $evowel $macron?) ;" |
| "| O $1 $rough < H o ($ddot? $evowel $macron?) ;" |
| "| U $1 $rough < H u ($ddot? $evowel $macron?) ;" |
| "| Y $1 $rough < H y ($ddot? $evowel $macron?) ;" |
| |
| "| A $1 $rough < H a ($macron? $ddot? ) ;" |
| "| E $1 $rough < H e ($macron? $ddot? ) ;" |
| "| I $1 $rough < H i ($macron? $ddot? ) ;" |
| "| O $1 $rough < H o ($macron? $ddot? ) ;" |
| "| U $1 $rough < H u ($macron? $ddot? ) ;" |
| "| Y $1 $rough < H y ($macron? $ddot? ) ;" |
| |
| // Now do smooth breathing ($smooth) |
| |
| // Delete smooth breathing when going to Latin |
| "$smooth > ;" |
| |
| // Insert it when going to Greek: after an initial r/R that is not followed by h/H, and after an initial vowel pair or single vowel (each with optional macron), where 'initial' means preceded by a character that is neither a letter nor a mark, and no breathing mark already follows |
| // The assumption is that all marks are on letters. |
| |
| "| $1 $smooth < [^[:L:][:M:]] { ([rR]) } [^hH$smooth$rough] ;" |
| "| $1 $smooth < [^[:L:][:M:]] { ($evowel $macron? $evowel $macron?) } [^$smooth$rough] ;" |
| "| $1 $smooth < [^[:L:][:M:]] { ($evowel $macron?) } [^$evowel$smooth$rough] ;" |
| |
| // TODO: preserve smooth/rough breathing if not |
| // on initial vowel sequence |
| |
| // Multi-character mappings (vowel + macron for η/ω, ph for φ, ps for ψ, with titlecase Ph/Ps when a lowercase letter follows); need to have these up here so the single-letter rules further down don't mask them |
| |
| // Remove the now superfluous macron when returning to Greek: A/a + macron goes back to plain Α/α (reverse-only rules) |
| |
| "Α < A $macron ;" |
| "α < a $macron ;" |
| |
| "η <> e $macron ;" |
| "Η <> E $macron ;" |
| |
| "φ <> ph ;" |
| "Ψ } $beforeLower <> Ps ;" |
| "Ψ <> PS ;" |
| |
| "Φ } $beforeLower <> Ph ;" |
| "Φ <> PH ;" |
| "ψ <> ps ;" |
| |
| "ω <> o $macron ;" |
| "Ω <> O $macron;" |
| |
| // NORMAL: letter-by-letter mappings. Context-dependent rules are listed ahead of the plain rule for the same letter: γ/Γ before a gamma-like letter <> n/N, ν/Ν before a gamma-like letter gets a trailing apostrophe, the θ/ρ/χ digraphs use titlecase (Th, Rh, Ch) when a lowercase letter follows, and υ/Υ directly after a vowel <> u/U (otherwise y/Y). NOTE(review): the lowercase 'ν } $gammaLike' rule is one-way (>) while its uppercase twin is two-way (<>) -- confirm the asymmetry is intended |
| |
| "α <> a ;" |
| "Α <> A ;" |
| |
| "β <> b ;" |
| "Β <> B ;" |
| |
| "γ } $gammaLike <> n } $egammaLike ;" |
| "γ <> g ;" |
| "Γ } $gammaLike <> N } $egammaLike ;" |
| "Γ <> G ;" |
| |
| "δ <> d ;" |
| "Δ <> D ;" |
| |
| "ε <> e ;" |
| "Ε <> E ;" |
| |
| "ζ <> z ;" |
| "Ζ <> Z ;" |
| |
| "θ <> th ;" |
| "Θ } $beforeLower <> Th ;" |
| "Θ <> TH ;" |
| |
| "ι <> i ;" |
| "Ι <> I ;" |
| |
| "κ <> k ;" |
| "Κ <> K ;" |
| |
| "λ <> l ;" |
| "Λ <> L ;" |
| |
| "μ <> m ;" |
| "Μ <> M ;" |
| |
| "ν } $gammaLike > n\\\' ;" |
| "ν <> n ;" |
| "Ν } $gammaLike <> N\\\' ;" |
| "Ν <> N ;" |
| |
| "ξ <> x ;" |
| "Ξ <> X ;" |
| |
| "ο <> o ;" |
| "Ο <> O ;" |
| |
| "π <> p ;" |
| "Π <> P ;" |
| |
| "ρ $rough <> rh;" |
| "Ρ $rough } $beforeLower <> Rh ;" |
| "Ρ $rough <> RH ;" |
| "ρ <> r ;" |
| "Ρ <> R ;" |
| |
| // Insert separator: emit an apostrophe between an already-converted p/P and a following sigma, so that π + σ (p's) stays distinct from ψ (ps) |
| |
| "[Pp] { } ς > \\\' ;" |
| "[Pp] { } σ > \\\' ;" |
| |
| // Caron means exception: s + $caron marks a sigma whose form (σ vs ς) is not the one expected in that position, so the form survives a round trip |
| |
| // Before a letter (context $beforeLetter), initial: σ maps to plain s, while ς maps to s + caron |
| "ς } $beforeLetter <> s $caron } $beforeLetter;" |
| "σ } $beforeLetter <> s } $beforeLetter;" |
| |
| // Otherwise, after a letter (context $afterLetter) = final: ς maps to plain s, while σ maps to s + caron |
| "$afterLetter { σ <> $afterLetter { s $caron;" |
| "$afterLetter { ς <> $afterLetter { s ;" |
| |
| // Otherwise (isolated) = treated as initial: σ <> s, ς <> s + caron |
| "ς <> s $caron;" |
| "σ <> s ;" |
| |
| "[Pp] { Σ <> \\\'S ;" |
| "Σ <> S ;" |
| |
| "τ <> t ;" |
| "Τ <> T ;" |
| |
| "$vowel {υ } <> u ;" |
| "υ <> y ;" |
| "$vowel { Υ <> U ;" |
| "Υ <> Y ;" |
| |
| "χ <> ch ;" |
| "Χ } $beforeLower <> Ch ;" |
| "Χ <> CH ;" |
| |
| // Completeness for ASCII: Latin letters that no Greek letter maps to (c f j q v w and their capitals) are rewritten, in the reverse direction only, to letters that do have rules, with the cursor '|' left in front of the replacement so it is processed again. $ignore skips any run of marks and (presumably) apostrophes, written '' in the set. Leftover $rough next to an uppercase letter becomes H, otherwise h; H and h both map back to $rough. NOTE(review): all three F rules output PH, so the two context-sensitive ones have no distinct effect -- confirm whether the last was meant to be Ph |
| |
| "$ignore = [[:Mark:]''] * ;" |
| |
| "| k < c ;" |
| "| ph < f ;" |
| "| i < j ;" |
| "| k < q ;" |
| "| u < v ;" |
| "| u < w ;" |
| "| K < C ;" |
| "| PH < F } $ignore [:UppercaseLetter:] ;" |
| "| PH < [:UppercaseLetter:] $ignore { F ;" |
| "| PH < F ;" |
| "| I < J ;" |
| "| K < Q ;" |
| "| U < V ;" |
| "| U < W ;" |
| |
| "$rough } $ignore [:UppercaseLetter:] > H ;" |
| "$ignore [:UppercaseLetter:] { $rough > H ;" |
| "$rough < H ;" |
| "$rough <> h ;" |
| |
| // Completeness for Greek (forward-only rules): variant letter forms are replaced by their standard letters with the cursor '|' left in front, so the replacement is then handled by the letter rules; ϳ and ͺ are converted directly to j and i |
| |
| "ϐ > | β ;" |
| "ϑ > | θ ;" |
| "ϒ > | Υ ;" |
| "ϕ > | φ ;" |
| "ϖ > | π ;" |
| |
| "ϰ > | κ ;" |
| "ϱ > | ρ ;" |
| "ϲ > | σ ;" |
| "ϳ > j ;" |
| "ϴ > | Θ ;" |
| "ϵ > | ε ;" |
| |
| "ͺ > i;" |
| |
| // Delete any trailing ' marks used for roundtripping (reverse direction only): the apostrophe between π/Π and s/S, and between ν/Ν and a Latin gamma-like letter |
| |
| "< [Ππ] { \\\' } [Ss] ;" |
| "< [Νν] { \\\' } $egammaLike ;" |
| |
| "::NFC (NFD) ;" |
| } |
| } |