blob: 60bf95760c8536f04805fc174a7c8f32204c4522 [file] [log] [blame]
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: cy_cy_FONIPA.txt
# Generated from CLDR
#
# Transformation from Welsh (cy) to its IPA transcription (cy_FONIPA).
# Based on description of Northern Welsh in:
#
# http://en.wikipedia.org/wiki/Welsh_orthography
# http://en.wikipedia.org/wiki/Welsh_phonology
#
# Note that these rules are NOT complete: to be complete we would have to know
# the morphological analysis of the word. For example, final ‹au› is pronounced
# /a/ if it is the noun plural marker, otherwise it is /aɨ/. Similarly in
# “llongyfarch” (‘congratulating’), the morphological decomposition — “llon +
# cyfarch” — is needed to know that the ‹ng› is pronounced as /ŋg/, not as
# /ŋ/.
#
# Author: Richard Sproat
# Normalisation: lowercase the input and compose it to NFC before any rule
# runs. All letter rules in this file are written against lowercase,
# precomposed characters (e.g. â, ŵ, ẃ).
::Lower;
::NFC;
# Delete the right single quotation mark (’) and every character in the
# Unicode general category P (punctuation); the replacement side is empty.
[’ [:P:]] → ;
# Class definitions
# $end is used as a context in the rules below to require a word edge.
# NOTE(review): whether the space inside the brackets is significant depends
# on how the set parser treats whitespace -- confirm before reformatting.
$end = [$ ];
# Both orthographic and phonetic vowels: the plain letters (including ‹w› and
# ‹y›), the circumflexed letters, and the IPA vowel symbols that the rules
# below produce. Mixing both lets the same class be used in contexts before
# and after the vowel rules have run.
$vowel = [aeiouwyâêîôûŵŷɑɨəɛɪɔʊ];
# Consonants, written as IPA symbols. Elements in braces are multi-character
# strings: a base letter plus a combining mark (\u0325 ring below, \u030A ring
# above) or, for {d\u0361ʒ}, two letters joined by the tie bar \u0361.
# The set contains both IPA ɡ (U+0261) and ASCII g.
# Note that ‹w› appears here as well as in $vowel.
# W is a placeholder for the glide -- see below
$cons = [
m {m\u0325} n {n\u0325} ŋ {ŋ\u030A}
p b t d k ɡ
f v θ ð s ʃ h χ
l ɬ r {r\u0325}
{d\u0361ʒ} g W w j
];
# Preprocessing of letters that sometimes occur
# These letters are rewritten to other letters so that the consonant rules
# further down handle them: ‹k› becomes ‹c› (later c → k), ‹v› becomes ‹f›
# (later f → v), and ‹x›, ‹z› both become ‹s›.
k → c;
v → f;
x → s;
z → s;
# End of this pass; the rules below operate on the rewritten text.
::Null;
# Consonant transductions:
# Rules are listed longest spelling first (trigraph, then digraphs, then
# single letters) so that e.g. ‹ngh› is matched as a unit rather than as
# ‹ng› followed by ‹h›.
# Trigraphs
ngh → ŋ\u030A;
# Digraphs
ch → χ;
dd → ð;
ff → f;
ll → ɬ;
mh → m\u0325;
nh → n\u0325;
ng → ŋ;
ph → f;
rh → r\u0325;
th → θ;
# Monographs
# Several of these map a letter to itself. Single ‹f› maps to /v/ (double
# ‹ff› above gives /f/), ‹c› maps to /k/, and ASCII ‹g› maps to the IPA
# letter ɡ (U+0261).
b → b;
c → k;
d → d;
f → v;
g → ɡ;
h → h;
j → d\u0361ʒ; # Loan words
l → l;
m → m;
n → n;
p → p;
r → r;
s → s;
t → t;
# End of the consonant pass.
::Null;
# Transduce ‹si› to /ʃ/ before vowels
# The following vowel (any member of $vowel) is context only: it must be
# present but is not consumed or changed by this rule.
si} $vowel → ʃ;
::Null;
# Treatment of glides.
# First transduce ‹i›, ‹w› to glides prior to vowels. With ‹w› we want to
# do this also before /r,l/ after /ɡ/ (from Proto-Celtic *w) e.g. “gwlad”,
# “gwraig”. However the “after g” environment must allow for the following
# possibilities:
#
# ɡ → ŋ via nasal mutation
# ɡ → 0 via soft mutation
#
# In each rule only the letter inside the braces is rewritten; everything
# outside the braces is context. The glide /w/ is written as uppercase W for
# now so that the later vowel rule for ‹w› does not apply to it; a W → w rule
# further down restores it after the vowel rules have run.
{i} $vowel → j;
{w} $vowel → W; # Temporary register
[ɡŋ] {w} [rl] $vowel → W; # Plain or nasal mutation environment
^ {w} [rl] $vowel → W; # Soft mutation at the beginning of a word
# Transduce accented ‹ẃ› to ‹w›: this is used to indicate when a ‹w› that would
# normally be expected to be a glide, is instead a vowel:
ẃ → w;
::Null;
# Stress placement, needed for vowel quality/quantity prediction
# Basic rule of stress in Welsh is to place it on the penult,
# except of course in monosyllables.
#
# Both rules insert the stress mark ˈ immediately before the captured group,
# i.e. directly in front of the stressed vowel(s), not in front of the
# syllable onset. The long-vowel rules further down rely on the mark being
# adjacent to the vowel; the last rule in the file then moves it in front of
# the onset.
#
# Polysyllabic: the capture is a vowel group, one or more consonants, a second
# vowel group and any trailing consonants, all directly before the word end.
{($vowel+ $cons+ $vowel+ $cons*)} $end → ˈ $1; ## Polysyllabic words
# Monosyllabic: a word edge and optional consonants precede a single vowel
# group with optional trailing consonants, followed by the word end.
$end $cons* {($vowel+ $cons*)} $end → ˈ $1; ## Monosyllabic words
::Null;
# Transduction of vowels
# The polysyllabic stress rule in the previous pass overgenerates streams of
# stress marks. The rule below cleans that up by collapsing any run of marks
# into a single mark.
ˈ+ → ˈ;
# Diphthongs
# Deal with ‹y› first since we also need to lengthen the /ɨ/ if that is in the
# correct environment for lengthening.
# ‹y› is /ɨ/ in final syllable, otherwise /ə/
# "Final syllable" is expressed as: only consonants ($cons*) stand between the
# match and the word end. The ‹yw› rules come before the plain ‹y› rules so
# that the diphthong is rewritten as a unit.
yw } $cons* $end → ɨu;
yw → əu;
y} $cons* $end → ɨ;
y → ə;
# End of this pass; the rules below see ɨ / ə / ɨu / əu instead of ‹y›.
::Null;
# Diphthongs in long environment
# Every rule in this group requires the stress mark ˈ immediately before the
# diphthong; only the diphthong inside the braces is rewritten, the stress
# mark and the following context are left in place. ɨu is the output of the
# final-syllable ‹yw› rule in the previous pass.
# Final, or before word-final s
ˈ { ɨu } s? $end → ɨːu;
ˈ { aw } s? $end → ɑːu;
ˈ { ew } s? $end → eːu;
ˈ { oe } s? $end → ɔːɨ;
ˈ { ou } s? $end → ɔːɨ;
ˈ { wy } s? $end → uːɨ;
# before b, ch, d, dd, g, f, ff, th followed by the end of a word
# or a vowel
# By this point those spellings have already become the IPA symbols in the
# set below: b, χ (ch), d, ð (dd), ɡ (g), v (f), f (ff), θ (th).
# The same six diphthongs are listed twice, once for each right-hand
# context ($end, then $vowel).
ˈ { ɨu } [bχdðɡvfθ] $end → ɨːu;
ˈ { aw } [bχdðɡvfθ] $end → ɑːu;
ˈ { ew } [bχdðɡvfθ] $end → eːu;
ˈ { oe } [bχdðɡvfθ] $end → ɔːɨ;
ˈ { ou } [bχdðɡvfθ] $end → ɔːɨ;
ˈ { wy } [bχdðɡvfθ] $end → uːɨ;
ˈ { ɨu } [bχdðɡvfθ] $vowel → ɨːu;
ˈ { aw } [bχdðɡvfθ] $vowel → ɑːu;
ˈ { ew } [bχdðɡvfθ] $vowel → eːu;
ˈ { oe } [bχdðɡvfθ] $vowel → ɔːɨ;
ˈ { ou } [bχdðɡvfθ] $vowel → ɔːɨ;
ˈ { wy } [bχdðɡvfθ] $vowel → uːɨ;
# Diphthongs in other environments
# These have no context, so they apply to any diphthong not already rewritten
# by a long-environment rule above. ‹ae› has no long-environment rule and
# always yields a long first element here.
ae → ɑːɨ;
ai → ai;
au → aɨ; ## As plural ending /a/, but we can't predict this
aw → au;
ei → əi;
eu → əɨ;
ew → ɛu;
ey → əɨ;
iw → ɪu;
oe → ɔɨ;
oi → ɔi;
ou → ɔɨ;
uw → ɨu;
wy → ʊɨ;
# Long environments
# Single vowels are lengthened only when the stress mark ˈ stands immediately
# before them and the right-hand context matches. Only the vowel inside the
# braces is rewritten. ‹u› lengthens to the same output as ɨ, and vocalic ‹w›
# lengthens to uː.
# Final, or before word-final s
ˈ { ɨ } s? $end → ɨː;
ˈ { a } s? $end → ɑː;
ˈ { e } s? $end → eː;
ˈ { i } s? $end → iː;
ˈ { o } s? $end → oː;
ˈ { u } s? $end → ɨː;
ˈ { w } s? $end → uː;
# before b, ch, d, dd, g, f, ff, th followed by the end of a word
# or a vowel
# The consonant set holds the IPA outputs of those spellings: b, χ (ch), d,
# ð (dd), ɡ (g), v (f), f (ff), θ (th). The seven vowels are listed twice,
# once for each right-hand context ($end, then $vowel).
ˈ { ɨ } [bχdðɡvfθ] $end → ɨː;
ˈ { a } [bχdðɡvfθ] $end → ɑː;
ˈ { e } [bχdðɡvfθ] $end → eː;
ˈ { i } [bχdðɡvfθ] $end → iː;
ˈ { o } [bχdðɡvfθ] $end → oː;
ˈ { u } [bχdðɡvfθ] $end → ɨː;
ˈ { w } [bχdðɡvfθ] $end → uː;
ˈ { ɨ } [bχdðɡvfθ] $vowel → ɨː;
ˈ { a } [bχdðɡvfθ] $vowel → ɑː;
ˈ { e } [bχdðɡvfθ] $vowel → eː;
ˈ { i } [bχdðɡvfθ] $vowel → iː;
ˈ { o } [bχdðɡvfθ] $vowel → oː;
ˈ { u } [bχdðɡvfθ] $vowel → ɨː;
ˈ { w } [bχdðɡvfθ] $vowel → uː;
# Short environments
# Context-free fallbacks for any vowel letter not rewritten above. Short ‹u›
# becomes ɨ followed by the combining mark \u031E, and any remaining
# lowercase ‹w› becomes the vowel ʊ (glides are still held as uppercase W).
a → a;
e → ɛ;
i → ɪ;
o → ɔ;
u → ɨ\u031E;
w → ʊ;
# End of the vowel pass.
::Null;
# Restore the temporary glide placeholder W to /w/. This sits after the pass
# containing the w → ʊ vowel rule, so the glide is not turned into a vowel.
W → w;
# Finally, deal with vowels that are marked as long with a circumflex
# (“to bach”). Do this last because we don't want the other vowel
# changes messing this up.
# Each circumflexed letter maps straight to a long vowel regardless of
# stress or context; û and ŷ both give ɨː.
â → ɑː;
ê → eː;
î → iː;
ô → oː;
û → ɨː;
ŵ → uː;
ŷ → ɨː;
::Null;
# Move IPA stress marker to start of syllable.
# The stress rules placed ˈ directly before the stressed vowel. This rule
# captures the onset standing in front of the mark -- one consonant,
# optionally followed by a liquid (l, ɬ, r or voiceless r), then optional j,
# then optional w -- and re-emits it after the mark.
# ‹w› is already a member of $cons, so listing it again in [$cons w] does not
# widen the set.
([$cons w] [l ɬ r {r\u0325}]? j? w?) ˈ → ˈ $1;