blob: 1796fe63ea7a43f46a7cad99930d863463456bfb [file] [log] [blame]
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/Transliterator_Latin_Katakana.txt,v $
# $Date: 2001/10/20 01:33:44 $
# $Revision: 1.5 $
#--------------------------------------------------------------------
# Forward direction (Latin -> Katakana): decompose first, so the rules below
# see base characters followed by separate combining marks (the macron in
# $macron, the voicing marks in $voice / $semivoice).  The parenthesised NFC
# is the transform applied at this position when running in reverse.
::NFD (NFC) ;
# Forward direction only: lowercase Latin-script characters, so the rules need
# only lowercase romaji keys.  The empty () means nothing is applied at this
# position in the reverse direction.
:: [:Latin:] Lower ();
# Uses modified Hepburn. Small changes to make unambiguous.
# | Kunrei-shiki: Hepburn/MHepburn
# | ------------------------------
# | si: shi
# | si ~ya: sha
# | si ~yu: shu
# | si ~yo: sho
# | zi: ji
# | zi ~ya: ja
# | zi ~yu: ju
# | zi ~yo: jo
# | ti: chi
# | ti ~ya: cha
# | ti ~yu: chu
# | ti ~yo: cho
# | tu: tsu
# | di: ji/dji
# | du: zu/dzu
# | hu: fu
# | For foreign words:
# | -----------------
# | se ~i si
# | si ~e she
# |
# | ze ~i zi
# | zi ~e je
# |
# | te ~i ti
# | ti ~e che
# | te ~u tu
# |
# | de ~i di
# | de ~u du
# | de ~i di
# |
# | he ~u: hu
# | hu ~a fa
# | hu ~i fi
# | hu ~e he
# | hu ~o ho
# Most small forms are generated, but if necessary
# explicit small forms are given with ~a, ~ya, etc.
#------------------------------------------------------
# Variables
$vowel = [aeiou] ;
# Combining macron; mapped to/from the prolonged sound mark further down.
$macron = \u0304 ;
# Variables used for doubled-consonants with tsu
$kana = [\u3041-\u3094] ;
# Combining and spacing forms of the (semi-)voiced sound marks.
$voice = [\u3099\u309B];
$semivoice = [\u309A\u309C];
# Each *_start set lists the kana (katakana and hiragana) whose romaji begins
# with that consonant.  The doubled-consonant rules use them as trailing
# context to decide which consonant a small tsu doubles.
$k_start = [カキクケコかきくけこ] ;
$s_start = [サシスセソさしすせそ] ;
$j_start = [シし] $voice ;
$t_start = [タチツテトたちつてと] ;
$n_start = [ナニヌネノンなにぬねの] ;
$h_start = [ハヒヘホはひへほ] ;
$f_start = [フふ] ;
$m_start = [マミムメモまみむめも] ;
$y_start = [ヤユヨやゆよ] ;
$r_start = [ラリルレロらりるれろ] ;
$w_start = [ワヰヱヲわゐゑを] ;
$v_start = [ワヰヱヲ]゙ ;
# if ン is followed by $n_quoter, then it needs an
# apostrophe after its romaji form to disambiguate it.
# e.g., ン ア != ナ, so represent as "n'a", not "na".
# The set must hold every kana whose romaji starts with a vowel, n or y:
# without the apostrophe, ン + イ would read back as ニ, ン + ナ as ッナ,
# and ン + ヤ as ニャ.  (Spaces inside the set are ignored.)
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
# Small kana that follow an i-column kana to form a contracted syllable.
$small_y = [ャィュェョ] ;
$iteration = \u309D ;
#------------------------------------------------------
# katakana rules
# Punctuation
'.' <> 。;
',' <> 、;
# ' ' } [a-z] > ; # delete spaces before latin
# ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
# Iteration Mark
# Copy previous letter & marks
# TODO
# | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
# Specials for katakana -- not shared with hiragana
# Voiced wa-row kana, written as base kana + combining voiced mark to match
# the decomposed text the rules operate on.
va <> ヷ ;
vi <> ヸ ;
ve <> ヹ ;
vo <> ヺ ;
# Small ka / small ke.  The kana side must be present: an empty right-hand
# side would delete '~ka' / '~ke' going forward and leave the reverse rule
# with nothing to match.
'~ka' <> ヵ ;
'~ke' <> ヶ ;
# ~~~ begin shared rules ~~~
#special
# Reverse direction only.  The "<consonant> | '~' < <kana> } $small_y" rules
# below emit the consonant plus a '~' marker in front of a small kana; these
# rules then turn that marker + small kana into the y-glide romaji.
# All five members of $small_y need a rule here, including small i.
ya < '~'ャ;
yi < '~'ィ ;
yu < '~'ュ;
ye < '~'ェ;
yo < '~'ョ;
#normal
# Rule shapes used in this section:
#   xa <> K ;                    two-way syllable mapping
#   x | '~' < K } $small_y ;     reverse: K before a small kana -> "x~"
#   xy } $vowel > K | '~y' ;     forward: "xy" before a vowel -> K, then
#                                re-read "~y<vowel>" as a small kana
# Every mapping needs its kana operand; a bare "a <> ;" would delete the
# letter in the forward direction.
a <> ア ;
b | '~' < ビ} $small_y ;
by } $vowel > ビ | '~y' ;
ba <> バ ;
bi <> ビ ;
bu <> ブ ;
be <> ベ ;
bo <> ボ ;
# Soft c: rewrite to s and back up so the s rules handle it.
c } i > | s ;
c } e > | s ;
da <> ダ ;
di <> ディ ;
du <> デゥ ;
de <> デ ;
do <> ド ;
dzu <> ヅ ;
dja < ヂャ ;
dji'~i' < ヂィ ; # liu
dju < ヂュ ;
dje < ヂェ ;
djo < ヂョ ;
dji <> ヂ ;
dj } $vowel > ヂ | '~y' ;
# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
cha < チャ ;
chi'~i' < チィ ; # liu
chu < チュ ;
che < チェ ;
cho < チョ ;
chi <> チ ;
ch } $vowel > チ | '~y' ;
e <> エ ;
g | '~' < ギ} $small_y ;
gy } $vowel > ギ | '~y' ;
ga <> ガ ;
gi <> ギ ;
gu <> グ ;
ge <> ゲ ;
go <> ゴ ;
i <> イ ;
# j } $vowel > ジ | '~y' ;
ja <> ジャ ;
ji'~i' < ジィ ; # liu
ju <> ジュ ;
je <> ジェ ;
jo <> ジョ ;
ji <> ジ ;
# k row
k | '~' < キ} $small_y ;
ky } $vowel > キ | '~y' ;
ka <> カ ;
ki <> キ ;
ku <> ク ;
ke <> ケ ;
ko <> コ ;
# m row
m | '~' < ミ} $small_y ;
my } $vowel > ミ | '~y' ;
ma <> マ ;
mi <> ミ ;
mu <> ム ;
me <> メ ;
mo <> モ ;
# m before a labial is the syllabic nasal (forward only).
m } [pbfv] > ン ;
# n row
n | '~' < ニ} $small_y ;
ny } $vowel > ニ | '~y' ;
na <> ナ ;
ni <> ニ ;
nu <> ヌ ;
ne <> ネ ;
no <> ノ ;
o <> オ ;
# p row: h-row kana + combining semi-voiced mark
p | '~' < ピ } $small_y ;
py } $vowel > ピ | '~y' ;
pa <> パ ;
pi <> ピ ;
pu <> プ ;
pe <> ペ ;
po <> ポ ;
# h row ("hu" is the foreign-word form; the native syllable is "fu")
h | '~' < ヒ} $small_y ;
hy } $vowel > ヒ | '~y' ;
ha <> ハ ;
hi <> ヒ ;
hu <> ヘゥ ;
he <> ヘ ;
ho <> ホ ;
# f | '~' < フ } $small_y ;
# f } $vowel > フ | '~' ;
fa <> ファ ;
fi <> フィ ;
fe <> フェ ;
fo <> フォ ;
fu <> フ ;
# r row
r | '~' < リ} $small_y ;
ry } $vowel > リ | '~y' ;
ra <> ラ ;
ri <> リ ;
ru <> ル ;
re <> レ ;
ro <> ロ ;
# z row: s-row kana + combining voiced mark ("zi" is the foreign-word form)
za <> ザ ;
zi <> ゼィ ;
zu <> ズ ;
ze <> ゼ ;
zo <> ゾ ;
# s row ("si" is the foreign-word form; the native syllable is "shi")
sa <> サ ;
si <> セィ ;
su <> ス ;
se <> セ ;
so <> ソ ;
sha < シャ ;
shi'~i' < シィ ; # liu
shu < シュ ;
she < シェ ;
sho < ショ ;
shi <> シ ;
sh } $vowel > シ | '~y' ;
# t row ("ti"/"tu" are foreign-word forms; native syllables are chi/tsu)
ta <> タ ;
ti <> ティ ;
tu <> テゥ ;
te <> テ ;
to <> ト ;
tsu <> ツ ;
# v } $vowel > ヴ | '~' ;
#'v~a' < ヴァ ; # liu
#'v~i' < ヴィ ; # liu
#'v~e' < ヴェ ; # liu
#'v~o' < ヴォ ; # liu
vu <> ヴ ;
u <> ウ ;
# w } $vowel > ウ | '~' ;
# w row; "wu" has no kana of its own, so it is forward-only onto ウ.
wa <> ワ ;
wi <> ヰ ;
wu > ウ ;
we <> ヱ ;
wo <> ヲ ;
# y row; "yi" and "ye" have no kana of their own, so they are forward-only.
ya <> ヤ ;
yi > イ ;
yu <> ユ ;
ye > エ ;
yo <> ヨ ;
# double consonants
# Forward: the first letter of a doubled consonant becomes small tsu.
# Reverse: small tsu becomes the consonant of the kana that follows it, which
# is identified by the trailing *_start context.  Voiced rules come before the
# unvoiced ones so that a kana carrying a voicing mark is matched first.
#specials
s } sh > ッ ;
t } ch > ッ ;
#voiced
j } j <> ッ } $j_start ;
b } b <> ッ } [$h_start$f_start] $voice;
d } d <> ッ } $t_start $voice;
g } g <> ッ } $k_start $voice;
p } p <> ッ } [$h_start$f_start] $semivoice;
# v } v <> ッ } [ワヰウヱヲう] $voice ;
z } z <> ッ } $s_start $voice;
v } v <> ッ } $v_start;
# normal
k } k <> ッ } $k_start ;
m } m <> ッ } $m_start ;
n } n <> ッ } $n_start ;
h } h <> ッ } $h_start ;
f } f <> ッ } $f_start ;
r } r <> ッ } $r_start ;
t } t <> ッ } $t_start ;
s } s <> ッ } $s_start ;
w } w <> ッ } $w_start;
y } y <> ッ } $y_start;
# completeness
# Forward-only: letters that are not native romaji still double via small tsu.
x } x > ッ ;
c } k > ッ ;
c } c > ッ ;
c } q > ッ ;
l } l > ッ ;
q } q > ッ ;
# y } y > ッ ;
# w } w > ッ ;
# prolonged vowel mark. this indicates a doubling of
# the preceding vowel sound
#a < a { ー ; # liu
#e < e { ー ; # liu
#i < i { ー ; # liu
#o < o { ー ; # liu
#u < u { ー ; # liu
# The combining macron left on a vowel by NFD maps to/from the prolonged
# sound mark.
$macron <> ー ;
# small forms
# Explicit '~'-prefixed romaji for the small kana.  '~yi' and '~ye' are
# forward-only because small i / small e already reverse to '~i' / '~e'.
'~a' <> ァ ;
'~i' <> ィ ;
'~u' <> ゥ ;
'~e' <> ェ ;
'~o' <> ォ ;
'~tsu' <> ッ ;
'~wa' <> ヮ ;
'~ya' <> ャ ;
'~yi' > ィ ;
'~yu' <> ュ ;
'~ye' > ェ ;
'~yo' <> ョ ;
# h- rule: lengthens vowel if not followed by a vowel
# NOTE(review): as written the vowel is the matched key and "h" is trailing
# context; confirm whether "[aeiou] { h" (vowel as preceding context) was
# intended.  Left structurally unchanged.
[aeiou] } h > ー ;
# one-way latin -> kana rules. these do not occur in
# well-formed romaji representing actual japanese text.
# their purpose is to make all romaji map to kana of
# some sort.
# the following are not really necessary, but produce
# slightly more natural results.
# A consonant + y with no vowel after it is read as consonant + i.
cy > セィ ;
dy > ディ ;
hy > ヒ ;
sy > セィ ;
ty > ティ ;
zy > ゼィ ;
# Bare h takes the e-column kana, like d and t below (cf. hu = ヘゥ).
h > ヘ ;
# isolated consonants listed here so as not to mask
# longer rules above.
ch > チ;
sh > シ;
dz > ヅ ;
dj > ヂ;
b > ブ ;
d > デ ;
g > グ ;
k > ク ;
m > ム ;
# Reverse only: ン before a kana in $n_quoter is written "n'" so that it is
# not read back as the start of another syllable.
n'' < ン } $n_quoter ;
n <> ン ;
p > プ ;
r > ル ;
s > ス ;
t > テ ;
y > イ ;
z > ズ ;
v > ヴ ;
f > フ;
j > ジ;
w > ウ;
# simple substitutions using backup
# Rewrite the letter and move the cursor back so the rules above re-read it.
c > | k ;
l > | r ;
q > | k ;
x > | ks ;
# ~~~ END shared rules ~~~
#------------------------------------------------------
# Final cleanup
# Both rules are forward-only (">") and intentionally have an empty
# right-hand side: they delete helper characters left over from earlier rules.
'~' > ; # delete stray tildes between letters
[:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters
# Recompose at the end of the forward pass.  The parenthesised NFD is the
# transform applied at this position when the rules run in reverse.
:: NFC (NFD) ;
# eof