source/data/translit/t_Hira_Kana.txt - external/github.com/unicode-org/icu - Git at Google

  // -*- Coding: utf-8; -*-
 //--------------------------------------------------------------------
 // Copyright (c) 1999-2004, International Business Machines
 // Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
 // THIS IS A MACHINE-GENERATED FILE
 // Tool: dumpICUrules.bat
 // Source: C:\work\DevICU4J\icu4j\src\com\ibm\icu\impl\data/Transliterator_Hiragana_Katakana.txt
 // Date: Fri Mar  1 16:15:45 2002
 //--------------------------------------------------------------------

 // Hiragana_Katakana

 t_Hira_Kana {
   Rule {
 //--------------------------------------------------------------------
 // Transliteration rules between Hiragana and Katakana.
 //
 // Notation used by the rule strings below:
 //   "a <> b;"        a (Hiragana) maps to b (Katakana), and b maps
 //                    back to a.
 //   "a < b;"         one-way: b (Katakana) is rewritten to a
 //                    (Hiragana) only.
 //   "a < $set {b};"  one-way: b is rewritten to a only when the
 //                    character before it is in $set; the part
 //                    outside the braces is context and is not
 //                    itself replaced.
 //   ":: X (Y);"      per ICU transform-rule syntax, X applies to
 //                    the forward direction and Y (inside the
 //                    parentheses) to the reverse direction; an
 //                    empty side applies nothing in that direction.
 //
 // NOTE(review): a table named t_Hira_Kana with the same rules
 // appears a second time later in this file -- confirm whether the
 // duplication is intentional.
 //--------------------------------------------------------------------

 // note: a global filter is more efficient, but MUST include all source chars
 ":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
 // Forward direction only: normalize to NFKC (the reverse side,
 // inside the parentheses, is empty).
 ":: NFKC ();"

 // Hiragana-Katakana

 // This is largely a one-to-one mapping, but it has a
 // few kinks:

 // 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
 // Hiragana equivalents.  We use Hiragana wa/wi/we/wo
 // (308F-3092) with a voicing mark (3099), which is
 // semantically equivalent.  However, this is a non-
 // roundtripping transformation.

 // 2. The Katakana small ka/ke (30F5,30F6) have no
 // Hiragana equivalents.  We convert them to normal
 // Hiragana ka/ke (304B,3051).  This is a one-way
 // information-losing transformation and precludes
 // round-tripping of 30F5 and 30F6.

 // 3. The combining marks 3099-309C are in the Hiragana
 // block, but they apply to Katakana as well, so we
 // leave them untouched.

 // 4. The Katakana prolonged sound mark 30FC doubles the
 // preceding vowel.  This is a one-way information-
 // losing transformation from Katakana to Hiragana.

 // 5. The Katakana middle dot separates words in foreign
 // expressions; we leave this unmodified.

 // The above points preclude successful round-trip
 // transformations of arbitrary input text.  However,
 // they provide naturalistic results that should conform
 // to user expectations.


 // Combining equivalents va/vi/ve/vo
 // (left side is Hiragana wa/wi/we/wo followed by the combining
 // voicing mark 3099; see kink 1 above)
 "わ゙ <> ヷ;"
 "ゐ゙ <> ヸ;"
 "ゑ゙ <> ヹ;"
 "を゙ <> ヺ;"

 // One-to-one mappings, main block
 // 3041:3094 <> 30A1:30F4
 // 309D,E <> 30FD,E
 "ぁ <> ァ;"
 "あ <> ア;"
 "ぃ <> ィ;"
 "い <> イ;"
 "ぅ <> ゥ;"
 "う <> ウ;"
 "ぇ <> ェ;"
 "え <> エ;"
 "ぉ <> ォ;"
 "お <> オ;"
 "か <> カ;"
 "が <> ガ;"
 "き <> キ;"
 "ぎ <> ギ;"
 "く <> ク;"
 "ぐ <> グ;"
 "け <> ケ;"
 "げ <> ゲ;"
 "こ <> コ;"
 "ご <> ゴ;"
 "さ <> サ;"
 "ざ <> ザ;"
 "し <> シ;"
 "じ <> ジ;"
 "す <> ス;"
 "ず <> ズ;"
 "せ <> セ;"
 "ぜ <> ゼ;"
 "そ <> ソ;"
 "ぞ <> ゾ;"
 "た <> タ;"
 "だ <> ダ;"
 "ち <> チ;"
 "ぢ <> ヂ;"
 "っ <> ッ;"
 "つ <> ツ;"
 "づ <> ヅ;"
 "て <> テ;"
 "で <> デ;"
 "と <> ト;"
 "ど <> ド;"
 "な <> ナ;"
 "に <> ニ;"
 "ぬ <> ヌ;"
 "ね <> ネ;"
 "の <> ノ;"
 "は <> ハ;"
 "ば <> バ;"
 "ぱ <> パ;"
 "ひ <> ヒ;"
 "び <> ビ;"
 "ぴ <> ピ;"
 "ふ <> フ;"
 "ぶ <> ブ;"
 "ぷ <> プ;"
 "へ <> ヘ;"
 "べ <> ベ;"
 "ぺ <> ペ;"
 "ほ <> ホ;"
 "ぼ <> ボ;"
 "ぽ <> ポ;"
 "ま <> マ;"
 "み <> ミ;"
 "む <> ム;"
 "め <> メ;"
 "も <> モ;"
 "ゃ <> ャ;"
 "や <> ヤ;"
 "ゅ <> ュ;"
 "ゆ <> ユ;"
 "ょ <> ョ;"
 "よ <> ヨ;"
 "ら <> ラ;"
 "り <> リ;"
 "る <> ル;"
 "れ <> レ;"
 "ろ <> ロ;"
 "ゎ <> ヮ;"
 "わ <> ワ;"
 "ゐ <> ヰ;"
 "ゑ <> ヱ;"
 "を <> ヲ;"
 "ん <> ン;"
 "ゔ <> ヴ;"
 "ゝ <> ヽ;"
 "ゞ <> ヾ;"

 // One-way Katakana-Hiragana xform of small K ka/ke to
 // normal H ka/ke.
 "か < ヵ;"
 "け < ヶ;"

 // Katakana followed by a prolonged sound mark 30FC has
 // its final vowel doubled.  This is a Katakana-Hiragana
 // one-way information-losing transformation.  We
 // include the small kana (e.g., small A, Katakana 30A1 /
 // Hiragana 3041) and do not distinguish them from their
 // large counterparts.  It doesn't make sense to double a
 // small counterpart vowel as a small Hiragana vowel, so
 // we don't do so.  In natural text this should never
 // occur anyway.  If a 30FC is seen without a preceding
 // vowel sound (e.g., after n 30F3) we do not change it.

 //## $long = ー;

 // The following categories are Hiragana, not Katakana
 // as might be expected, since by the time we get to the
 // 30FC, the preceding character will have already been
 // transformed to Hiragana.

 // {The following mechanically generated from the
 // Unicode 3.0 data:}

 // $xa: Hiragana whose vowel sound is "a".
 "$xa = ["
 "ぁ あ か が さ ざ"
 "た だ な は ば ぱ"
 "ま ゃ や ら ゎ わ"
 "];"

 // $xi: Hiragana whose vowel sound is "i".
 "$xi = ["
 "ぃ い き ぎ し じ"
 "ち ぢ に ひ び ぴ"
 "み り ゐ"
 "];"

 // $xu: Hiragana whose vowel sound is "u" (the set also lists
 // small tsu).
 "$xu = ["
 "ぅ う く ぐ す ず"
 "っ つ づ ぬ ふ ぶ"
 "ぷ む ゅ ゆ る ゔ"
 "];"

 // $xe: Hiragana whose vowel sound is "e".
 "$xe = ["
 "ぇ え け げ せ ぜ"
 "て で ね へ べ ぺ"
 "め れ ゑ"
 "];"

 // $xo: Hiragana whose vowel sound is "o".
 "$xo = ["
 "ぉ お こ ご そ ぞ"
 "と ど の ほ ぼ ぽ"
 "も ょ よ ろ を"
 "];"

 // Replace a prolonged sound mark with the full-size Hiragana
 // vowel matching the preceding kana.  Kana that appear in none
 // of the sets above (e.g. ん) leave the mark unchanged.
 "あ < $xa {ー};"
 "い < $xi {ー};"
 "う < $xu {ー};"
 "え < $xe {ー};"
 "お < $xo {ー};"

 // Reverse direction only: normalize to NFKC (the forward side,
 // outside the parentheses, is empty).
 ":: (NFKC) ;"

 // note: a global filter is more efficient, but MUST include all source chars!!
 // (this is the reverse-direction counterpart of the filter at the
 // top of the rule list)
 ":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"

 // eof
   }
 }
	// -- Coding: utf-8; --
	//--------------------------------------------------------------------
	// Copyright (c) 1999-2004, International Business Machines
	// Corporation and others. All Rights Reserved.
	//--------------------------------------------------------------------
	// THIS IS A MACHINE-GENERATED FILE
	// Tool: dumpICUrules.bat
	// Source: C:\work\DevICU4J\icu4j\src\com\ibm\icu\impl\data/Transliterator_Hiragana_Katakana.txt
	// Date: Fri Mar 1 16:15:45 2002
	//--------------------------------------------------------------------

	// Hiragana_Katakana

	t_Hira_Kana {
	Rule {
	//--------------------------------------------------------------------
	// Transliteration rules between Hiragana and Katakana.
	//
	// Notation used by the rule strings below:
	//   "a <> b;"        a (Hiragana) maps to b (Katakana), and b maps
	//                    back to a.
	//   "a < b;"         one-way: b (Katakana) is rewritten to a
	//                    (Hiragana) only.
	//   "a < $set {b};"  one-way: b is rewritten to a only when the
	//                    character before it is in $set; the part
	//                    outside the braces is context and is not
	//                    itself replaced.
	//   ":: X (Y);"      per ICU transform-rule syntax, X applies to
	//                    the forward direction and Y (inside the
	//                    parentheses) to the reverse direction; an
	//                    empty side applies nothing in that direction.
	//
	// NOTE(review): a table named t_Hira_Kana with the same rules
	// already appears earlier in this file -- confirm whether the
	// duplication is intentional.
	//--------------------------------------------------------------------

	// note: a global filter is more efficient, but MUST include all source chars
	":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
	// Forward direction only: normalize to NFKC (the reverse side,
	// inside the parentheses, is empty).
	":: NFKC ();"

	// Hiragana-Katakana

	// This is largely a one-to-one mapping, but it has a
	// few kinks:

	// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
	// Hiragana equivalents. We use Hiragana wa/wi/we/wo
	// (308F-3092) with a voicing mark (3099), which is
	// semantically equivalent. However, this is a non-
	// roundtripping transformation.

	// 2. The Katakana small ka/ke (30F5,30F6) have no
	// Hiragana equivalents. We convert them to normal
	// Hiragana ka/ke (304B,3051). This is a one-way
	// information-losing transformation and precludes
	// round-tripping of 30F5 and 30F6.

	// 3. The combining marks 3099-309C are in the Hiragana
	// block, but they apply to Katakana as well, so we
	// leave them untouched.

	// 4. The Katakana prolonged sound mark 30FC doubles the
	// preceding vowel. This is a one-way information-
	// losing transformation from Katakana to Hiragana.

	// 5. The Katakana middle dot separates words in foreign
	// expressions; we leave this unmodified.

	// The above points preclude successful round-trip
	// transformations of arbitrary input text. However,
	// they provide naturalistic results that should conform
	// to user expectations.


	// Combining equivalents va/vi/ve/vo
	// (left side is Hiragana wa/wi/we/wo followed by the combining
	// voicing mark 3099; see kink 1 above)
	"わ゙ <> ヷ;"
	"ゐ゙ <> ヸ;"
	"ゑ゙ <> ヹ;"
	"を゙ <> ヺ;"

	// One-to-one mappings, main block
	// 3041:3094 <> 30A1:30F4
	// 309D,E <> 30FD,E
	"ぁ <> ァ;"
	"あ <> ア;"
	"ぃ <> ィ;"
	"い <> イ;"
	"ぅ <> ゥ;"
	"う <> ウ;"
	"ぇ <> ェ;"
	"え <> エ;"
	"ぉ <> ォ;"
	"お <> オ;"
	"か <> カ;"
	"が <> ガ;"
	"き <> キ;"
	"ぎ <> ギ;"
	"く <> ク;"
	"ぐ <> グ;"
	"け <> ケ;"
	"げ <> ゲ;"
	"こ <> コ;"
	"ご <> ゴ;"
	"さ <> サ;"
	"ざ <> ザ;"
	"し <> シ;"
	"じ <> ジ;"
	"す <> ス;"
	"ず <> ズ;"
	"せ <> セ;"
	"ぜ <> ゼ;"
	"そ <> ソ;"
	"ぞ <> ゾ;"
	"た <> タ;"
	"だ <> ダ;"
	"ち <> チ;"
	"ぢ <> ヂ;"
	"っ <> ッ;"
	"つ <> ツ;"
	"づ <> ヅ;"
	"て <> テ;"
	"で <> デ;"
	"と <> ト;"
	"ど <> ド;"
	"な <> ナ;"
	"に <> ニ;"
	"ぬ <> ヌ;"
	"ね <> ネ;"
	"の <> ノ;"
	"は <> ハ;"
	"ば <> バ;"
	"ぱ <> パ;"
	"ひ <> ヒ;"
	"び <> ビ;"
	"ぴ <> ピ;"
	"ふ <> フ;"
	"ぶ <> ブ;"
	"ぷ <> プ;"
	"へ <> ヘ;"
	"べ <> ベ;"
	"ぺ <> ペ;"
	"ほ <> ホ;"
	"ぼ <> ボ;"
	"ぽ <> ポ;"
	"ま <> マ;"
	"み <> ミ;"
	"む <> ム;"
	"め <> メ;"
	"も <> モ;"
	"ゃ <> ャ;"
	"や <> ヤ;"
	"ゅ <> ュ;"
	"ゆ <> ユ;"
	"ょ <> ョ;"
	"よ <> ヨ;"
	"ら <> ラ;"
	"り <> リ;"
	"る <> ル;"
	"れ <> レ;"
	"ろ <> ロ;"
	"ゎ <> ヮ;"
	"わ <> ワ;"
	"ゐ <> ヰ;"
	"ゑ <> ヱ;"
	"を <> ヲ;"
	"ん <> ン;"
	"ゔ <> ヴ;"
	"ゝ <> ヽ;"
	"ゞ <> ヾ;"

	// One-way Katakana-Hiragana xform of small K ka/ke to
	// normal H ka/ke.
	"か < ヵ;"
	"け < ヶ;"

	// Katakana followed by a prolonged sound mark 30FC has
	// its final vowel doubled. This is a Katakana-Hiragana
	// one-way information-losing transformation. We
	// include the small kana (e.g., small A, Katakana 30A1 /
	// Hiragana 3041) and do not distinguish them from their
	// large counterparts. It doesn't make sense to double a
	// small counterpart vowel as a small Hiragana vowel, so
	// we don't do so. In natural text this should never
	// occur anyway. If a 30FC is seen without a preceding
	// vowel sound (e.g., after n 30F3) we do not change it.

	//## $long = ー;

	// The following categories are Hiragana, not Katakana
	// as might be expected, since by the time we get to the
	// 30FC, the preceding character will have already been
	// transformed to Hiragana.

	// {The following mechanically generated from the
	// Unicode 3.0 data:}

	// $xa: Hiragana whose vowel sound is "a".
	"$xa = ["
	"ぁあかがさざ"
	"ただなはばぱ"
	"まゃやらゎわ"
	"];"

	// $xi: Hiragana whose vowel sound is "i".
	"$xi = ["
	"ぃいきぎしじ"
	"ちぢにひびぴ"
	"みりゐ"
	"];"

	// $xu: Hiragana whose vowel sound is "u" (the set also lists
	// small tsu).
	"$xu = ["
	"ぅうくぐすず"
	"っつづぬふぶ"
	"ぷむゅゆるゔ"
	"];"

	// $xe: Hiragana whose vowel sound is "e".
	"$xe = ["
	"ぇえけげせぜ"
	"てでねへべぺ"
	"めれゑ"
	"];"

	// $xo: Hiragana whose vowel sound is "o".
	"$xo = ["
	"ぉおこごそぞ"
	"とどのほぼぽ"
	"もょよろを"
	"];"

	// Replace a prolonged sound mark with the full-size Hiragana
	// vowel matching the preceding kana. Kana that appear in none
	// of the sets above (e.g. ん) leave the mark unchanged.
	"あ < $xa {ー};"
	"い < $xi {ー};"
	"う < $xu {ー};"
	"え < $xe {ー};"
	"お < $xo {ー};"

	// Reverse direction only: normalize to NFKC (the forward side,
	// outside the parentheses, is empty).
	":: (NFKC) ;"

	// note: a global filter is more efficient, but MUST include all source chars!!
	// (this is the reverse-direction counterpart of the filter at the
	// top of the rule list)
	":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"

	// eof
	}
	}