| // -*- Coding: utf-8; -*- |
| //-------------------------------------------------------------------- |
| // Copyright (c) 1999-2004, International Business Machines |
| // Corporation and others. All Rights Reserved. |
| //-------------------------------------------------------------------- |
| // THIS IS A MACHINE-GENERATED FILE |
| // Tool: dumpICUrules.bat |
| // Source: C:\work\DevICU4J\icu4j\src\com\ibm\icu\impl\data/Transliterator_Hiragana_Katakana.txt |
| // Date: Fri Mar 1 16:15:45 2002 |
| //-------------------------------------------------------------------- |
| |
| // Hiragana_Katakana |
| |
| t_Hira_Kana { |
| Rule { |
| //-------------------------------------------------------------------- |
| // Transliteration rules between Hiragana and Katakana. |
| // In every mapping rule the Hiragana side is on the left and the |
| // Katakana side is on the right. Rules written with <> apply in |
| // both directions; rules written with < apply only when going |
| // from Katakana to Hiragana. |
| //-------------------------------------------------------------------- |
| |
| // Global filter for the Hiragana-to-Katakana direction. |
| // note: a global filter is more efficient, but MUST include all source chars |
| // NOTE(review): the first escape (u0000) is written with a doubled |
| // backslash while every other escape uses a single one. Presumably |
| // this keeps U+0000 from being expanded to a literal NUL inside the |
| // string -- confirm before normalizing the escapes. |
| ":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;" |
| // NFKC normalization step; the empty parentheses leave the inverse |
| // side of this element empty (the inverse NFKC step is near the end). |
| ":: NFKC ();" |
| |
| // Hiragana-Katakana |
| |
| // This is largely a one-to-one mapping, but it has a |
| // few kinks: |
| |
| // 1. The Katakana va/vi/ve/vo (30F7-30FA) have no |
| // Hiragana equivalents. We use Hiragana wa/wi/we/wo |
| // (308F-3092) with a voicing mark (3099), which is |
| // semantically equivalent. However, this is a non- |
| // roundtripping transformation. |
| |
| // 2. The Katakana small ka/ke (30F5,30F6) have no |
| // Hiragana equivalents. We convert them to normal |
| // Hiragana ka/ke (304B,3051). This is a one-way |
| // information-losing transformation and precludes |
| // round-tripping of 30F5 and 30F6. |
| |
| // 3. The combining marks 3099-309C are in the Hiragana |
| // block, but they apply to Katakana as well, so we |
| // leave them untouched. |
| |
| // 4. The Katakana prolonged sound mark 30FC doubles the |
| // preceding vowel: going to Hiragana, it is replaced |
| // by the Hiragana vowel of the kana before it. This is |
| // a one-way information-losing transformation from |
| // Katakana to Hiragana. |
| |
| // 5. The Katakana middle dot separates words in foreign |
| // expressions; we leave this unmodified (no rule in |
| // this file mentions it). |
| |
| // The above points preclude successful round-trip |
| // transformations of arbitrary input text. However, |
| // they provide naturalistic results that should conform |
| // to user expectations. |
| |
| |
| // Combining equivalents va/vi/ve/vo |
| // Each Hiragana side is two characters: wa/wi/we/wo followed by |
| // the combining voicing mark 3099. These rules are listed before |
| // the plain wa/wi/we/wo rules of the main block; keep them first |
| // so the two-character sequences are not shadowed by the |
| // single-character rules. |
| "わ゙ <> ヷ;" |
| "ゐ゙ <> ヸ;" |
| "ゑ゙ <> ヹ;" |
| "を゙ <> ヺ;" |
| |
| // One-to-one mappings, main block |
| // 3041:3094 <> 30A1:30F4 |
| // 309D,E <> 30FD,E |
| "ぁ <> ァ;" |
| "あ <> ア;" |
| "ぃ <> ィ;" |
| "い <> イ;" |
| "ぅ <> ゥ;" |
| "う <> ウ;" |
| "ぇ <> ェ;" |
| "え <> エ;" |
| "ぉ <> ォ;" |
| "お <> オ;" |
| "か <> カ;" |
| "が <> ガ;" |
| "き <> キ;" |
| "ぎ <> ギ;" |
| "く <> ク;" |
| "ぐ <> グ;" |
| "け <> ケ;" |
| "げ <> ゲ;" |
| "こ <> コ;" |
| "ご <> ゴ;" |
| "さ <> サ;" |
| "ざ <> ザ;" |
| "し <> シ;" |
| "じ <> ジ;" |
| "す <> ス;" |
| "ず <> ズ;" |
| "せ <> セ;" |
| "ぜ <> ゼ;" |
| "そ <> ソ;" |
| "ぞ <> ゾ;" |
| "た <> タ;" |
| "だ <> ダ;" |
| "ち <> チ;" |
| "ぢ <> ヂ;" |
| "っ <> ッ;" |
| "つ <> ツ;" |
| "づ <> ヅ;" |
| "て <> テ;" |
| "で <> デ;" |
| "と <> ト;" |
| "ど <> ド;" |
| "な <> ナ;" |
| "に <> ニ;" |
| "ぬ <> ヌ;" |
| "ね <> ネ;" |
| "の <> ノ;" |
| "は <> ハ;" |
| "ば <> バ;" |
| "ぱ <> パ;" |
| "ひ <> ヒ;" |
| "び <> ビ;" |
| "ぴ <> ピ;" |
| "ふ <> フ;" |
| "ぶ <> ブ;" |
| "ぷ <> プ;" |
| "へ <> ヘ;" |
| "べ <> ベ;" |
| "ぺ <> ペ;" |
| "ほ <> ホ;" |
| "ぼ <> ボ;" |
| "ぽ <> ポ;" |
| "ま <> マ;" |
| "み <> ミ;" |
| "む <> ム;" |
| "め <> メ;" |
| "も <> モ;" |
| "ゃ <> ャ;" |
| "や <> ヤ;" |
| "ゅ <> ュ;" |
| "ゆ <> ユ;" |
| "ょ <> ョ;" |
| "よ <> ヨ;" |
| "ら <> ラ;" |
| "り <> リ;" |
| "る <> ル;" |
| "れ <> レ;" |
| "ろ <> ロ;" |
| "ゎ <> ヮ;" |
| "わ <> ワ;" |
| "ゐ <> ヰ;" |
| "ゑ <> ヱ;" |
| "を <> ヲ;" |
| "ん <> ン;" |
| "ゔ <> ヴ;" |
| "ゝ <> ヽ;" |
| "ゞ <> ヾ;" |
| |
| // One-way Katakana-Hiragana xform of small K ka/ke to |
| // normal H ka/ke. |
| "か < ヵ;" |
| "け < ヶ;" |
| |
| // Katakana followed by a prolonged sound mark 30FC has |
| // its final vowel doubled. This is a Katakana-Hiragana |
| // one-way information-losing transformation. We |
| // include the small kana (e.g., Katakana small A 30A1, |
| // which appears in the sets below as Hiragana small A |
| // 3041) and do not distinguish them from their large |
| // counterparts. It doesn't make sense to double a |
| // small counterpart vowel as a small Hiragana vowel, so |
| // we don't do so: the inserted vowel is always the |
| // full-size one. In natural text this should never |
| // occur anyway. If a 30FC is seen without a preceding |
| // vowel sound (e.g., after n 30F3) we do not change it. |
| |
| // Disabled variable definition for the prolonged sound mark; the |
| // rules below spell out the literal character instead. |
| //## $long = ー; |
| |
| // The following categories are Hiragana, not Katakana |
| // as might be expected, since by the time we get to the |
| // 30FC, the preceding character will have already been |
| // transformed to Hiragana. |
| |
| // {The following mechanically generated from the |
| // Unicode 3.0 data:} |
| |
| // $xa: Hiragana after which the prolonged sound mark becomes あ. |
| "$xa = [" |
| "ぁ あ か が さ ざ" |
| "た だ な は ば ぱ" |
| "ま ゃ や ら ゎ わ" |
| "];" |
| |
| // $xi: Hiragana after which the prolonged sound mark becomes い. |
| "$xi = [" |
| "ぃ い き ぎ し じ" |
| "ち ぢ に ひ び ぴ" |
| "み り ゐ" |
| "];" |
| |
| // $xu: Hiragana after which the prolonged sound mark becomes う. |
| // NOTE(review): this set contains small tsu (3063), so a prolonged |
| // sound mark directly after small tsu is rewritten as う -- confirm |
| // this is intended and not a side effect of the mechanical generation. |
| "$xu = [" |
| "ぅ う く ぐ す ず" |
| "っ つ づ ぬ ふ ぶ" |
| "ぷ む ゅ ゆ る ゔ" |
| "];" |
| |
| // $xe: Hiragana after which the prolonged sound mark becomes え. |
| "$xe = [" |
| "ぇ え け げ せ ぜ" |
| "て で ね へ べ ぺ" |
| "め れ ゑ" |
| "];" |
| |
| // $xo: Hiragana after which the prolonged sound mark becomes お. |
| "$xo = [" |
| "ぉ お こ ご そ ぞ" |
| "と ど の ほ ぼ ぽ" |
| "も ょ よ ろ を" |
| "];" |
| |
| // One-way (Katakana-to-Hiragana) rules for the prolonged sound mark. |
| // The set before the braces is preceding context only; the mark |
| // inside the braces is the text that gets replaced by the vowel. |
| "あ < $xa {ー};" |
| "い < $xi {ー};" |
| "う < $xu {ー};" |
| "え < $xe {ー};" |
| "お < $xo {ー};" |
| |
| // Parenthesized NFKC step: the counterpart, for the |
| // Katakana-to-Hiragana direction, of the NFKC element near the top. |
| ":: (NFKC) ;" |
| |
| // Parenthesized global filter: the Katakana-to-Hiragana counterpart |
| // of the filter at the top, with the same character set. |
| // note: a global filter is more efficient, but MUST include all source chars!! |
| ":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);" |
| |
| // eof |
| } |
| } |