|  // -*- Coding: utf-8; -*- | 
 | //-------------------------------------------------------------------- | 
 | // Copyright (c) 1999-2002, International Business Machines | 
 | // Corporation and others.  All Rights Reserved. | 
 | //-------------------------------------------------------------------- | 
 | // THIS IS A MACHINE-GENERATED FILE | 
 | // Tool: dumpicurules.bat | 
 | // Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt | 
 | // Date: Sat Jul 27 10:31:07 2002 | 
 | //-------------------------------------------------------------------- | 
 |  | 
 | // Hiragana_Katakana | 
 |  | 
 | t_Hira_Kana { | 
 |   Rule { | 
 | //-------------------------------------------------------------------- | 
 | // Rule set for the Hiragana <-> Katakana transform.  In every rule | 
 | // the Hiragana form is on the left and the Katakana form on the | 
 | // right: "<>" rules apply in both directions, "<" rules apply only | 
 | // when converting Katakana to Hiragana. | 
 | // NOTE(review): rule order appears to be significant (longer | 
 | // matches are listed before shorter ones, variables are defined | 
 | // before use); confirm against the ICU rule syntax before | 
 | // reordering anything. | 
 | //-------------------------------------------------------------------- | 
 | //-------------------------------------------------------------------- | 
 |  | 
 | // note: a global filter is more efficient, but MUST include all source chars | 
 | // NOTE(review): \\u0000 is double-escaped while the other \u escapes | 
 | // are not -- presumably so that the NUL is resolved by the rule | 
 | // parser rather than by the resource compiler; confirm before | 
 | // normalizing the escapes. | 
 | ":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;" | 
 | // NFKC normalization for the Hiragana-to-Katakana direction.  The | 
 | // empty parentheses presumably mean "no step in the reverse | 
 | // direction" (ICU "::" syntax -- confirm). | 
 | ":: NFKC ();" | 
 |  | 
 | // Hiragana-Katakana | 
 |  | 
 | // This is largely a one-to-one mapping, but it has a | 
 | // few kinks: | 
 |  | 
 | // 1. The Katakana va/vi/ve/vo (30F7-30FA) have no | 
 | // Hiragana equivalents.  We use Hiragana wa/wi/we/wo | 
 | // (308F-3092) with a voicing mark (3099), which is | 
 | // semantically equivalent.  However, this is a non- | 
 | // roundtripping transformation. | 
 |  | 
 | // 2. The Katakana small ka/ke (30F5,30F6) have no | 
 | // Hiragana equivalents.  We convert them to normal | 
 | // Hiragana ka/ke (304B,3051).  This is a one-way | 
 | // information-losing transformation and precludes | 
 | // round-tripping of 30F5 and 30F6. | 
 |  | 
 | // 3. The combining marks 3099-309C are in the Hiragana | 
 | // block, but they apply to Katakana as well, so we | 
 | // leave them untouched. | 
 |  | 
 | // 4. The Katakana prolonged sound mark 30FC doubles the | 
 | // preceding vowel.  This is a one-way information- | 
 | // losing transformation from Katakana to Hiragana. | 
 |  | 
 | // 5. The Katakana middle dot separates words in foreign | 
 | // expressions; we leave this unmodified. | 
 |  | 
 | // The above points preclude successful round-trip | 
 | // transformations of arbitrary input text.  However, | 
 | // they provide naturalistic results that should conform | 
 | // to user expectations. | 
 |  | 
 |  | 
 | // Combining equivalents va/vi/ve/vo | 
 | // Each left-hand side is Hiragana wa/wi/we/wo followed by the | 
 | // combining voicing mark 3099 (see kink 1 above).  These two- | 
 | // character rules are listed ahead of the single-character | 
 | // wa/wi/we/wo rules in the main block, presumably so that the | 
 | // longer match wins. | 
 | "わ゙ <> ヷ;" | 
 | "ゐ゙ <> ヸ;" | 
 | "ゑ゙ <> ヹ;" | 
 | "を゙ <> ヺ;" | 
 |  | 
 | // One-to-one mappings, main block | 
 | // 3041:3094 <> 30A1:30F4 | 
 | // 309D,E <> 30FD,E | 
 | "ぁ <> ァ;" | 
 | "あ <> ア;" | 
 | "ぃ <> ィ;" | 
 | "い <> イ;" | 
 | "ぅ <> ゥ;" | 
 | "う <> ウ;" | 
 | "ぇ <> ェ;" | 
 | "え <> エ;" | 
 | "ぉ <> ォ;" | 
 | "お <> オ;" | 
 | "か <> カ;" | 
 | "が <> ガ;" | 
 | "き <> キ;" | 
 | "ぎ <> ギ;" | 
 | "く <> ク;" | 
 | "ぐ <> グ;" | 
 | "け <> ケ;" | 
 | "げ <> ゲ;" | 
 | "こ <> コ;" | 
 | "ご <> ゴ;" | 
 | "さ <> サ;" | 
 | "ざ <> ザ;" | 
 | "し <> シ;" | 
 | "じ <> ジ;" | 
 | "す <> ス;" | 
 | "ず <> ズ;" | 
 | "せ <> セ;" | 
 | "ぜ <> ゼ;" | 
 | "そ <> ソ;" | 
 | "ぞ <> ゾ;" | 
 | "た <> タ;" | 
 | "だ <> ダ;" | 
 | "ち <> チ;" | 
 | "ぢ <> ヂ;" | 
 | "っ <> ッ;" | 
 | "つ <> ツ;" | 
 | "づ <> ヅ;" | 
 | "て <> テ;" | 
 | "で <> デ;" | 
 | "と <> ト;" | 
 | "ど <> ド;" | 
 | "な <> ナ;" | 
 | "に <> ニ;" | 
 | "ぬ <> ヌ;" | 
 | "ね <> ネ;" | 
 | "の <> ノ;" | 
 | "は <> ハ;" | 
 | "ば <> バ;" | 
 | "ぱ <> パ;" | 
 | "ひ <> ヒ;" | 
 | "び <> ビ;" | 
 | "ぴ <> ピ;" | 
 | "ふ <> フ;" | 
 | "ぶ <> ブ;" | 
 | "ぷ <> プ;" | 
 | "へ <> ヘ;" | 
 | "べ <> ベ;" | 
 | "ぺ <> ペ;" | 
 | "ほ <> ホ;" | 
 | "ぼ <> ボ;" | 
 | "ぽ <> ポ;" | 
 | "ま <> マ;" | 
 | "み <> ミ;" | 
 | "む <> ム;" | 
 | "め <> メ;" | 
 | "も <> モ;" | 
 | "ゃ <> ャ;" | 
 | "や <> ヤ;" | 
 | "ゅ <> ュ;" | 
 | "ゆ <> ユ;" | 
 | "ょ <> ョ;" | 
 | "よ <> ヨ;" | 
 | "ら <> ラ;" | 
 | "り <> リ;" | 
 | "る <> ル;" | 
 | "れ <> レ;" | 
 | "ろ <> ロ;" | 
 | "ゎ <> ヮ;" | 
 | "わ <> ワ;" | 
 | "ゐ <> ヰ;" | 
 | "ゑ <> ヱ;" | 
 | "を <> ヲ;" | 
 | "ん <> ン;" | 
 | "ゔ <> ヴ;" | 
 | "ゝ <> ヽ;" | 
 | "ゞ <> ヾ;" | 
 |  | 
 | // One-way Katakana-Hiragana xform of small K ka/ke to | 
 | // normal H ka/ke. | 
 | "か < ヵ;" | 
 | "け < ヶ;" | 
 |  | 
 | // Katakana followed by a prolonged sound mark 30FC has | 
 | // its final vowel doubled.  This is a Katakana-Hiragana | 
 | // one-way information-losing transformation.  We | 
 | // include the small Katakana (e.g., small A 30A1, which | 
 | // appears in the sets below as Hiragana 3041) and | 
 | // do not distinguish them from their large | 
 | // counterparts.  It doesn't make sense to double a | 
 | // small counterpart vowel as a small Hiragana vowel, so | 
 | // we don't do so.  In natural text this should never | 
 | // occur anyway.  If a 30FC is seen without a preceding | 
 | // vowel sound (e.g., after n 30F3) we do not change it. | 
 |  | 
 | // The variable definition below is commented out; the | 
 | // vowel-doubling rules further down use the literal | 
 | // prolonged sound mark instead of $long. | 
 | //## $long = ー; | 
 |  | 
 | // The following categories are Hiragana, not Katakana | 
 | // as might be expected, since by the time we get to the | 
 | // 30FC, the preceding character will have already been | 
 | // transformed to Hiragana. | 
 |  | 
 | // {The following mechanically generated from the | 
 | // Unicode 3.0 data:} | 
 |  | 
 | // $xa: Hiragana whose vowel sound is "a". | 
 | "$xa = ["  | 
 | "ぁ あ か が さ ざ"  | 
 | "た だ な は ば ぱ"  | 
 | "ま ゃ や ら ゎ わ"  | 
 | "];" | 
 |  | 
 | // $xi: Hiragana whose vowel sound is "i". | 
 | "$xi = ["  | 
 | "ぃ い き ぎ し じ"  | 
 | "ち ぢ に ひ び ぴ"  | 
 | "み り ゐ"  | 
 | "];" | 
 |  | 
 | // $xu: Hiragana whose vowel sound is "u". | 
 | "$xu = ["  | 
 | "ぅ う く ぐ す ず"  | 
 | "っ つ づ ぬ ふ ぶ"  | 
 | "ぷ む ゅ ゆ る ゔ"  | 
 | "];" | 
 |  | 
 | // $xe: Hiragana whose vowel sound is "e". | 
 | "$xe = ["  | 
 | "ぇ え け げ せ ぜ"  | 
 | "て で ね へ べ ぺ"  | 
 | "め れ ゑ"  | 
 | "];" | 
 |  | 
 | // $xo: Hiragana whose vowel sound is "o". | 
 | "$xo = ["  | 
 | "ぉ お こ ご そ ぞ"  | 
 | "と ど の ほ ぼ ぽ"  | 
 | "も ょ よ ろ を"  | 
 | "];" | 
 |  | 
 | // Katakana-to-Hiragana only: a prolonged sound mark that | 
 | // follows a member of one of the sets above becomes the | 
 | // matching Hiragana vowel.  The braces presumably mark the | 
 | // text that is replaced, with the set acting as preceding | 
 | // context that is left in place (ICU rule syntax -- confirm). | 
 | "あ < $xa {ー};" | 
 | "い < $xi {ー};" | 
 | "う < $xu {ー};" | 
 | "え < $xe {ー};" | 
 | "お < $xo {ー};" | 
 |  | 
 | // NFKC normalization for the Katakana-to-Hiragana direction | 
 | // (parenthesized, so presumably reverse-only -- confirm). | 
 | ":: (NFKC) ;" | 
 |  | 
 | // note: a global filter is more efficient, but MUST include all source chars!! | 
 | ":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);" | 
 |  | 
 | // eof | 
 |   } | 
 | } |