| /* |
| ******************************************************************************* |
| * Copyright (C) 1996-2000, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| * |
| * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/resources/Attic/TransliterationRule_Latin_Jamo.java,v $ |
| * $Date: 2000/03/10 04:07:31 $ |
| * $Revision: 1.2 $ |
| * |
| ***************************************************************************************** |
| */ |
| package com.ibm.text.resources; |
| import java.util.ListResourceBundle; |
| |
| public class TransliterationRule_Latin_Jamo extends ListResourceBundle { |
| /** |
| * Overrides ListResourceBundle |
| */ |
| public Object[][] getContents() { |
| return new Object[][] { |
| { "Rule", "" |
| |
| // VARIABLES |
| |
| + "initial=[\u1100-\u115F];" |
| + "medial=[\u1160-\u11A7];" |
| + "final=[\u11A8-\u11F9];" // added - aliu |
| + "vowel=[aeiouwyAEIOUWY\u1160-\u11A7];" |
| + "consonant=[bcdfghjklmnpqrstvxzBCDFGHJKLMNPQRSTVXZ{medial}{final}];" |
| + "ye=[yeYE];" |
| + "ywe=[yweYWE];" |
| + "yw=[ywYW];" |
| + "nl=[nlNL];" |
| + "gnl=[gnlGNL];" |
| + "lsgb=[lsgbLSGB];" |
| + "ywao=[ywaoYWAO];" |
| + "bl=[blBL];" |
| |
| // RULES |
| |
| // Hangul structure is IMF or IM |
| // So you can have, because of adjacent sequences |
| // IM, but not II or IF |
| // MF or MI, but not MM |
| // FI, but not FF or FM |
| |
| // For English, we just have C or V. |
| // To generate valid Hangul: |
| // Vowels: |
| // We insert IEUNG between VV, and otherwise map V to M |
| // We also insert IEUNG if there is no |
| // Consonants: |
| // We don't break doubles |
| // Cases like lmgg, we have to break at lm |
| // So to guess whether a consonant is I or F |
| // we map all C's to F, except when followed by a vowel, e.g. |
| // X[{vowel}>CHOSEONG (initial) |
| // X>JONGSEONG (final) |
| |
| // special insertion for funny sequences of vowels, and for empty consonant |
| |
| + "'' < ({consonant}) \u110B;" // insert a break between any consonant and the empty consonant. |
| + "({medial}) ({vowel}) <> \u110B;" // HANGUL CHOSEONG IEUNG |
| |
| // Below, insert an empty consonant in front of a vowel, if there is no Initial in front. |
| |
| // Fix casing. |
| // Because Korean is caseless, we just want to treat everything as |
| // lowercase. |
| // we could do this by always preceeding this transliterator with |
| // an upper-lowercase transformation, but that wouldn't invert nicely. |
| // We use the "revisit" syntax to just convert latin to latin |
| // so that we can avoid |
| // having to restate all the Latin=>Jamo rules, with the I/F handling. |
| |
| // We don't have to add titlecase, since that will be picked up |
| // since the first letter is converted, then revisited. E.g. |
| // |Gg => |gg => {sang kiyeok} |
| // We do have to have all caps, since otherwise we could get: |
| // |GG => |gG => {kiyeok}|G => {kiyeok}|g => {kiyeok}{kiyeok} |
| |
| + "Z > |z;" |
| + "YU > |yu;" |
| + "YO > |yo;" |
| + "YI > |yi;" |
| + "YEO > |yeo;" |
| + "YE > |ye;" |
| + "YAE > |yae;" |
| + "YA > |ya;" |
| + "Y > |y;" |
| + "WI > |wi;" |
| + "WEO > |weo;" |
| + "WE > |we;" |
| + "WAE > |wae;" |
| + "WA > |wa;" |
| + "W > |w;" |
| + "U > |u;" |
| + "T > |t;" |
| + "SS > |ss;" |
| + "S > |s;" |
| + "P > |p;" |
| + "OE > |oe;" |
| + "O > |o;" |
| + "NJ > |nj;" |
| + "NH > |nh;" |
| + "NG > |ng;" |
| + "N > |n;" |
| + "M > |m;" |
| + "LT > |lt;" |
| + "LS > |ls;" |
| + "LP > |lp;" |
| + "LM > |lm;" |
| + "LH > |lh;" |
| + "LG > |lg;" |
| + "LB > |lb;" |
| + "L > |l;" |
| + "K > |k;" |
| + "JJ > |jj;" |
| + "J > |j;" |
| + "I > |i;" |
| + "H > |h;" |
| + "GS > |gs;" |
| + "GG > |gg;" |
| + "G > |g;" |
| + "EU > |eu;" |
| + "EO > |eo;" |
| + "E > |e;" |
| + "DD > |dd;" |
| + "D > |d;" |
| + "BS > |bs;" |
| + "BB > |bb;" |
| + "B > |b;" |
| + "AE > |ae;" |
| + "A > |a;" |
| |
| // APOSTROPHE |
| |
| // As always, an apostrophe is used to separate digraphs into |
| // singles. That is, if you really wanted [KAN][GGAN], instead |
| // of [KANG][GAN] you would write "kan'ggan". |
| |
| // Rules for inserting ' when mapping separated digraphs back |
| // from Hangul to Latin. Catch every letter that can be the |
| // LAST of a digraph (or multigraph) AND first of an initial |
| |
| + "'' < (l) (\u11c0;" // hangul jongseong thieuth |
| + "'' < ({lsgb}) (\u11ba;" // hangul jongseong sios |
| + "'' < (l) (\u11c1;" // hangul jongseong phieuph |
| + "'' < (l) (\u11b7;" // hangul jongseong mieum |
| + "'' < (n) (\u11bd;" // hangul jongseong cieuc |
| + "'' < ({nl}) (\u11c2;" // hangul jongseong hieuh |
| + "'' < ({gnl}) (\u11a9;" // hangul jongseong ssangkiyeok |
| + "'' < ({bl}) (\u11b8;" // hangul jongseong pieup |
| + "'' < (d) (\u11ae;" // hangul jongseong tikeut |
| |
| + "'' < ({ye}) (\u116e;" // hangul jungseong u |
| + "'' < ({ywe}) (\u1169;" // hangul jungseong o |
| + "'' < ({yw}) (\u1175;" // hangul jungseong i |
| + "'' < ({ywao}) (\u1166;" // hangul jungseong e |
| + "'' < ({yw}) (\u1161;" // hangul jungseong a |
| |
| + "'' < (l) (\u1110;" // hangul choseong thieuth |
| + "'' < ({lsgb}) (\u110a;" // hangul choseong ssangsios |
| + "'' < ({lsgb}) (\u1109;" // hangul choseong sios |
| + "'' < (l) (\u1111;" // hangul choseong phieuph |
| + "'' < (l) (\u1106;" // hangul choseong mieum |
| + "'' < (n) (\u110c;" // hangul choseong cieuc |
| + "'' < (n) (\u110d;" |
| + "'' < ({nl}) (\u1112;" // hangul choseong hieuh |
| + "'' < ({gnl}) (\u1101;" // hangul choseong ssangkiyeok |
| + "'' < ({gnl}) (\u1100;" // hangul choseong kiyeok |
| + "'' < (d) (\u1103;" // hangul choseong tikeut |
| + "'' < (d) (\u1104;" |
| + "'' < ({bl}) (\u1107;" // hangul choseong pieup |
| + "'' < ({bl}) (\u1108;" |
| |
| // INITIALS |
| |
| + "t ({vowel}) <> \u1110;" // hangul choseong thieuth |
| + "ss ({vowel}) <> \u110a;" // hangul choseong ssangsios |
| + "s ({vowel}) <> \u1109;" // hangul choseong sios |
| + "p ({vowel}) <> \u1111;" // hangul choseong phieuph |
| + "n ({vowel}) <> \u1102;" // hangul choseong nieun |
| + "m ({vowel}) <> \u1106;" // hangul choseong mieum |
| + "l ({vowel}) <> \u1105;" // hangul choseong rieul |
| + "k ({vowel}) <> \u110f;" // hangul choseong khieukh |
| + "j ({vowel}) <> \u110c;" // hangul choseong cieuc |
| + "h ({vowel}) <> \u1112;" // hangul choseong hieuh |
| + "gg ({vowel}) <> \u1101;" // hangul choseong ssangkiyeok |
| + "g ({vowel}) <> \u1100;" // hangul choseong kiyeok |
| + "d ({vowel}) <> \u1103;" // hangul choseong tikeut |
| + "c ({vowel}) <> \u110e;" // hangul choseong chieuch |
| + "b ({vowel}) <> \u1107;" // hangul choseong pieup |
| + "bb ({vowel}) <> \u1108;" |
| + "jj ({vowel}) <> \u110d;" |
| + "dd ({vowel}) <> \u1104;" |
| |
| // If we have gotten through to these rules, and we start with |
| // a consonant, then the remaining mappings would be to F, |
| // because must have CC (or C<non-letter>), not CV. |
| // If we have F before us, then |
| // we would end up with FF, which is wrong. The simplest fix is |
| // to still make it an initial, but also insert an "u", |
| // so we end up with F, I, u, and then continue with the C |
| |
| // special, only initial |
| + "bb > \u1108\u116e;" // hangul choseong ssangpieup |
| + "jj > \u1108\u110d;" // hangul choseong ssangcieuc |
| + "dd > \u1108\u1104;" // hangul choseong ssangtikeut |
| |
| + "({final}) t > \u1110\u116e;" // hangul choseong thieuth |
| + "({final}) ss > \u110a\u116e;" // hangul choseong ssangsios |
| + "({final}) s > \u1109\u116e;" // hangul choseong sios |
| + "({final}) p > \u1111\u116e;" // hangul choseong phieuph |
| + "({final}) n > \u1102\u116e;" // hangul choseong nieun |
| + "({final}) m > \u1106\u116e;" // hangul choseong mieum |
| + "({final}) l > \u1105\u116e;" // hangul choseong rieul |
| + "({final}) k > \u110f\u116e;" // hangul choseong khieukh |
| + "({final}) j > \u110c\u116e;" // hangul choseong cieuc |
| + "({final}) h > \u1112\u116e;" // hangul choseong hieuh |
| + "({final}) gg > \u1101\u116e;" // hangul choseong ssangkiyeok |
| + "({final}) g > \u1100\u116e;" // hangul choseong kiyeok |
| + "({final}) d > \u1103\u116e;" // hangul choseong tikeut |
| + "({final}) c > \u110e\u116e;" // hangul choseong chieuch |
| + "({final}) b > \u1107\u116e;" // hangul choseong pieup |
| |
| // MEDIALS after INITIALS |
| |
| + "({initial}) yu <> \u1172;" // hangul jungseong yu |
| + "({initial}) yo <> \u116d;" // hangul jungseong yo |
| + "({initial}) yi <> \u1174;" // hangul jungseong yi |
| + "({initial}) yeo <> \u1167;" // hangul jungseong yeo |
| + "({initial}) ye <> \u1168;" // hangul jungseong ye |
| + "({initial}) yae <> \u1164;" // hangul jungseong yae |
| + "({initial}) ya <> \u1163;" // hangul jungseong ya |
| + "({initial}) wi <> \u1171;" // hangul jungseong wi |
| + "({initial}) weo <> \u116f;" // hangul jungseong weo |
| + "({initial}) we <> \u1170;" // hangul jungseong we |
| + "({initial}) wae <> \u116b;" // hangul jungseong wae |
| + "({initial}) wa <> \u116a;" // hangul jungseong wa |
| + "({initial}) u <> \u116e;" // hangul jungseong u |
| + "({initial}) oe <> \u116c;" // hangul jungseong oe |
| + "({initial}) o <> \u1169;" // hangul jungseong o |
| + "({initial}) i <> \u1175;" // hangul jungseong i |
| + "({initial}) eu <> \u1173;" // hangul jungseong eu |
| + "({initial}) eo <> \u1165;" // hangul jungseong eo |
| + "({initial}) e <> \u1166;" // hangul jungseong e |
| + "({initial}) ae <> \u1162;" // hangul jungseong ae |
| + "({initial}) a <> \u1161;" // hangul jungseong a |
| |
| // MEDIALS (vowels) not after INITIALs |
| |
| + "yu > \u110B\u1172;" // hangul jungseong yu |
| + "yo > \u110B\u116d;" // hangul jungseong yo |
| + "yi > \u110B\u1174;" // hangul jungseong yi |
| + "yeo > \u110B\u1167;" // hangul jungseong yeo |
| + "ye > \u110B\u1168;" // hangul jungseong ye |
| + "yae > \u110B\u1164;" // hangul jungseong yae |
| + "ya > \u110B\u1163;" // hangul jungseong ya |
| + "wi > \u110B\u1171;" // hangul jungseong wi |
| + "weo > \u110B\u116f;" // hangul jungseong weo |
| + "we > \u110B\u1170;" // hangul jungseong we |
| + "wae > \u110B\u116b;" // hangul jungseong wae |
| + "wa > \u110B\u116a;" // hangul jungseong wa |
| + "u > \u110B\u116e;" // hangul jungseong u |
| + "oe > \u110B\u116c;" // hangul jungseong oe |
| + "o > \u110B\u1169;" // hangul jungseong o |
| + "i > \u110B\u1175;" // hangul jungseong i |
| + "eu > \u110B\u1173;" // hangul jungseong eu |
| + "eo > \u110B\u1165;" // hangul jungseong eo |
| + "e > \u110B\u1166;" // hangul jungseong e |
| + "ae > \u110B\u1162;" // hangul jungseong ae |
| + "a > \u110B\u1161;" // hangul jungseong a |
| |
| |
| // FINALS |
| |
| + "t <> \u11c0;" // hangul jongseong thieuth |
| + "ss <> \u11bb;" // hangul jongseong ssangsios |
| + "s <> \u11ba;" // hangul jongseong sios |
| + "p <> \u11c1;" // hangul jongseong phieuph |
| + "nj <> \u11ac;" // hangul jongseong nieun-cieuc |
| + "nh <> \u11ad;" // hangul jongseong nieun-hieuh |
| + "ng <> \u11bc;" // hangul jongseong ieung |
| + "n <> \u11ab;" // hangul jongseong nieun |
| + "m <> \u11b7;" // hangul jongseong mieum |
| + "lt <> \u11b4;" // hangul jongseong rieul-thieuth |
| + "ls <> \u11b3;" // hangul jongseong rieul-sios |
| + "lp <> \u11b5;" // hangul jongseong rieul-phieuph |
| + "lm <> \u11b1;" // hangul jongseong rieul-mieum |
| + "lh <> \u11b6;" // hangul jongseong rieul-hieuh |
| + "lg <> \u11b0;" // hangul jongseong rieul-kiyeok |
| + "lb <> \u11b2;" // hangul jongseong rieul-pieup |
| + "l <> \u11af;" // hangul jongseong rieul |
| + "k <> \u11bf;" // hangul jongseong khieukh |
| + "j <> \u11bd;" // hangul jongseong cieuc |
| + "h <> \u11c2;" // hangul jongseong hieuh |
| + "gs <> \u11aa;" // hangul jongseong kiyeok-sios |
| + "gg <> \u11a9;" // hangul jongseong ssangkiyeok |
| + "g <> \u11a8;" // hangul jongseong kiyeok |
| + "d <> \u11ae;" // hangul jongseong tikeut |
| + "c <> \u11be;" // hangul jongseong chieuch |
| + "bs <> \u11b9;" // hangul jongseong pieup-sios |
| + "b <> \u11b8;" // hangul jongseong pieup |
| |
| // extra English letters |
| // {moved to bottom - aliu} |
| |
| + "z > |s;" |
| //{ + "Z > |s;" } masked |
| + "x > |ks;" |
| + "X > |ks;" |
| + "v > |b;" |
| + "V > |b;" |
| + "r > |l;" |
| + "R > |l;" |
| + "q > |k;" |
| + "Q > |k;" |
| + "f > |p;" |
| + "F > |p;" |
| //{ + "c > |k;" } masked |
| + "C > |k;" |
| |
| + "y > \u1172;" // hangul jungseong yu |
| + "w > \u1171;" // hangul jungseong wi |
| |
| |
| // ==================================== |
| // Normal final rule: remove ' |
| // ==================================== |
| |
| + "''>;" |
| } |
| }; |
| } |
| } |