data/ljamo.txt - external/github.com/unicode-org/icu - Git at Google

  // -*- Coding: utf-8; -*-
 //--------------------------------------------------------------------
 // Copyright (c) 1999-2001, International Business Machines
 // Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
 // THIS IS A MACHINE-GENERATED FILE
 // Tool: dumpICUrules.bat
 // Source: \icu4j\src\com\ibm\text\resources/Transliterator_Latin_Jamo.utf8.txt
 // Date: Thu Mar  8 12:27:24 2001
 //--------------------------------------------------------------------

 // Latin_Jamo

 ljamo {
   Rule {
 //--------------------------------------------------------------------
 // Copyright (c) 1999-2001, International Business Machines
 // Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------

 // Latin-Jamo

 // Transliteration from Latin characters to Korean script is done in
 // two steps: Latin to Jamo, then Jamo to Hangul.  The Jamo-Hangul
 // transliteration is done algorithmically following Unicode 3.0
 // section 3.11.  This file implements the Latin to Jamo
 // transliteration using rules.

 // Jamo occupy the block 1100-11FF.  Within this block there are three
 // groups of characters: initial consonants or choseong (I), medial
 // vowels or jungseong (M), and trailing consonants or jongseong (F).
 // Standard Korean syllables are of the form I+M+F*.

 // Section 3.11 describes the use of 'filler' jamo to convert
 // nonstandard syllables to standard form: the choseong filler 115F and
 // the jungseong filler 1160.  In this transliterator, we will not use
 // 115F or 1160.

 // We will, however, insert two 'null' jamo to make foreign words
 // conform to Korean syllable structure.  These are the null initial
 // consonant 110B (IEUNG) and the null vowel 1173 (EU).  In Latin text,
 // we will use the hyphen in order to disambiguate strings,
 // e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).

 // We will not use all of the characters in the jamo block.  We will
 // only use the 19 initials, 21 medials, and 27 finals possessing a
 // jamo short name as defined in section 4.4 of the Unicode book.

 // Rules of thumb.  These guidelines provide the basic framework
 // for the rules.  They are phrased in terms of Latin-Jamo transliteration.
 // The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
 // just context-free transliteration of jamo to corresponding short names,
 // with the addition of hyphens to maintain round-trip integrity
 // in the context of the Latin-Jamo rules.

 // A sequence of vowels:
 // - Take the longest sequence you can. If there are too many, or you don't
 //   have a starting consonant, introduce a 110B as necessary.

 // A sequence of consonants.
 // - First join the double consonants: G + G -> GG
 // - In the remaining list,
 // -- If there is no preceding vowel, take the first consonant, and insert EU
 //    after it. Continue with the rest of the consonants.
 // -- If there is one consonant, attach to the following vowel
 // -- If there are two consonants and a following vowel, attach one to the
 //    preceding vowel, and one to the following vowel.
 // -- If there are more than two consonants, join the first two together if you
 //    can: L + G => LG
 // -- If you still end up with more than 2 consonants, insert EU after the
 //    first one, and continue with the rest of the consonants.

 //----------------------------------------------------------------------
 // Variables

 // Some latin consonants or consonant pairs only occur as initials, and
 // some only as finals, but some occur as both.  This makes some jamo
 // consonants ambiguous when transliterated into latin.
 //   Initial only: IEUNG BB DD JJ R
 //   Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
 //   Initial and Final: B C D G GG H J K M N P S SS T

   // Initial consonants (choseong), U+1100..U+1112.  An 'i' suffix marks a
   // consonant whose Latin spelling is also used for a final (see the
   // matching 'f' variable); unsuffixed names occur only as initials.
   "$Gi = \u1100;"
   "$GGi = \u1101;"
   "$Ni = \u1102;"
   "$Di = \u1103;"
   "$DD = \u1104;"
   "$R = \u1105;"
   "$Mi = \u1106;"
   "$Bi = \u1107;"
   "$BB = \u1108;"
   "$Si = \u1109;"
   "$SSi = \u110A;"
   "$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
   "$Ji = \u110C;"
   "$JJ = \u110D;"
   "$Ci = \u110E;"
   "$Ki = \u110F;"
   "$Ti = \u1110;"
   "$Pi = \u1111;"
   "$Hi = \u1112;"

   // Medial vowels (jungseong), U+1161..U+1175.  Each variable is named
   // after the Latin key that the medial rules map to it (e.g. "wae" <> $WAE).
   "$A = \u1161;"
   "$AE = \u1162;"
   "$YA = \u1163;"
   "$YAE = \u1164;"
   "$EO = \u1165;"
   "$E = \u1166;"
   "$YEO = \u1167;"
   "$YE = \u1168;"
   "$O = \u1169;"
   "$WA = \u116A;"
   "$WAE = \u116B;"
   "$OE = \u116C;"
   "$YO = \u116D;"
   "$U = \u116E;"
   "$WEO = \u116F;"
   "$WE = \u1170;"
   "$WI = \u1171;"
   "$YU = \u1172;"
   "$EU = \u1173;" // null medial, inserted during Latin-Jamo
   "$YI = \u1174;"
   "$I = \u1175;"

   // Final consonants (jongseong), U+11A8..U+11C2.  An 'f' suffix marks a
   // consonant whose Latin spelling is also used for an initial (see the
   // matching 'i' variable); unsuffixed names occur only as finals.
   "$Gf = \u11A8;"
   "$GGf = \u11A9;"
   "$GS = \u11AA;"
   "$Nf = \u11AB;"
   "$NJ = \u11AC;"
   "$NH = \u11AD;"
   "$Df = \u11AE;"
   "$L = \u11AF;"
   "$LG = \u11B0;"
   "$LM = \u11B1;"
   "$LB = \u11B2;"
   "$LS = \u11B3;"
   "$LT = \u11B4;"
   "$LP = \u11B5;"
   "$LH = \u11B6;"
   "$Mf = \u11B7;"
   "$Bf = \u11B8;"
   "$BS = \u11B9;"
   "$Sf = \u11BA;"
   "$SSf = \u11BB;"
   "$NG = \u11BC;"
   "$Jf = \u11BD;"
   "$Cf = \u11BE;"
   "$Kf = \u11BF;"
   "$Tf = \u11C0;"
   "$Pf = \u11C1;"
   "$Hf = \u11C2;"

   // All 19 initial jamo defined above ($Gi .. $Hi), including $IEUNG
   "$jamoInitial = [\u1100-\u1112];"

   // All 21 medial jamo defined above ($A .. $I), including $EU
   "$jamoMedial = [\u1161-\u1175];"

   // Every latin letter used to spell an initial consonant.  'l' is absent
   // (L is final-only); IEUNG has no latin spelling.
   "$latinInitial = [bcdghjkmnprst];"

   // Any character in the latin transliteration of a medial
   "$latinMedial = [aeiouwy];"

   // The last character of the latin transliteration of a medial
   "$latinMedialEnd = [aeiou];"

 //----------------------------------------------------------------------
 // Jamo-Latin

 // Jamo to latin is relatively simple, since it is the latin that is
 // ambiguous.  Most rules are straightforward, and we encode them below
 // by simply adding the reverse direction to the Latin-Jamo rule, e.g.:

 //   $jamoMedial {bs} > $BS;

 // becomes

 //   $jamoMedial {bs} <> $BS;

 // Furthermore, we don't care about the ordering for Jamo-Latin because
 // we are going from single characters, so we can very easily piggyback
 // on the Latin-Jamo.

 // The main issue with Jamo-Latin is when to insert hyphens.
 // Hyphens are inserted to obtain correct round trip behavior.  For
 // example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
 // would then round trip to Ki A GGi E.  To prevent this, we insert a
 // hyphen: "kag-ge".  IMPORTANT: The need for hyphens depends
 // very specifically on the behavior of the Latin-Jamo rules.  A change
 // in the Latin-Jamo behavior can completely change the way the
 // hyphen insertion must be done.

 // First try to preserve actual hyphens in the jamo text by doubling
 // them.  This fixes problems like:
 // (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
 // => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L).  This is optional
 // -- if we don't care about losing hyphens in the jamo, we can delete
 // this rule.

   // Bidirectional: Latin-Jamo collapses '--' to '-', Jamo-Latin expands
   // '-' to '--'.  This rule comes before the hyphen-deletion rule
   // ("'-' > ;") so that a doubled hyphen is consumed here first.
   "'--' <> '-';"

 // Triple consonants.  For three consonants "axxx" we insert a
 // hyphen between the first and second "x" if XXf, Xf, and Xi all
 // exist, and we have A Xf XXi.  This prevents the reverse
 // transliteration to A XXf Xi.

   // Only 'g' and 's' qualify: GG and SS are the only doubled consonants
   // defined both as an initial ($GGi, $SSi) and as a final ($GGf, $SSf).
   // The empty key {} means nothing is consumed; '-' is inserted between
   // the latin text already produced and the next jamo.
   "'-' < $latinMedialEnd g {} $GGi;"
   "'-' < $latinMedialEnd s {} $SSi;"

 // For vowels the rule is similar.  If there is a vowel "ae" such that
 // "a" by itself and "e" by itself are vowels, then we want to map A E
 // to "a-e" so as not to round trip to AE.  However, in the text Ki EO
 // IEUNG E we don't need to map to "keo-e".  "keoe" suffices.  For
 // vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
 // tested.  NOTE: These rules used to have a left context of
 // $latinInitial instead of [^$latinMedial].  The problem with this is
 // sequences where an initial IEUNG is transliterated away:
 //   (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)

   // "ye"/"we" followed by "o..." would otherwise read as "yeo"/"weo"
   "'-' < [^$latinMedial] [y w] e {} [$O $OE];"
   // "e" followed by "o..." or "u" would otherwise read as "eo"/"eu"
   "'-' < [^$latinMedial] e {} [$O $OE $U];"
   // "o"/"a" followed by "e..." would otherwise read as "oe"/"ae"
   "'-' < [^$latinMedial] [o a] {} [$E $EO $EU];"
   // "wa"/"ya" followed by "e..." would otherwise read as "wae"/"yae"
   "'-' < [^$latinMedial] [w y] a {} [$E $EO $EU];"

 // Similar to the above, but with an intervening $IEUNG.

   // Same four cases as the rules without $IEUNG.  They are needed because
   // Jamo-Latin deletes IEUNG, which leaves the two vowels adjacent in the
   // latin output.
   "'-' < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
   "'-' < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
   "'-' < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
   "'-' < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"

 // Single finals followed by IEUNG.  The jamo sequence A Xf IEUNG E,
 // where Xi also exists, must be transliterated as "ax-e" to prevent
 // the round trip conversion to A Xi E.

   // One rule per single latin consonant that spells both an initial and a
   // final: b c d g h j k m n p s t.
   "'-' < $latinMedialEnd b {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd c {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd d {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd g {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd h {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd j {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd k {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd m {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd n {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd p {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd s {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd t {} $IEUNG $jamoMedial;"

 // Double finals followed by IEUNG.  Similar to the single finals
 // followed by IEUNG.  Any latin consonant pair X Y, between medials,
 // that we would split by Latin-Jamo, we must handle when it occurs as
 // part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
 // E.

   // One rule per two-letter final: bs gg gs lb lg lh lm lp ls lt ng nh nj ss
   // (all 14 finals whose latin spelling has two letters).
   "'-' < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
   "'-' < $latinMedialEnd s s {} $IEUNG $jamoMedial;"

 // Split doubles.  Text of the form A Xf Xi E, where XXi also occurs,
 // we transliterate as "ax-xe" to prevent round trip transliteration as
 // A XXi E.  (Example: Ki A Gf Gi E => "kag-ge", not "kagge".)  One rule
 // per consonant that has a doubled initial: BB DD JJ GG SS.

   "'-' < $latinMedialEnd b {} $Bi $jamoMedial;"
   "'-' < $latinMedialEnd d {} $Di $jamoMedial;"
   "'-' < $latinMedialEnd j {} $Ji $jamoMedial;"
   "'-' < $latinMedialEnd g {} $Gi $jamoMedial;"
   "'-' < $latinMedialEnd s {} $Si $jamoMedial;"

 // XYY.  This corresponds to the XYY rule in Latin-Jamo.  By default
 // Latin-Jamo maps "xyy" to Xf YYi, to keep YY together.  As a result,
 // "xyy" forms that correspond to XYf Yi must be transliterated as
 // "xy-y".

   // The pairs listed are the two-letter finals XY whose second letter Y
   // also has a doubled initial (ss, bb, gg, jj).  The hyphen is inserted
   // whether the following initial is the single Yi or the doubled YYi.
   "'-' < $latinMedialEnd b s {} [$Si $SSi];"
   "'-' < $latinMedialEnd g s {} [$Si $SSi];"
   "'-' < $latinMedialEnd l b {} [$Bi $BB];"
   "'-' < $latinMedialEnd l g {} [$Gi $GGi];"
   "'-' < $latinMedialEnd l s {} [$Si $SSi];"
   "'-' < $latinMedialEnd n g {} [$Gi $GGi];"
   "'-' < $latinMedialEnd n j {} [$Ji $JJ];"

 // Deletion of IEUNG is handled below.

 //----------------------------------------------------------------------
 // Latin-Jamo

 // [Basic, context-free Jamo-Latin rules are embedded here too.  See
 // above.]

 // Split digraphs: Text of the form 'axye', where 'xy' is a final
 // digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
 // 'e' are medials, we want to transliterate this as A Xf Yi E rather
 // than A XYf IEUNG E.  We do NOT include text of the form "axxe",
 // since that is handled differently below.  These rules are generated
 // programmatically from the jamo data.

   // Forward-only ('>') rules.  The left context is an already-converted
   // jamo medial; the right context is a still-unconverted latin vowel
   // letter.  Covers every two-letter final except 'gg' and 'ss'.
   "$jamoMedial {b s} $latinMedial > $Bf $Si;"
   "$jamoMedial {g s} $latinMedial > $Gf $Si;"
   "$jamoMedial {l b} $latinMedial > $L $Bi;"
   "$jamoMedial {l g} $latinMedial > $L $Gi;"
   "$jamoMedial {l h} $latinMedial > $L $Hi;"
   "$jamoMedial {l m} $latinMedial > $L $Mi;"
   "$jamoMedial {l p} $latinMedial > $L $Pi;"
   "$jamoMedial {l s} $latinMedial > $L $Si;"
   "$jamoMedial {l t} $latinMedial > $L $Ti;"
   "$jamoMedial {n g} $latinMedial > $Nf $Gi;"
   "$jamoMedial {n h} $latinMedial > $Nf $Hi;"
   "$jamoMedial {n j} $latinMedial > $Nf $Ji;"

 // Single consonants are initials: Text of the form 'axe', where 'x'
 // can be an initial or a final, and 'a' and 'e' are medials, we want
 // to transliterate as A Xi E rather than A Xf IEUNG E.

   // Forward-only.  One rule per single latin consonant that spells both
   // an initial and a final: b c d g h j k m n p s t.  These come before
   // the finals block so that a consonant between two vowels is not
   // taken as a final.
   "$jamoMedial {b} $latinMedial > $Bi;"
   "$jamoMedial {c} $latinMedial > $Ci;"
   "$jamoMedial {d} $latinMedial > $Di;"
   "$jamoMedial {g} $latinMedial > $Gi;"
   "$jamoMedial {h} $latinMedial > $Hi;"
   "$jamoMedial {j} $latinMedial > $Ji;"
   "$jamoMedial {k} $latinMedial > $Ki;"
   "$jamoMedial {m} $latinMedial > $Mi;"
   "$jamoMedial {n} $latinMedial > $Ni;"
   "$jamoMedial {p} $latinMedial > $Pi;"
   "$jamoMedial {s} $latinMedial > $Si;"
   "$jamoMedial {t} $latinMedial > $Ti;"

 // Doubled initials.  The sequence "axxe", where XX exists as an initial
 // (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
 // to transliterate as A XXi E, rather than split to A Xf Xi E.

   // Forward-only.  These come before the finals block; otherwise 'gg' and
   // 'ss' between vowels would be taken by the $GGf / $SSf final rules.
   "$jamoMedial {b b} $latinMedial > $BB;"
   "$jamoMedial {d d} $latinMedial > $DD;"
   "$jamoMedial {j j} $latinMedial > $JJ;"
   "$jamoMedial {g g} $latinMedial > $GGi;"
   "$jamoMedial {s s} $latinMedial > $SSi;"

 // XYY.  Because doubled consonants bind more strongly than XY
 // consonants, we must handle the sequence "axyy" specially.  Here XYf
 // and YYi must exist.  In these cases, we map to Xf YYi rather than
 // XYf.

   // Only the first consonant (the key in braces) is consumed and becomes
   // a final.  The doubled pair is right context only and is left
   // unconsumed.  These rules precede the finals block so that, e.g., 'b'
   // before 'ss' is not absorbed into the 'bs' final.
   "$jamoMedial {b} s s > $Bf;"
   "$jamoMedial {g} s s > $Gf;"
   "$jamoMedial {l} b b > $L;"
   "$jamoMedial {l} g g > $L;"
   "$jamoMedial {l} s s > $L;"
   "$jamoMedial {n} g g > $Nf;"
   "$jamoMedial {n} j j > $Nf;"

 // Finals: Attach consonant with preceding medial to preceding medial.
 // Do this BEFORE mapping consonants to initials.  Longer keys must
 // precede shorter keys that they start with, e.g., the rule for 'bs'
 // must precede 'b'.

 // [BASIC Jamo-Latin FINALS handled here.  Order irrelevant within this
 // block for Jamo-Latin.]

   // Bidirectional rules, one per final jamo.  Latin-Jamo: the key in
   // braces becomes the final when an already-converted jamo medial
   // precedes it.  Jamo-Latin: the final maps back to the same latin key.
   // One rule per string literal, so the "longer key first" ordering
   // (bs/b, gg/gs/g, l?/l, ng/nh/nj/n, ss/s) can be checked by eye.
   "$jamoMedial {bs} <> $BS;"
   "$jamoMedial {b} <> $Bf;"
   "$jamoMedial {c} <> $Cf;"
   "$jamoMedial {d} <> $Df;"
   "$jamoMedial {gg} <> $GGf;"
   "$jamoMedial {gs} <> $GS;"
   "$jamoMedial {g} <> $Gf;"
   "$jamoMedial {h} <> $Hf;"
   "$jamoMedial {j} <> $Jf;"
   "$jamoMedial {k} <> $Kf;"
   "$jamoMedial {lb} <> $LB;"
   "$jamoMedial {lg} <> $LG;"
   "$jamoMedial {lh} <> $LH;"
   "$jamoMedial {lm} <> $LM;"
   "$jamoMedial {lp} <> $LP;"
   "$jamoMedial {ls} <> $LS;"
   "$jamoMedial {lt} <> $LT;"
   "$jamoMedial {l} <> $L;"
   "$jamoMedial {m} <> $Mf;"
   "$jamoMedial {ng} <> $NG;"
   "$jamoMedial {nh} <> $NH;"
   "$jamoMedial {nj} <> $NJ;"
   "$jamoMedial {n} <> $Nf;"
   "$jamoMedial {p} <> $Pf;"
   "$jamoMedial {ss} <> $SSf;"
   "$jamoMedial {s} <> $Sf;"
   "$jamoMedial {t} <> $Tf;"

 // Initials: Attach single consonant to following medial.  Do this
 // AFTER mapping finals.  Longer keys must precede shorter keys that
 // they start with, e.g., the rule for 'gg' must precede 'g'.

 // [BASIC Jamo-Latin INITIALS handled here.  Order irrelevant within
 // this block for Jamo-Latin.]

   // Bidirectional rules, one per initial jamo except IEUNG.  Latin-Jamo:
   // the key is taken as an initial only when a latin vowel letter
   // follows.  Jamo-Latin: the initial maps back to the same latin key.
   "{gg} $latinMedial <> $GGi;"
   "{g} $latinMedial <> $Gi;"
   "{n} $latinMedial <> $Ni;"
   "{dd} $latinMedial <> $DD;"
   "{d} $latinMedial <> $Di;"
   "{r} $latinMedial <> $R;"
   "{m} $latinMedial <> $Mi;"
   "{bb} $latinMedial <> $BB;"
   "{b} $latinMedial <> $Bi;"
   "{ss} $latinMedial <> $SSi;"
   "{s} $latinMedial <> $Si;"
   "{jj} $latinMedial <> $JJ;"
   "{j} $latinMedial <> $Ji;"
   "{c} $latinMedial <> $Ci;"
   "{k} $latinMedial <> $Ki;"
   "{t} $latinMedial <> $Ti;"
   "{p} $latinMedial <> $Pi;"
   "{h} $latinMedial <> $Hi;"

 // 'r' in final position.  Because of the equivalency of the 'l' and
 // 'r' jamo (the glyphs are the same), we try to provide the same
 // equivalency in Latin-Jamo.  The 'l' to 'r' conversion is handled
 // below.  If we see an 'r' in an apparent final position, treat it
 // like 'l'.  For example, "karka" => Ki A R EU Ki A without this rule.
 // Instead, we want Ki A L Ki A.

   // Forward-only.  The output is the latin letter 'l', not a jamo.  The
   // '|' leaves the cursor in front of it, so the 'l' is read again and
   // picked up by the 'l' rules of the finals block.
   "$jamoMedial {r} $latinInitial > | l;"

 // Initial + Final: If we match the next rule, we have initial then
 // final consonant with no intervening medial.  We insert the null
 // vowel BEFORE it to create a well-formed syllable.  (In the next rule
 // we insert a null vowel AFTER an anomalous initial.)

   // Empty key: nothing is consumed.  $EU is inserted between an initial
   // jamo and the latin consonant that follows it.  The set holds the
   // first letters of the final spellings: it includes 'l' (final-only)
   // and omits 'r' (initial-only).
   "$jamoInitial {} [bcdghjklmnpst] > $EU;"

 // Initial + X: This block matches an initial consonant not followed by
 // a medial.  We insert the null vowel after it.  We handle double
 // initials explicitly here; for single initial consonants we insert EU
 // (as Latin) after them and let standard rules do the rest.

 // BREAKS ROUND TRIP INTEGRITY

   // Reached only when no earlier rule matched, i.e. the consonant is
   // neither a final after a medial nor an initial before a vowel letter.
   // Doubled consonants are converted directly to initial + EU.
   "gg > $GGi $EU;"
   "dd > $DD $EU;"
   "bb > $BB $EU;"
   "ss > $SSi $EU;"
   "jj > $JJ $EU;"

   // Single consonant (same letters as $latinInitial): rewrite "x" to the
   // latin text "xeu" and leave the cursor in front of it ('|'), so the
   // initials and medials rules convert it.
   "([bcdghjkmnprst]) > | $1 eu;"

 // X + Final: Finally we have to deal with a consonant that can only be
 // interpreted as a final (not an initial) and which is preceded
 // neither by an initial nor a medial.  It is the start of the
 // syllable, but cannot be.  Most of these will already be handled by
 // the above rules.  'bs' splits into Bi EU Sf.  Similar for 'gs' 'ng'
 // 'nh' 'nj'.  The only problem is 'l' and digraphs starting with 'l'.
 // For this isolated case, we could add a null initial and medial,
 // which would give "la" => IEUNG EU L IEUNG A, for example.  A more
 // economical solution is to transliterate isolated "l" (that is,
 // initial "l") to "r".  (Other similar conversions of consonants that
 // occur neither as initials nor as finals are handled below.)

   // Forward-only.  The output is the latin letter 'r' with the cursor
   // left in front of it ('|'), so it is read again and handled like any
   // other 'r'.
   "l > | r;"

 // Medials.  If a medial is preceded by an initial, then we proceed
 // normally.  As usual, longer keys must precede shorter ones.

 // [BASIC Jamo-Latin MEDIALS handled here.  Order irrelevant within
 // this block for Jamo-Latin.]

   // Bidirectional rules, one per medial jamo.  Latin-Jamo: a vowel key is
   // converted only when an initial jamo precedes it; vowels without one
   // fall through to the IEUNG-insertion rule further down.  Jamo-Latin:
   // the medial maps back to the same latin key.
   "$jamoInitial {ae} <> $AE;"
   "$jamoInitial {a} <> $A;"
   "$jamoInitial {eo} <> $EO;"
   "$jamoInitial {eu} <> $EU;"
   "$jamoInitial {e} <> $E;"
   "$jamoInitial {i} <> $I;"
   "$jamoInitial {oe} <> $OE;"
   "$jamoInitial {o} <> $O;"
   "$jamoInitial {u} <> $U;"
   "$jamoInitial {wae} <> $WAE;"
   "$jamoInitial {wa} <> $WA;"
   "$jamoInitial {weo} <> $WEO;"
   "$jamoInitial {we} <> $WE;"
   "$jamoInitial {wi} <> $WI;"
   "$jamoInitial {yae} <> $YAE;"
   "$jamoInitial {ya} <> $YA;"
   "$jamoInitial {yeo} <> $YEO;"
   "$jamoInitial {ye} <> $YE;"
   "$jamoInitial {yi} <> $YI;"
   "$jamoInitial {yo} <> $YO;"
   "$jamoInitial {yu} <> $YU;"

 // We may see an anomalous isolated 'w' or 'y'.  In that case, we
 // interpret it as 'wi' and 'yu', respectively.

 // BREAKS ROUND TRIP INTEGRITY

   // Forward-only.  Reached only when none of the 'w...' / 'y...' medial
   // keys matched.  The output is latin text with the cursor left in front
   // of it ('|'), so the 'wi' / 'yu' medial rule then converts it.
   "$jamoInitial {w} > | wi;"
   "$jamoInitial {y} > | yu;"

 // Otherwise, insert a null consonant IEUNG before the medial (which is
 // still an untransliterated latin vowel).

   // $1 re-emits the captured vowel letter unchanged.  The cursor ('|') is
   // placed after the inserted $IEUNG, so the vowel is read again, this
   // time with an initial jamo as its left context.
   "($latinMedial) > $IEUNG | $1;"

 // Convert non-jamo latin consonants to equivalents.  These occur as
 // neither initials nor finals in jamo.  'l' occurs as a final, but not
 // an initial; it is handled above.  The following letters (left hand
 // side) will never be output by Jamo-Latin.

   // Forward-only.  Each replacement is latin text with the cursor left in
   // front of it ('|'), so the substituted letters are read again by the
   // consonant rules.
   "f > | p;"
   "q > | k;"
   "v > | b;"
   "x > | ks;"
   "z > | s;"

 // Delete hyphens (Latin-Jamo).

   // Forward-only rule with an empty replacement.  A doubled hyphen '--'
   // never reaches this rule; the earlier "'--' <> '-'" rule takes it.
   "'-' > ;"

 // Delete null consonants (Jamo-Latin).  Do NOT delete null EU vowels,
 // since these may also occur in text.

   // Reverse-only rule with an empty latin side: $IEUNG produces no latin
   // output.
   "< $IEUNG;"

 // eof
   }
 }
	// -- Coding: utf-8; --
	//--------------------------------------------------------------------
	// Copyright (c) 1999-2001, International Business Machines
	// Corporation and others. All Rights Reserved.
	//--------------------------------------------------------------------
	// THIS IS A MACHINE-GENERATED FILE
	// Tool: dumpICUrules.bat
	// Source: \icu4j\src\com\ibm\text\resources/Transliterator_Latin_Jamo.utf8.txt
	// Date: Thu Mar 8 12:27:24 2001
	//--------------------------------------------------------------------

	// Latin_Jamo

	ljamo {
	Rule {
	//--------------------------------------------------------------------
	// Copyright (c) 1999-2001, International Business Machines
	// Corporation and others. All Rights Reserved.
	//--------------------------------------------------------------------

	// Latin-Jamo

	// Transliteration from Latin characters to Korean script is done in
	// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
	// transliteration is done algorithmically following Unicode 3.0
	// section 3.11. This file implements the Latin to Jamo
	// transliteration using rules.

	// Jamo occupy the block 1100-11FF. Within this block there are three
	// groups of characters: initial consonants or choseong (I), medial
	// vowels or jungseong (M), and trailing consonants or jongseong (F).
	// Standard Korean syllables are of the form I+M+F*.

	// Section 3.11 describes the use of 'filler' jamo to convert
	// nonstandard syllables to standard form: the choseong filler 115F and
	// the jungseong filler 1160. In this transliterator, we will not use
	// 115F or 1160.

	// We will, however, insert two 'null' jamo to make foreign words
	// conform to Korean syllable structure. These are the null initial
	// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
	// we will use the hyphen in order to disambiguate strings,
	// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).

	// We will not use all of the characters in the jamo block. We will
	// only use the 19 initials, 21 medials, and 27 finals possessing a
	// jamo short name as defined in section 4.4 of the Unicode book.

	// Rules of thumb. These guidelines provide the basic framework
	// for the rules. They are phrased in terms of Latin-Jamo transliteration.
	// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
	// just context-free transliteration of jamo to corresponding short names,
	// with the addition of hyphens to maintain round-trip integrity
	// in the context of the Latin-Jamo rules.

	// A sequence of vowels:
	// - Take the longest sequence you can. If there are too many, or you don't
	// have a starting consonant, introduce a 110B as necessary.

	// A sequence of consonants.
	// - First join the double consonants: G + G -> GG
	// - In the remaining list,
	// -- If there is no preceding vowel, take the first consonant, and insert EU
	// after it. Continue with the rest of the consonants.
	// -- If there is one consonant, attach to the following vowel
	// -- If there are two consonants and a following vowel, attach one to the
	// preceding vowel, and one to the following vowel.
	// -- If there are more than two consonants, join the first two together if you
	// can: L + G => LG
	// -- If you still end up with more than 2 consonants, insert EU after the
	// first one, and continue with the rest of the consonants.

	//----------------------------------------------------------------------
	// Variables

	// Some latin consonants or consonant pairs only occur as initials, and
	// some only as finals, but some occur as both. This makes some jamo
	// consonants ambiguous when transliterated into latin.
	// Initial only: IEUNG BB DD JJ R
	// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
	// Initial and Final: B C D G GG H J K M N P S SS T

	"$Gi = \u1100;"
	"$GGi = \u1101;"
	"$Ni = \u1102;"
	"$Di = \u1103;"
	"$DD = \u1104;"
	"$R = \u1105;"
	"$Mi = \u1106;"
	"$Bi = \u1107;"
	"$BB = \u1108;"
	"$Si = \u1109;"
	"$SSi = \u110A;"
	"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
	"$Ji = \u110C;"
	"$JJ = \u110D;"
	"$Ci = \u110E;"
	"$Ki = \u110F;"
	"$Ti = \u1110;"
	"$Pi = \u1111;"
	"$Hi = \u1112;"

	"$A = \u1161;"
	"$AE = \u1162;"
	"$YA = \u1163;"
	"$YAE = \u1164;"
	"$EO = \u1165;"
	"$E = \u1166;"
	"$YEO = \u1167;"
	"$YE = \u1168;"
	"$O = \u1169;"
	"$WA = \u116A;"
	"$WAE = \u116B;"
	"$OE = \u116C;"
	"$YO = \u116D;"
	"$U = \u116E;"
	"$WEO = \u116F;"
	"$WE = \u1170;"
	"$WI = \u1171;"
	"$YU = \u1172;"
	"$EU = \u1173;" // null medial, inserted during Latin-Jamo
	"$YI = \u1174;"
	"$I = \u1175;"

	"$Gf = \u11A8;"
	"$GGf = \u11A9;"
	"$GS = \u11AA;"
	"$Nf = \u11AB;"
	"$NJ = \u11AC;"
	"$NH = \u11AD;"
	"$Df = \u11AE;"
	"$L = \u11AF;"
	"$LG = \u11B0;"
	"$LM = \u11B1;"
	"$LB = \u11B2;"
	"$LS = \u11B3;"
	"$LT = \u11B4;"
	"$LP = \u11B5;"
	"$LH = \u11B6;"
	"$Mf = \u11B7;"
	"$Bf = \u11B8;"
	"$BS = \u11B9;"
	"$Sf = \u11BA;"
	"$SSf = \u11BB;"
	"$NG = \u11BC;"
	"$Jf = \u11BD;"
	"$Cf = \u11BE;"
	"$Kf = \u11BF;"
	"$Tf = \u11C0;"
	"$Pf = \u11C1;"
	"$Hf = \u11C2;"

	"$jamoInitial = [\u1100-\u1112];"

	"$jamoMedial = [\u1161-\u1175];"

	"$latinInitial = [bcdghjkmnprst];"

	// Any character in the latin transliteration of a medial
	"$latinMedial = [aeiouwy];"

	// The last character of the latin transliteration of a medial
	"$latinMedialEnd = [aeiou];"

	//----------------------------------------------------------------------
	// Jamo-Latin

	// Jamo to latin is relatively simple, since it is the latin that is
	// ambiguous. Most rules are straightforward, and we encode them below
	// as simple add-on back rule, e.g.:

	// $jamoMedial {bs} > $BS;

	// becomes

	// $jamoMedial {bs} <> $BS;

	// Furthermore, we don't care about the ordering for Jamo-Latin because
	// we are going from single characters, so we can very easily piggyback
	// on the Latin-Jamo.

	// The main issue with Jamo-Latin is when to insert hyphens.
	// Hyphens are inserted to obtain correct round trip behavior. For
	// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
	// would then round trip to Ki A GGi E. To prevent this, we insert a
	// hyphen: "kag-ge". IMPORTANT: The need for hyphens depends
	// very specifically on the behavior of the Latin-Jamo rules. A change
	// in the Latin-Jamo behavior can completely change the way the
	// hyphen insertion must be done.

	// First try to preserve actual hyphens in the jamo text by doubling
	// them. This fixes problems like:
	// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
	// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
	// -- if we don't care about losing hyphens in the jamo, we can delete
	// this rule.

	"'--' <> '-';"

	// Triple consonants. For three consonants "axxx" we insert a
	// hyphen between the first and second "x" if XXf, Xf, and Xi all
	// exist, and we have A Xf XXi. This prevents the reverse
	// transliteration to A XXf Xi.

	"'-' < $latinMedialEnd g {} $GGi;"
	"'-' < $latinMedialEnd s {} $SSi;"

	// For vowels the rule is similar. If there is a vowel "ae" such that
	// "a" by itself and "e" by itself are vowels, then we want to map A E
	// to "a-e" so as not to round trip to AE. However, in the text Ki EO
	// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
	// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
	// tested. NOTE: These rules used to have a left context of
	// $latinInitial instead of [^$latinMedial]. The problem with this is
	// sequences where an initial IEUNG is transliterated away:
	// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)

	"'-' < [^$latinMedial] [y w] e {} [$O $OE];"
	"'-' < [^$latinMedial] e {} [$O $OE $U];"
	"'-' < [^$latinMedial] [o a] {} [$E $EO $EU];"
	"'-' < [^$latinMedial] [w y] a {} [$E $EO $EU];"

	// Similar to the above, but with an intervening $IEUNG.

	"'-' < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
	"'-' < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
	"'-' < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
	"'-' < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"

	// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
	// where Xi also exists, must be transliterated as "ax-e" to prevent
	// the round trip conversion to A Xi E.

	"'-' < $latinMedialEnd b {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd c {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd d {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd g {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd h {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd j {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd k {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd m {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd n {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd p {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd s {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd t {} $IEUNG $jamoMedial;"

	// Double finals followed by IEUNG. Similar to the single finals
	// followed by IEUNG. Any latin consonant pair X Y, between medials,
	// that we would split by Latin-Jamo, we must handle when it occurs as
	// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
	// E.

	"'-' < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
	"'-' < $latinMedialEnd s s {} $IEUNG $jamoMedial;"

	// Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
	// we transliterate as "ax-xe" to prevent round trip transliteration as
	// A XXi E.

	"'-' < $latinMedialEnd b {} $Bi $jamoMedial;"
	"'-' < $latinMedialEnd d {} $Di $jamoMedial;"
	"'-' < $latinMedialEnd j {} $Ji $jamoMedial;"
	"'-' < $latinMedialEnd g {} $Gi $jamoMedial;"
	"'-' < $latinMedialEnd s {} $Si $jamoMedial;"

	// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
	// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
	// "xyy" forms that correspond to XYf Yi must be transliterated as
	// "xy-y".

	"'-' < $latinMedialEnd b s {} [$Si $SSi];"
	"'-' < $latinMedialEnd g s {} [$Si $SSi];"
	"'-' < $latinMedialEnd l b {} [$Bi $BB];"
	"'-' < $latinMedialEnd l g {} [$Gi $GGi];"
	"'-' < $latinMedialEnd l s {} [$Si $SSi];"
	"'-' < $latinMedialEnd n g {} [$Gi $GGi];"
	"'-' < $latinMedialEnd n j {} [$Ji $JJ];"

	// Deletion of IEUNG is handled below.

	//----------------------------------------------------------------------
	// Latin-Jamo

	// [Basic, context-free Jamo-Latin rules are embedded here too. See
	// above.]

	// Split digraphs: Text of the form 'axye', where 'xy' is a final
	// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
	// 'e' are medials, we want to transliterate this as A Xf Yi E rather
	// than A XYf IEUNG E. We do NOT include text of the form "axxe",
	// since that is handled differently below. These rules are generated
	// programmatically from the jamo data.

	"$jamoMedial {b s} $latinMedial > $Bf $Si;"
	"$jamoMedial {g s} $latinMedial > $Gf $Si;"
	"$jamoMedial {l b} $latinMedial > $L $Bi;"
	"$jamoMedial {l g} $latinMedial > $L $Gi;"
	"$jamoMedial {l h} $latinMedial > $L $Hi;"
	"$jamoMedial {l m} $latinMedial > $L $Mi;"
	"$jamoMedial {l p} $latinMedial > $L $Pi;"
	"$jamoMedial {l s} $latinMedial > $L $Si;"
	"$jamoMedial {l t} $latinMedial > $L $Ti;"
	"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
	"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
	"$jamoMedial {n j} $latinMedial > $Nf $Ji;"

	// Single consonants are initials: For text of the form 'axe', where
	// 'x' can be an initial or a final, and 'a' and 'e' are medials, we
	// want to transliterate as A Xi E rather than A Xf IEUNG E.
	//
	// These rules are Latin-Jamo only ('>'). There is no rule for 'l' or
	// 'r' here: 'l' occurs as a final but not as an initial, and 'r' has
	// an initial rule but no final rule, so neither is ambiguous.

	"$jamoMedial {b} $latinMedial > $Bi;"
	"$jamoMedial {c} $latinMedial > $Ci;"
	"$jamoMedial {d} $latinMedial > $Di;"
	"$jamoMedial {g} $latinMedial > $Gi;"
	"$jamoMedial {h} $latinMedial > $Hi;"
	"$jamoMedial {j} $latinMedial > $Ji;"
	"$jamoMedial {k} $latinMedial > $Ki;"
	"$jamoMedial {m} $latinMedial > $Mi;"
	"$jamoMedial {n} $latinMedial > $Ni;"
	"$jamoMedial {p} $latinMedial > $Pi;"
	"$jamoMedial {s} $latinMedial > $Si;"
	"$jamoMedial {t} $latinMedial > $Ti;"

	// Doubled initials: For the sequence "axxe", where XX exists as an
	// initial (XXi) and Xi and Xf also exist (true of all digraphs XX),
	// we want to transliterate as A XXi E rather than splitting it into
	// A Xf Xi E. These rules are Latin-Jamo only ('>').

	"$jamoMedial {b b} $latinMedial > $BB;"
	"$jamoMedial {d d} $latinMedial > $DD;"
	"$jamoMedial {j j} $latinMedial > $JJ;"
	"$jamoMedial {g g} $latinMedial > $GGi;"
	"$jamoMedial {s s} $latinMedial > $SSi;"

	// XYY. Because doubled consonants bind more strongly than XY
	// consonants, we must handle the sequence "axyy" specially. Here XYf
	// and YYi must exist. In these cases, we map to Xf YYi rather than
	// XYf.
	//
	// Only the leading 'x' is the key and is converted (to its final
	// form) by these rules; the doubled 'yy' is right context only and
	// is left unconverted here.

	"$jamoMedial {b} s s > $Bf;"
	"$jamoMedial {g} s s > $Gf;"
	"$jamoMedial {l} b b > $L;"
	"$jamoMedial {l} g g > $L;"
	"$jamoMedial {l} s s > $L;"
	"$jamoMedial {n} g g > $Nf;"
	"$jamoMedial {n} j j > $Nf;"

	// Finals: Attach a consonant that follows a medial to that medial.
	// Do this BEFORE mapping consonants to initials. Longer keys must
	// precede shorter keys that they start with, e.g., the rule for 'bs'
	// must precede 'b'.

	// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
	// block for Jamo-Latin.]

	// One rule per string literal, so that the longest-key-first ordering
	// within each letter group (e.g. 'lb' ... 'lt' before 'l') can be
	// checked at a glance.

	"$jamoMedial {bs} <> $BS;"
	"$jamoMedial {b} <> $Bf;"
	"$jamoMedial {c} <> $Cf;"
	"$jamoMedial {d} <> $Df;"
	"$jamoMedial {gg} <> $GGf;"
	"$jamoMedial {gs} <> $GS;"
	"$jamoMedial {g} <> $Gf;"
	"$jamoMedial {h} <> $Hf;"
	"$jamoMedial {j} <> $Jf;"
	"$jamoMedial {k} <> $Kf;"
	"$jamoMedial {lb} <> $LB;"
	"$jamoMedial {lg} <> $LG;"
	"$jamoMedial {lh} <> $LH;"
	"$jamoMedial {lm} <> $LM;"
	"$jamoMedial {lp} <> $LP;"
	"$jamoMedial {ls} <> $LS;"
	"$jamoMedial {lt} <> $LT;"
	"$jamoMedial {l} <> $L;"
	"$jamoMedial {m} <> $Mf;"
	"$jamoMedial {ng} <> $NG;"
	"$jamoMedial {nh} <> $NH;"
	"$jamoMedial {nj} <> $NJ;"
	"$jamoMedial {n} <> $Nf;"
	"$jamoMedial {p} <> $Pf;"
	"$jamoMedial {ss} <> $SSf;"
	"$jamoMedial {s} <> $Sf;"
	"$jamoMedial {t} <> $Tf;"

	// Initials: Attach single consonant to following medial. Do this
	// AFTER mapping finals. Longer keys must precede shorter keys that
	// they start with, e.g., the rule for 'gg' must precede 'g'.

	// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
	// this block for Jamo-Latin.]

	// The consonant is the key; the following $latinMedial is right
	// context only. Each doubled form is listed immediately before its
	// single form (gg/g, dd/d, bb/b, ss/s, jj/j).

	"{gg} $latinMedial <> $GGi;"
	"{g} $latinMedial <> $Gi;"
	"{n} $latinMedial <> $Ni;"
	"{dd} $latinMedial <> $DD;"
	"{d} $latinMedial <> $Di;"
	"{r} $latinMedial <> $R;"
	"{m} $latinMedial <> $Mi;"
	"{bb} $latinMedial <> $BB;"
	"{b} $latinMedial <> $Bi;"
	"{ss} $latinMedial <> $SSi;"
	"{s} $latinMedial <> $Si;"
	"{jj} $latinMedial <> $JJ;"
	"{j} $latinMedial <> $Ji;"
	"{c} $latinMedial <> $Ci;"
	"{k} $latinMedial <> $Ki;"
	"{t} $latinMedial <> $Ti;"
	"{p} $latinMedial <> $Pi;"
	"{h} $latinMedial <> $Hi;"

	// 'r' in final position. Because of the equivalency of the 'l' and
	// 'r' jamo (the glyphs are the same), we try to provide the same
	// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
	// below. If we see an 'r' in an apparent final position (after a
	// jamo medial and before $latinInitial), treat it like 'l'. For
	// example, "karka" => Ki A R EU Ki A without this rule. Instead, we
	// want Ki A L Ki A.
	//
	// The output is Latin 'l' with the cursor marker placed before it,
	// so the 'l' is not final output: it is transliterated in turn by
	// the rules for 'l'.

	"$jamoMedial {r} $latinInitial > \| l;"

	// Initial + Final: If we match the next rule, we have an initial
	// followed by a final consonant with no intervening medial. We
	// insert the null vowel BEFORE the final consonant to create a
	// well-formed syllable. (In the next block we insert a null vowel
	// AFTER an anomalous initial.)
	//
	// The key is empty ('{}'), so nothing is consumed: $EU is inserted
	// between a jamo initial and a following Latin consonant. The set
	// lists exactly the letters that have a single-letter final rule
	// (b c d g h j k l m n p s t); 'r' is therefore excluded.

	"$jamoInitial {} [bcdghjklmnpst] > $EU;"

	// Initial + X: This block matches an initial consonant not followed by
	// a medial. We insert the null vowel after it. We handle double
	// initials explicitly here; for single initial consonants we insert EU
	// (as Latin) after them and let standard rules do the rest.

	// BREAKS ROUND TRIP INTEGRITY

	// Doubled initials: emit the doubled jamo initial plus the null vowel
	// $EU directly.
	"gg > $GGi $EU;"
	"dd > $DD $EU;"
	"bb > $BB $EU;"
	"ss > $SSi $EU;"
	"jj > $JJ $EU;"

	// Single initials: re-emit the captured consonant ($1) followed by
	// Latin "eu", with the cursor marker in front, so that the resulting
	// consonant + medial text is transliterated by the standard rules.
	// The set lists the letters that have a single-letter initial rule
	// (so it includes 'r' but not 'l').
	"([bcdghjkmnprst]) > \| $1 eu;"

	// X + Final: Finally we have to deal with a consonant that can only be
	// interpreted as a final (not an initial) and which is preceded
	// neither by an initial nor a medial. It would have to start the
	// syllable, but a final cannot do so. Most of these will already be
	// handled by the above rules. 'bs' splits into Bi EU Sf. Similar for
	// 'gs' 'ng' 'nh' 'nj'. The only problem is 'l' and digraphs starting
	// with 'l'. For this isolated case, we could add a null initial and
	// medial, which would give "la" => IEUNG EU L IEUNG A, for example.
	// A more economical solution is to transliterate isolated "l" (that
	// is, initial "l") to "r". (Other similar conversions of consonants
	// that occur neither as initials nor as finals are handled below.)
	//
	// The cursor marker precedes the 'r' in the output, so the 'r' is
	// transliterated in turn by the rules for 'r'.

	"l > \| r;"

	// Medials. If a medial is preceded by an initial, then we proceed
	// normally. As usual, longer keys must precede shorter ones (e.g.
	// 'ae' before 'a'; 'eo' and 'eu' before 'e'; 'wae' before 'wa').

	// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
	// this block for Jamo-Latin.]

	// The preceding $jamoInitial is left context only; the Latin vowel
	// sequence is the key.

	"$jamoInitial {ae} <> $AE;"
	"$jamoInitial {a} <> $A;"
	"$jamoInitial {eo} <> $EO;"
	"$jamoInitial {eu} <> $EU;"
	"$jamoInitial {e} <> $E;"
	"$jamoInitial {i} <> $I;"
	"$jamoInitial {oe} <> $OE;"
	"$jamoInitial {o} <> $O;"
	"$jamoInitial {u} <> $U;"
	"$jamoInitial {wae} <> $WAE;"
	"$jamoInitial {wa} <> $WA;"
	"$jamoInitial {weo} <> $WEO;"
	"$jamoInitial {we} <> $WE;"
	"$jamoInitial {wi} <> $WI;"
	"$jamoInitial {yae} <> $YAE;"
	"$jamoInitial {ya} <> $YA;"
	"$jamoInitial {yeo} <> $YEO;"
	"$jamoInitial {ye} <> $YE;"
	"$jamoInitial {yi} <> $YI;"
	"$jamoInitial {yo} <> $YO;"
	"$jamoInitial {yu} <> $YU;"

	// We may see an anomalous isolated 'w' or 'y' after an initial. In
	// that case, we interpret them as 'wi' and 'yu', respectively. The
	// replacement is Latin text placed after the cursor marker, so it is
	// transliterated in turn by the medial rules.

	// BREAKS ROUND TRIP INTEGRITY

	"$jamoInitial {w} > \| wi;"
	"$jamoInitial {y} > \| yu;"

	// Otherwise, insert a null consonant IEUNG before the medial (which is
	// still an untransliterated Latin vowel). The captured medial ($1) is
	// re-emitted after the cursor marker, so it is still to be
	// transliterated (presumably by the medial rules, with $IEUNG acting
	// as the preceding initial; $jamoInitial is defined outside this
	// section).

	"($latinMedial) > $IEUNG \| $1;"

	// Convert non-jamo Latin consonants to equivalents. These occur as
	// neither initials nor finals in jamo. 'l' occurs as a final, but not
	// an initial; it is handled above. The following letters (left hand
	// side) will never be output by Jamo-Latin.
	//
	// Each replacement follows the cursor marker, so it is transliterated
	// in turn by the rules for the substituted letter(s); note that 'x'
	// expands to the two letters 'ks'.

	"f > \| p;"
	"q > \| k;"
	"v > \| b;"
	"x > \| ks;"
	"z > \| s;"

	// Delete hyphens (Latin-Jamo). The right-hand side is empty, so a
	// hyphen that was typed only to disambiguate the Latin text produces
	// no output.

	"'-' > ;"

	// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
	// since these may also occur in text. The Latin side of this
	// Jamo-Latin ('<') rule is empty, so $IEUNG produces no Latin output.

	"< $IEUNG;"

	// eof
	}
	}