This commit was manufactured by cvs2svn to create tag
'jan_13_00_icu_sync'.
X-SVN-Rev: 576
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..4d99a35
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,58 @@
+* text=auto !eol
+
+*.c text !eol
+*.cc text !eol
+*.classpath text !eol
+*.cpp text !eol
+*.css text !eol
+*.dsp text !eol
+*.dsw text !eol
+*.filters text !eol
+*.h text !eol
+*.htm text !eol
+*.html text !eol
+*.in text !eol
+*.java text !eol
+*.launch text !eol
+*.mak text !eol
+*.md text !eol
+*.MF text !eol
+*.mk text !eol
+*.pl text !eol
+*.pm text !eol
+*.project text !eol
+*.properties text !eol
+*.py text !eol
+*.rc text !eol
+*.sh text eol=lf
+*.sln text !eol
+*.stub text !eol
+*.txt text !eol
+*.ucm text !eol
+*.vcproj text !eol
+*.vcxproj text !eol
+*.xml text !eol
+*.xsl text !eol
+*.xslt text !eol
+Makefile text !eol
+configure text !eol
+LICENSE text !eol
+README text !eol
+
+*.bin -text
+*.brk -text
+*.cnv -text
+*.icu -text
+*.res -text
+*.nrm -text
+*.spp -text
+*.tri2 -text
+
+# The following file types are stored in Git-LFS.
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.dat filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+
diff --git a/src/com/ibm/demo/translit/Demo.java b/src/com/ibm/demo/translit/Demo.java
new file mode 100755
index 0000000..62c6f1a
--- /dev/null
+++ b/src/com/ibm/demo/translit/Demo.java
@@ -0,0 +1,254 @@
+package demo.translit;
+import java.applet.*;
+import java.awt.*;
+import java.awt.event.*;
+import java.util.*;
+import com.ibm.text.components.*;
+import com.ibm.text.*;
+
+/**
+ * A frame that allows the user to experiment with keyboard
+ * transliteration. This class has a main() method so it can be run
+ * as an application. The frame contains an editable text component
+ * and uses keyboard transliteration to process keyboard events.
+ *
+ * <p>Copyright (c) IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Demo.java,v $ $Revision: 1.2 $ $Date: 2000/01/11 04:15:27 $
+ */
+public class Demo extends Frame {
+
+ static final boolean DEBUG = false;
+
+ Transliterator translit = null;
+
+ boolean compound = false;
+ Transliterator[] compoundTranslit = new Transliterator[MAX_COMPOUND];
+ static final int MAX_COMPOUND = 128;
+ int compoundCount = 0;
+
+ TransliteratingTextComponent text = null;
+
+ Menu translitMenu;
+ CheckboxMenuItem translitItem;
+ CheckboxMenuItem noTranslitItem;
+
+ static final String NO_TRANSLITERATOR = "None";
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ public static void main(String[] args) {
+ Frame f = new Demo(600, 200);
+ f.addWindowListener(new WindowAdapter() {
+ public void windowClosing(WindowEvent e) {
+ System.exit(0);
+ }
+ });
+ f.setVisible(true);
+ }
+
+ public Demo(int width, int height) {
+ super("Transliteration Demo");
+
+ initMenus();
+
+ addWindowListener(new WindowAdapter() {
+ public void windowClosing(WindowEvent e) {
+ handleClose();
+ }
+ });
+
+ text = new TransliteratingTextComponent();
+ Font font = new Font("serif", Font.PLAIN, 48);
+ text.setFont(font);
+ text.setSize(width, height);
+ text.setVisible(true);
+ text.setText("\u03B1\u05D0\u3042\u4E80");
+ add(text);
+
+ setSize(width, height);
+ }
+
+ private void initMenus() {
+ MenuBar mbar;
+ Menu menu;
+ MenuItem mitem;
+ CheckboxMenuItem citem;
+
+ setMenuBar(mbar = new MenuBar());
+ mbar.add(menu = new Menu("File"));
+ menu.add(mitem = new MenuItem("Quit"));
+ mitem.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ handleClose();
+ }
+ });
+
+ final ItemListener setTransliteratorListener = new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+ if (e.getStateChange() == ItemEvent.DESELECTED) {
+ // Don't let the current transliterator be deselected.
+ // Just reselect it.
+ item.setState(true);
+ } else if (compound) {
+ // Adding an item to a compound transliterator
+ handleAddToCompound(item.getLabel());
+ } else if (item != translitItem) {
+ // Deselect previous choice. Don't need to call
+ // setState(true) on new choice.
+ translitItem.setState(false);
+ translitItem = item;
+ handleSetTransliterator(item.getLabel());
+ }
+ }
+ };
+
+ translit = null;
+ mbar.add(translitMenu = new Menu("Transliterator"));
+ translitMenu.add(translitItem = noTranslitItem =
+ new CheckboxMenuItem(NO_TRANSLITERATOR, true));
+ noTranslitItem.addItemListener(new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ // Can't uncheck None -- any action here sets None to true
+ setNoTransliterator();
+ }
+ });
+
+ translitMenu.addSeparator();
+
+ translitMenu.add(citem = new CheckboxMenuItem("Compound"));
+ citem.addItemListener(new ItemListener() {
+ public void itemStateChanged(ItemEvent e) {
+ CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+ if (e.getStateChange() == ItemEvent.DESELECTED) {
+ // If compound gets deselected, then select NONE
+ setNoTransliterator();
+ } else if (!compound) {
+ // Switching from non-compound to compound
+ translitItem.setState(false);
+ translitItem = item;
+ translit = null;
+ compound = true;
+ compoundCount = 0;
+ for (int i=0; i<MAX_COMPOUND; ++i) {
+ compoundTranslit[i] = null;
+ }
+ }
+ }
+ });
+
+ translitMenu.addSeparator();
+
+ for (Enumeration e=getSystemTransliteratorNames().elements();
+ e.hasMoreElements(); ) {
+ String s = (String) e.nextElement();
+ translitMenu.add(citem = new CheckboxMenuItem(s));
+ citem.addItemListener(setTransliteratorListener);
+ }
+
+ mbar.add(menu = new Menu("Batch"));
+ menu.add(mitem = new MenuItem("Transliterate Selection"));
+ mitem.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ handleBatchTransliterate();
+ }
+ });
+ }
+
+ /**
+ * Get the IDs from Transliterator.getAvailableIDs() as a Vector of Strings sorted by String.compareTo.
+ */
+ private static Vector getSystemTransliteratorNames() {
+ Vector v = new Vector();
+ for (Enumeration e=Transliterator.getAvailableIDs();
+ e.hasMoreElements(); ) {
+ v.addElement(e.nextElement());
+ }
+ // Exchange sort: after pass i, slot i holds the smallest remaining ID. O(n^2) acceptable for small n
+ for (int i=0; i<(v.size()-1); ++i) {
+ String a = (String) v.elementAt(i);
+ for (int j=i+1; j<v.size(); ++j) {
+ String b = (String) v.elementAt(j);
+ if (a.compareTo(b) > 0) {
+ v.setElementAt(b, i);
+ v.setElementAt(a, j);
+ a = b;
+ }
+ }
+ }
+ return v;
+ }
+
+ private void setNoTransliterator() {
+ translitItem = noTranslitItem;
+ noTranslitItem.setState(true);
+ handleSetTransliterator(noTranslitItem.getLabel());
+ compound = false;
+ for (int i=0; i<translitMenu.getItemCount(); ++i) {
+ MenuItem it = translitMenu.getItem(i);
+ if (it != noTranslitItem && it instanceof CheckboxMenuItem) {
+ ((CheckboxMenuItem) it).setState(false);
+ }
+ }
+ }
+
+ private void handleAddToCompound(String name) {
+ if (compoundCount < MAX_COMPOUND) {
+ compoundTranslit[compoundCount] = decodeTranslitItem(name);
+ ++compoundCount;
+ Transliterator t[] = new Transliterator[compoundCount];
+ System.arraycopy(compoundTranslit, 0, t, 0, compoundCount);
+ translit = new CompoundTransliterator("Compound", t);
+ text.setTransliterator(translit);
+ }
+ }
+
+ private void handleSetTransliterator(String name) {
+ translit = decodeTranslitItem(name);
+ text.setTransliterator(translit);
+ }
+
+ /**
+ * Decode a menu item label, which is a transliterator ID or NO_TRANSLITERATOR (decoded as null).
+ */
+ private static Transliterator decodeTranslitItem(String name) {
+ return (name.equals(NO_TRANSLITERATOR))
+ ? null : Transliterator.getInstance(name);
+ }
+
+ private void handleBatchTransliterate() {
+ if (translit == null) {
+ return;
+ }
+
+ int start = text.getSelectionStart();
+ int end = text.getSelectionEnd();
+ ReplaceableString s =
+ new ReplaceableString(text.getText().substring(start, end));
+
+ StringBuffer log = null;
+ if (DEBUG) {
+ log = new StringBuffer();
+ log.append('"' + s.toString() + "\" (start " + start +
+ ", end " + end + ") -> \"");
+ }
+
+ translit.transliterate(s);
+ String str = s.toString();
+
+ if (DEBUG) {
+ log.append(str + "\"");
+ System.out.println("Batch " + translit.getID() + ": " + log.toString());
+ }
+
+ text.replaceRange(str, start, end);
+ text.select(start, start + str.length());
+ }
+
+ private void handleClose() {
+ dispose();
+ }
+}
diff --git a/src/com/ibm/demo/translit/DemoApplet.java b/src/com/ibm/demo/translit/DemoApplet.java
new file mode 100755
index 0000000..dede459
--- /dev/null
+++ b/src/com/ibm/demo/translit/DemoApplet.java
@@ -0,0 +1,62 @@
+package demo.translit;
+import java.awt.*;
+import java.awt.event.*;
+import java.applet.*;
+import com.ibm.text.components.AppletFrame;
+
+/**
+ * A simple Applet that shows a button. When pressed, the button
+ * shows the Demo frame. This Applet is meant to be embedded
+ * in a web page.
+ *
+ * <p>Copyright (c) IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: DemoApplet.java,v $ $Revision: 1.2 $ $Date: 2000/01/11 04:15:27 $
+ */
+public class DemoApplet extends Applet {
+
+ Demo frame = null;
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ public static void main(String args[]) {
+ final DemoApplet applet = new DemoApplet();
+ new AppletFrame("Transliteration Demo", applet, 640, 480);
+ }
+
+ public void init() {
+
+ Button button = new Button("Transliteration Demo");
+ button.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ if (frame == null) {
+ frame = new Demo(600, 200);
+ frame.addWindowListener(new WindowAdapter() {
+ public void windowClosing(WindowEvent we) {
+ frame = null;
+ }
+ });
+ }
+ frame.setVisible(true);
+ frame.toFront();
+ }
+ });
+
+ add(button);
+
+ Dimension size = button.getPreferredSize();
+ size.width += 10;
+ size.height += 10;
+
+ resize(size);
+ }
+
+ public void stop() {
+ if (frame != null) {
+ frame.dispose();
+ }
+ frame = null;
+ }
+}
diff --git a/src/com/ibm/demo/translit/demo.bat b/src/com/ibm/demo/translit/demo.bat
new file mode 100755
index 0000000..88f63e3
--- /dev/null
+++ b/src/com/ibm/demo/translit/demo.bat
@@ -0,0 +1,7 @@
+REM For best results, run the demo as an applet inside of Netscape
+REM with Bitstream Cyberbit installed.
+
+REM setup your JDK 1.1.x path and classpath here:
+call JDK11
+set CLASSPATH=../translit.jar;%CLASSPATH%
+javaw demo.translit.Demo
diff --git a/src/com/ibm/demo/translit/demo.html b/src/com/ibm/demo/translit/demo.html
new file mode 100755
index 0000000..6327daf
--- /dev/null
+++ b/src/com/ibm/demo/translit/demo.html
@@ -0,0 +1,8 @@
+<HTML>
+<HEAD>
+<TITLE>Transliteration Demo</TITLE>
+</HEAD>
+<BODY>
+<APPLET CODE="DemoApplet.class" WIDTH=140 HEIGHT=33></APPLET>
+</BODY>
+</HTML>
diff --git a/src/com/ibm/test/translit/TransliteratorTest.java b/src/com/ibm/test/translit/TransliteratorTest.java
new file mode 100755
index 0000000..d2abec5
--- /dev/null
+++ b/src/com/ibm/test/translit/TransliteratorTest.java
@@ -0,0 +1,507 @@
+package test.translit;
+import test.IntlTest;
+import com.ibm.text.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary General test of Transliterator
+ */
+public class TransliteratorTest extends IntlTest {
+
+ public static void main(String[] args) throws Exception {
+ new TransliteratorTest().run(args);
+ }
+
+ /**
+ * A CommonPoint legacy round-trip test for the Kana transliterator.
+ */
+// public void TestKanaRoundTrip() {
+// Transliterator t = Transliterator.getInstance("Kana");
+// StringTokenizer tok = new StringTokenizer(KANA_RT_DATA);
+// while (tok.hasMoreTokens()) {
+// String str = tok.nextToken();
+// ReplaceableString tmp = new ReplaceableString(str);
+// t.transliterate(tmp, Transliterator.FORWARD);
+//
+// str = tmp.toString();
+// tmp = new ReplaceableString(str);
+// t.transliterate(tmp, Transliterator.REVERSE);
+// t.transliterate(tmp, Transliterator.FORWARD);
+// if (!tmp.toString().equals(str)) {
+// tmp = new ReplaceableString(str);
+// t.transliterate(tmp, Transliterator.REVERSE);
+// String a = tmp.toString();
+// t.transliterate(tmp, Transliterator.FORWARD);
+// errln("FAIL: " + escape(str) + " -> " +
+// escape(a) + " -> " + escape(tmp.toString()));
+// }
+// }
+// }
+
+ public void TestInstantiation() {
+ long ms = System.currentTimeMillis();
+ String ID;
+ for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
+ ID = (String) e.nextElement();
+ try {
+ Transliterator t = Transliterator.getInstance(ID);
+ // We should get a new instance if we try again
+ Transliterator t2 = Transliterator.getInstance(ID);
+ if (t != t2) {
+ logln(ID + ":" + t);
+ } else {
+ errln("FAIL: " + ID + " returned identical instances");
+ }
+ } catch (IllegalArgumentException ex) {
+ errln("FAIL: " + ID);
+ throw ex;
+ }
+ }
+
+ // Now test the failure path
+ try {
+ ID = "<Not a valid Transliterator ID>";
+ Transliterator t = Transliterator.getInstance(ID);
+ errln("FAIL: " + ID + " returned " + t);
+ } catch (IllegalArgumentException ex) {
+ logln("OK: Bogus ID handled properly");
+ }
+
+ ms = System.currentTimeMillis() - ms;
+ logln("Elapsed time: " + ms + " ms");
+ }
+
+ public void TestDisplayName() {
+ String ID;
+ for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
+ ID = (String) e.nextElement();
+ logln(ID + " -> " + Transliterator.getDisplayName(ID));
+ }
+ }
+
+ public void TestSimpleRules() {
+ /* Example: rules 1. ab>x|y
+ * 2. yc>z
+ *
+ * []|eabcd start - no match, copy e to translated buffer
+ * [e]|abcd match rule 1 - copy output & adjust cursor
+ * [ex|y]cd match rule 2 - copy output & adjust cursor
+ * [exz]|d no match, copy d to transliterated buffer
+ * [exzd]| done
+ */
+ expect("ab>x|y;" +
+ "yc>z",
+ "eabcd", "exzd");
+
+ /* Another set of rules:
+ * 1. ab>x|yzacw
+ * 2. za>q
+ * 3. qc>r
+ * 4. cw>n
+ *
+ * []|ab Rule 1
+ * [x|yzacw] No match
+ * [xy|zacw] Rule 2
+ * [xyq|cw] Rule 4
+ * [xyqn]| Done
+ */
+ expect("ab>x|yzacw;" +
+ "za>q;" +
+ "qc>r;" +
+ "cw>n",
+ "ab", "xyqn");
+
+ /* Test categories
+ */
+ Transliterator t = new RuleBasedTransliterator("<ID>",
+ "dummy=\uE100;" +
+ "vowel=[aeiouAEIOU];" +
+ "lu=[:Lu:];" +
+ "{vowel} ({lu}) > !;" +
+ "{vowel} > &;" +
+ "(!) {lu} > ^;" +
+ "{lu} > *;" +
+ "a>ERROR");
+ expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
+ }
+
+ // Restore this test if/when it's been deciphered. In general,
+ // tests that depend on a specific transliterator are subject
+ // to the same fragility as tests that depend on resource data.
+
+// public void TestKana() {
+// String DATA[] = {
+// "a", "\u3042",
+// "A", "\u30A2",
+// "aA", "\u3042\u30A2",
+// "aaaa", "\u3042\u3042\u3042\u3042",
+// "akasata", "\u3042\u304B\u3055\u305F",
+// };
+//
+// Transliterator t = Transliterator.getInstance("Latin-Kana");
+// Transliterator rt = Transliterator.getInstance("Kana-Latin");
+// for (int i=0; i<DATA.length; i+=2) {
+// expect(t, DATA[i], DATA[i+1], rt);
+// }
+// }
+
+ /**
+ * Test inline set syntax and set variable syntax.
+ */
+ public void TestInlineSet() {
+ expect("[:Ll:] (x) > y; [:Ll:] > z;", "aAbxq", "zAyzz");
+ expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
+
+ expect("digit = [0-9];" +
+ "alpha = [a-zA-Z];" +
+ "alphanumeric = [{digit}{alpha}];" + // ***
+ "special = [^{alphanumeric}];" + // ***
+ "{alphanumeric} > -;" +
+ "{special} > *;",
+
+ "thx-1138", "---*----");
+ }
+
+ /**
+ * Create some inverses and confirm that they work. We have to be
+ * careful how we do this, since the inverses will not be true
+ * inverses -- we can't throw any random string at the composition
+ * of the transliterators and expect the identity function. F x
+ * F' != I. However, if we are careful about the input, we will
+ * get the expected results.
+ */
+ public void TestRuleBasedInverse() {
+ String RULES =
+ "abc>zyx;" +
+ "ab>yz;" +
+ "bc>zx;" +
+ "ca>xy;" +
+ "a>x;" +
+ "b>y;" +
+ "c>z;" +
+
+ "abc<zyx;" +
+ "ab<yz;" +
+ "bc<zx;" +
+ "ca<xy;" +
+ "a<x;" +
+ "b<y;" +
+ "c<z;" +
+
+ "";
+
+ String[] DATA = {
+ // Careful here -- random strings will not work. If we keep
+ // the left side to the domain and the right side to the range
+ // we will be okay though (left, abc; right xyz).
+ "a", "x",
+ "abcacab", "zyxxxyy",
+ "caccb", "xyzzy",
+ };
+
+ Transliterator fwd = new RuleBasedTransliterator("<ID>", RULES);
+ Transliterator rev = new RuleBasedTransliterator("<ID>", RULES,
+ RuleBasedTransliterator.REVERSE, null);
+ for (int i=0; i<DATA.length; i+=2) {
+ expect(fwd, DATA[i], DATA[i+1]);
+ expect(rev, DATA[i+1], DATA[i]);
+ }
+ }
+
+ /**
+ * Basic test of keyboard.
+ */
+ public void TestKeyboard() {
+ Transliterator t = new RuleBasedTransliterator("<ID>",
+ "psch>Y;"
+ +"ps>y;"
+ +"ch>x;"
+ +"a>A;");
+ String DATA[] = {
+ // insertion, buffer
+ "a", "A",
+ "p", "Ap",
+ "s", "Aps",
+ "c", "Apsc",
+ "a", "AycA",
+ "psch", "AycAY",
+ null, "AycAY", // null means finishKeyboardTransliteration
+ };
+
+ keyboardAux(t, DATA);
+ }
+
+ /**
+ * Basic test of keyboard with cursor.
+ */
+ public void TestKeyboard2() {
+ Transliterator t = new RuleBasedTransliterator("<ID>",
+ "ych>Y;"
+ +"ps>|y;"
+ +"ch>x;"
+ +"a>A;");
+ String DATA[] = {
+ // insertion, buffer
+ "a", "A",
+ "p", "Ap",
+ "s", "Ay",
+ "c", "Ayc",
+ "a", "AycA",
+ "p", "AycAp",
+ "s", "AycAy",
+ "c", "AycAyc",
+ "h", "AycAY",
+ null, "AycAY", // null means finishKeyboardTransliteration
+ };
+
+ keyboardAux(t, DATA);
+ }
+
+ /**
+ * Test keyboard transliteration with back-replacement.
+ */
+ public void TestKeyboard3() {
+ // We want th>z but t>y. Furthermore, during keyboard
+ // transliteration we want t>y then yh>z if t, then h are
+ // typed.
+ String RULES =
+ "t>|y;" +
+ "yh>z;" +
+ "";
+
+ String[] DATA = {
+ // Column 1: characters to add to buffer (as if typed)
+ // Column 2: expected appearance of buffer after
+ // keyboard xliteration.
+ "a", "a",
+ "b", "ab",
+ "t", "aby",
+ "c", "abyc",
+ "t", "abycy",
+ "h", "abycz",
+ null, "abycz", // null means finishKeyboardTransliteration
+ };
+
+ Transliterator t = new RuleBasedTransliterator("<ID>", RULES);
+ keyboardAux(t, DATA);
+ }
+
+ private void keyboardAux(Transliterator t, String[] DATA) {
+ int[] index = {0, 0, 0};
+ ReplaceableString s = new ReplaceableString();
+ for (int i=0; i<DATA.length; i+=2) {
+ StringBuffer log;
+ if (DATA[i] != null) {
+ log = new StringBuffer(s.toString() + " + "
+ + DATA[i]
+ + " -> ");
+ t.keyboardTransliterate(s, index, DATA[i]);
+ } else {
+ log = new StringBuffer(s.toString() + " => ");
+ t.finishKeyboardTransliteration(s, index);
+ }
+ String str = s.toString();
+ // Show the start index '{' and the cursor '|'
+ log.append(str.substring(0, index[Transliterator.START])).
+ append('{').
+ append(str.substring(index[Transliterator.START],
+ index[Transliterator.CURSOR])).
+ append('|').
+ append(str.substring(index[Transliterator.CURSOR]));
+ if (str.equals(DATA[i+1])) {
+ logln(log.toString());
+ } else {
+ errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]);
+ }
+ }
+ }
+
+ public void TestArabic() {
+ String DATA[] = {
+ "Arabic",
+ "\u062a\u062a\u0645\u062a\u0639 "+
+ "\u0627\u0644\u0644\u063a\u0629 "+
+ "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+
+ "\u0628\u0628\u0646\u0638\u0645 "+
+ "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+
+ "\u062c\u0645\u064a\u0644\u0629"
+ };
+
+ Transliterator t = Transliterator.getInstance("Latin-Arabic");
+ for (int i=0; i<DATA.length; i+=2) {
+ expect(t, DATA[i], DATA[i+1]);
+ }
+ }
+
+ /**
+ * Compose the Kana transliterator forward and reverse and try
+ * some strings that should come out unchanged.
+ */
+ public void TestCompoundKana() {
+ Transliterator kana = Transliterator.getInstance("Latin-Kana");
+ Transliterator rkana = Transliterator.getInstance("Kana-Latin");
+ Transliterator[] trans = { kana, rkana };
+ Transliterator t = new CompoundTransliterator("<ID>", trans);
+
+ expect(t, "aaaaa", "aaaaa");
+ }
+
+ /**
+ * Compose the hex transliterators forward and reverse.
+ */
+ public void TestCompoundHex() {
+ Transliterator a = Transliterator.getInstance("Unicode-Hex");
+ Transliterator b = Transliterator.getInstance("Hex-Unicode");
+ Transliterator[] trans = { a, b };
+ Transliterator ab = new CompoundTransliterator("ab", trans);
+ String s = "abcde";
+ expect(ab, s, s);
+
+ trans = new Transliterator[] { b, a };
+ Transliterator ba = new CompoundTransliterator("ba", trans);
+ ReplaceableString str = new ReplaceableString(s);
+ a.transliterate(str);
+ expect(ba, str.toString(), str.toString());
+ }
+
+ /**
+ * Filter 'c' out of the Unicode-Hex transliterator and check that only 'c' is left unescaped.
+ */
+ public void TestFiltering() {
+ Transliterator hex = Transliterator.getInstance("Unicode-Hex");
+ hex.setFilter(new UnicodeFilter() {
+ public boolean isIn(char c) {
+ return c != 'c'; // every character except 'c' passes the filter
+ }
+ });
+ String s = "abcde";
+ String out = hex.transliterate(s);
+ String exp = "\\u0061\\u0062c\\u0064\\u0065";
+ if (out.equals(exp)) {
+ logln("Ok: \"" + exp + "\"");
+ } else {
+ errln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); // errln so a mismatch fails the test
+ }
+ }
+
+ /**
+ * Test pattern quoting and escape mechanisms.
+ */
+ public void TestPatternQuoting() {
+ // Array of 3n items
+ // Each item is <rules>, <input>, <expected output>
+ String[] DATA = {
+ "\u4E01>'[male adult]'", "\u4E01", "[male adult]",
+ };
+
+ for (int i=0; i<DATA.length; i+=3) {
+ logln("Pattern: " + escape(DATA[i]));
+ Transliterator t = new RuleBasedTransliterator("<ID>", DATA[i]);
+ expect(t, DATA[i+1], DATA[i+2]);
+ }
+ }
+
+ //======================================================================
+ // Support methods
+ //======================================================================
+
+ void expect(String rules, String source, String expectedResult) {
+ expect(new RuleBasedTransliterator("<ID>", rules), source, expectedResult);
+ }
+
+ void expect(Transliterator t, String source, String expectedResult,
+ Transliterator reverseTransliterator) {
+ expect(t, source, expectedResult);
+ if (reverseTransliterator != null) {
+ expect(reverseTransliterator, expectedResult, source);
+ }
+ }
+
+ void expect(Transliterator t, String source, String expectedResult) {
+ String result = t.transliterate(source);
+ expectAux(t.getID() + ":String", source, result, expectedResult);
+
+ ReplaceableString rsource = new ReplaceableString(source);
+ t.transliterate(rsource);
+ result = rsource.toString();
+ expectAux(t.getID() + ":Replaceable", source, result, expectedResult);
+
+ // Test keyboard (incremental) transliteration -- this result
+ // must be the same after we finalize (see below).
+ rsource.getStringBuffer().setLength(0);
+ int[] index = { 0, 0, 0 };
+ StringBuffer log = new StringBuffer();
+
+ for (int i=0; i<source.length(); ++i) {
+ if (i != 0) {
+ log.append(" + ");
+ }
+ log.append(source.charAt(i)).append(" -> ");
+ t.keyboardTransliterate(rsource, index,
+ String.valueOf(source.charAt(i)));
+ // Append the buffer contents to the log, with a vertical bar '|'
+ // at the position given by index[Transliterator.CURSOR].
+ String s = rsource.toString();
+ log.append(s.substring(0, index[Transliterator.CURSOR])).
+ append('|').
+ append(s.substring(index[Transliterator.CURSOR]));
+ }
+
+ // As a final step in keyboard transliteration, we must call
+ // finishKeyboardTransliteration to finish off any pending partial matches that
+ // were waiting for more input.
+ t.finishKeyboardTransliteration(rsource, index);
+ result = rsource.toString();
+ log.append(" => ").append(rsource.toString());
+
+ expectAux(t.getID() + ":Keyboard", log.toString(),
+ result.equals(expectedResult),
+ expectedResult);
+ }
+
+ void expectAux(String tag, String source,
+ String result, String expectedResult) {
+ expectAux(tag, source + " -> " + result,
+ result.equals(expectedResult),
+ expectedResult);
+ }
+
+ void expectAux(String tag, String summary, boolean pass,
+ String expectedResult) {
+ if (pass) {
+ logln("("+tag+") " + escape(summary));
+ } else {
+ errln("FAIL: ("+tag+") "
+ + escape(summary)
+ + ", expected " + escape(expectedResult));
+ }
+ }
+
+ /**
+ * Escape non-ASCII characters as Unicode.
+ */
+ public static final String escape(String s) {
+ StringBuffer buf = new StringBuffer();
+ for (int i=0; i<s.length(); ++i) {
+ char c = s.charAt(i);
+ if (c >= ' ' && c <= 0x007F) {
+ buf.append(c);
+ } else {
+ buf.append("\\u");
+ if (c < 0x1000) {
+ buf.append('0');
+ if (c < 0x100) {
+ buf.append('0');
+ if (c < 0x10) {
+ buf.append('0');
+ }
+ }
+ }
+ buf.append(Integer.toHexString(c));
+ }
+ }
+ return buf.toString();
+ }
+}
diff --git a/src/com/ibm/test/translit/UnicodeSetTest.java b/src/com/ibm/test/translit/UnicodeSetTest.java
new file mode 100755
index 0000000..e7aa4a8
--- /dev/null
+++ b/src/com/ibm/test/translit/UnicodeSetTest.java
@@ -0,0 +1,158 @@
+package test.translit;
+import test.IntlTest;
+import com.ibm.text.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary General test of UnicodeSet
+ */
+public class UnicodeSetTest extends IntlTest {
+
+ public static void main(String[] args) throws Exception {
+ new UnicodeSetTest().run(args);
+ }
+
+ public void TestPatterns() {
+ UnicodeSet set = new UnicodeSet();
+ expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
+ expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
+ expectPattern(set, "[a\\-z]", "--aazz");
+ expectPattern(set, "[-az]", "--aazz");
+ expectPattern(set, "[az-]", "--aazz");
+ expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
+
+ // Throw in a test of complement
+ set.complement();
+ String exp = '\u0000' + "aeeoouu" + (char)('z'+1) + '\uFFFF';
+ expectPairs(set, exp);
+ }
+
+ public void TestCategories() {
+ UnicodeSet set = new UnicodeSet("[:Lu:]");
+ expectContainment(set, "ABC", "abc");
+ }
+
+ public void TestAddRemove() {
+ UnicodeSet set = new UnicodeSet();
+ set.add('a', 'z');
+ expectPairs(set, "az");
+ set.remove('m', 'p');
+ expectPairs(set, "alqz");
+ set.remove('e', 'g');
+ expectPairs(set, "adhlqz");
+ set.remove('d', 'i');
+ expectPairs(set, "acjlqz");
+ set.remove('c', 'r');
+ expectPairs(set, "absz");
+ set.add('f', 'q');
+ expectPairs(set, "abfqsz");
+ set.remove('a', 'g');
+ expectPairs(set, "hqsz");
+ set.remove('a', 'z');
+ expectPairs(set, "");
+
+ // Try removing an entire set from another set
+ expectPattern(set, "[c-x]", "cx");
+ UnicodeSet set2 = new UnicodeSet();
+ expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
+ set.removeAll(set2);
+ expectPairs(set, "deluxx");
+
+ // Try adding an entire set to another set
+ expectPattern(set, "[jackiemclean]", "aacceein");
+ expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
+ set.addAll(set2);
+ expectPairs(set, "aacehort");
+
+ // Test commutativity
+ expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
+ expectPattern(set2, "[jackiemclean]", "aacceein");
+ set.addAll(set2);
+ expectPairs(set, "aacehort");
+ }
+
+ void expectContainment(UnicodeSet set, String charsIn, String charsOut) { // either string may be null to skip that check
+ StringBuffer bad = new StringBuffer(); // collects the characters that violate the expectation
+ if (charsIn != null) {
+ for (int i=0; i<charsIn.length(); ++i) {
+ char c = charsIn.charAt(i);
+ if (!set.contains(c)) {
+ bad.append(c);
+ }
+ }
+ if (bad.length() > 0) {
+ errln("Fail: set " + set + " does not contain " + bad + // errln so the test actually fails
+ ", expected containment of " + charsIn);
+ } else {
+ logln("Ok: set " + set + " contains " + charsIn);
+ }
+ }
+ if (charsOut != null) {
+ bad.setLength(0); // reuse the buffer for the non-containment check
+ for (int i=0; i<charsOut.length(); ++i) {
+ char c = charsOut.charAt(i);
+ if (set.contains(c)) {
+ bad.append(c);
+ }
+ }
+ if (bad.length() > 0) {
+ errln("Fail: set " + set + " contains " + bad + // errln so the test actually fails
+ ", expected non-containment of " + charsOut);
+ } else {
+ logln("Ok: set " + set + " does not contain " + charsOut);
+ }
+ }
+ }
+
+ void expectPattern(UnicodeSet set,
+ String pattern,
+ String expectedPairs) {
+ set.applyPattern(pattern);
+ if (!set.getPairs().equals(expectedPairs)) {
+ errln("FAIL: applyPattern(\"" + pattern +
+ "\") => pairs \"" +
+ escape(set.getPairs()) + "\", expected \"" +
+ escape(expectedPairs) + "\"");
+ } else {
+ logln("Ok: applyPattern(\"" + pattern +
+ "\") => pairs \"" +
+ escape(set.getPairs()) + "\"");
+ }
+ }
+
+ void expectPairs(UnicodeSet set, String expectedPairs) {
+ if (!set.getPairs().equals(expectedPairs)) {
+ errln("FAIL: Expected pair list \"" +
+ escape(expectedPairs) + "\", got \"" +
+ escape(set.getPairs()) + "\"");
+ }
+ }
+
+ /**
+ * Escape non-ASCII characters as Unicode.
+ */
+ static final String escape(String s) {
+ StringBuffer buf = new StringBuffer();
+ for (int i=0; i<s.length(); ++i) {
+ char c = s.charAt(i);
+ if (c >= ' ' && c <= 0x007F) {
+ buf.append(c);
+ } else {
+ buf.append("\\u");
+ if (c < 0x1000) {
+ buf.append('0');
+ if (c < 0x100) {
+ buf.append('0');
+ if (c < 0x10) {
+ buf.append('0');
+ }
+ }
+ }
+ buf.append(Integer.toHexString(c));
+ }
+ }
+ return buf.toString();
+ }
+}
diff --git a/src/com/ibm/text/CompoundTransliterator.java b/src/com/ibm/text/CompoundTransliterator.java
new file mode 100755
index 0000000..c358223
--- /dev/null
+++ b/src/com/ibm/text/CompoundTransliterator.java
@@ -0,0 +1,285 @@
+package com.ibm.text;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+/**
+ * A transliterator that is composed of two or more other
+ * transliterator objects linked together. For example, if one
+ * transliterator transliterates from script A to script B, and
+ * another transliterates from script B to script C, the two may be
+ * combined to form a new transliterator from A to C.
+ *
+ * <p>Composed transliterators may not behave as expected. For
+ * example, inverses may not combine to form the identity
+ * transliterator. See the class documentation for {@link
+ * Transliterator} for details.
+ *
+ * <p>If a non-<tt>null</tt> <tt>UnicodeFilter</tt> is applied to a
+ * <tt>CompoundTransliterator</tt>, keyboard transliteration logically
+ * <b>and</b>s it with the filter of each transliterator in the chain
+ * (the component filters are swapped temporarily and then restored).
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class CompoundTransliterator extends Transliterator {
+
+ private static final boolean DEBUG = false;
+
+ private Transliterator[] trans;
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+ * Constructs a new compound transliterator given an array of
+ * transliterators. The array may be of any length, including zero
+ * or one; useful compound transliterators have at least two.
+ * @param ID the ID of this transliterator, passed to the superclass
+ * @param transliterators array of <code>Transliterator</code>
+ * objects; the array itself is copied, its elements are shared
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ */
+ public CompoundTransliterator(String ID, Transliterator[] transliterators,
+ UnicodeFilter filter) {
+ super(ID, filter);
+ trans = new Transliterator[transliterators.length];
+ System.arraycopy(transliterators, 0, trans, 0, trans.length);
+ }
+
+ /**
+ * Constructs a new compound transliterator with no filter, given an
+ * array of transliterators. The array may be of any length,
+ * including zero or one; useful ones have at least two components.
+ * @param ID the ID of this transliterator
+ * @param transliterators array of <code>Transliterator</code>
+ * objects
+ */
+ public CompoundTransliterator(String ID, Transliterator[] transliterators) {
+ this(ID, transliterators, null);
+ }
+
+ /**
+ * Returns the number of transliterators in this chain.
+ * @return number of transliterators in this chain.
+ */
+ public int getCount() {
+ return trans.length;
+ }
+
+ /**
+ * Returns the transliterator at the given index in this chain.
+ * @param index index into chain, from 0 to <code>getCount() - 1</code>
+ * @return transliterator at the given index
+ */
+ public Transliterator getTransliterator(int index) {
+ return trans[index];
+ }
+
+ /**
+ * Runs each transliterator in the chain, in order, over a segment of a
+ * string. <code>Transliterator</code> API. NOTE(review): the filter of
+ * this object is not applied here (only in keyboard mode) -- confirm.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive
+ * @param limit the ending index, exclusive
+ * @return the new limit index
+ */
+ public int transliterate(Replaceable text, int start, int limit) {
+ for (int i=0; i<trans.length; ++i) {
+ limit = trans[i].transliterate(text, start, limit); // each stage sees the previous stage's limit
+ }
+ return limit;
+ }
+
+ /**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ */
+ protected void handleKeyboardTransliterate(Replaceable text,
+ int[] index) {
+ /* Call each transliterator with the same start value and
+ * initial cursor index, but with the limit index as modified
+ * by preceding transliterators. The cursor index must be
+ * reset for each transliterator to give each a chance to
+ * transliterate the text. The initial cursor index is known
+ * to still point to the same place after each transliterator
+ * is called because each transliterator will not change the
+ * text between start and the initial value of cursor.
+ *
+ * IMPORTANT: After the first transliterator, each subsequent
+ * transliterator only gets to transliterate text committed by
+ * preceding transliterators; that is, the cursor (output
+ * value) of transliterator i becomes the limit (input value)
+ * of transliterator i+1. Finally, the overall limit is fixed
+ * up before we return.
+ *
+ * Assumptions we make here:
+ * (1) start <= cursor <= limit ;cursor valid on entry
+ * (2) cursor <= cursor' <= limit' ;cursor doesn't move back
+ * (3) cursor <= limit' ;text before cursor unchanged
+ * - cursor' is the value of cursor after calling handleKT
+ * - limit' is the value of limit after calling handleKT
+ */
+
+ /**
+ * Example: 3 transliterators. This example illustrates the
+ * mechanics we need to implement. S, C, and L are the start,
+ * cursor, and limit. gl is the globalLimit.
+ *
+ * 1. h-u, changes hex to Unicode
+ *
+ *    4  7  a  d  0     4  7  a
+ *    abc/u0061/u   =>  abca/u
+ *    S  C       L      S   C L   gl=f->a
+ *
+ * 2. upup, changes "x" to "XX"
+ *
+ *    4  7  a       4  7  a
+ *    abca/u    =>  abcAA/u
+ *    S  CL         S    C
+ *                       L   gl=a->b
+ * 3. u-h, changes Unicode to hex
+ *
+ *    4  7  a        4  7  a  d  0  3
+ *    abcAA/u    =>  abc/u0041/u0041/u
+ *    S  C L         S              C
+ *                                  L   gl=b->15
+ * 4. return
+ *
+ *    4  7  a  d  0  3
+ *    abc/u0041/u0041/u
+ *    S              C L
+ */
+
+ /**
+ * One more wrinkle. If there is a filter F for the compound
+ * transliterator as a whole, then we need to replace the filter f
+ * of every transliterator in the chain (null or not) with
+ * f' = F & f. Then, when we're done, we restore the original filters.
+ *
+ * A possible future optimization is to change f to f' at
+ * construction time, but then if anyone else is using the
+ * transliterators in the chain outside of this context, they
+ * will get unexpected results.
+ */
+ UnicodeFilter F = getFilter();
+ UnicodeFilter[] f = null;
+ if (F != null) {
+ f = new UnicodeFilter[trans.length];
+ for (int i=0; i<f.length; ++i) {
+ f[i] = trans[i].getFilter(); // remember the original so it can be restored below
+ trans[i].setFilter(UnicodeFilterLogic.and(F, f[i]));
+ }
+ }
+
+ try {
+ int cursor = index[CURSOR];
+ int limit = index[LIMIT];
+ int globalLimit = limit;
+ /* globalLimit is the overall limit. We keep track of this
+ * since we overwrite index[LIMIT] with the previous
+ * index[CURSOR]. After each transliteration, we update
+ * globalLimit for insertions or deletions that have happened.
+ */
+
+ for (int i=0; i<trans.length; ++i) {
+ index[CURSOR] = cursor; // Reset cursor
+ index[LIMIT] = limit;
+
+ if (DEBUG) {
+ System.out.print(escape(i + ": \"" +
+ substring(text, index[START], index[CURSOR]) + '|' +
+ substring(text, index[CURSOR], index[LIMIT]) +
+ "\" -> \""));
+ }
+
+ trans[i].handleKeyboardTransliterate(text, index);
+
+ if (DEBUG) {
+ System.out.println(escape(
+ substring(text, index[START], index[CURSOR]) + '|' +
+ substring(text, index[CURSOR], index[LIMIT]) +
+ '"'));
+ }
+
+ // Adjust overall limit for insertions/deletions
+ globalLimit += index[LIMIT] - limit;
+ limit = index[CURSOR]; // Move limit to end of committed text
+ }
+ // Cursor is good where it is -- where the last
+ // transliterator left it. Limit needs to be put back
+ // where it was, modulo adjustments for deletions/insertions.
+ index[LIMIT] = globalLimit;
+
+ } finally {
+ // Fixup the transliterator filters, if we had to modify them.
+ if (f != null) {
+ for (int i=0; i<f.length; ++i) {
+ trans[i].setFilter(f[i]);
+ }
+ }
+ }
+ }
+
+ /**
+ * Returns the length of the longest context required by this transliterator,
+ * which is the largest value reported by any transliterator in the chain.
+ * This is <em>preceding</em> context.
+ * @return maximum number of preceding context characters this
+ * transliterator needs to examine; zero if the chain is empty
+ */
+ protected int getMaximumContextLength() {
+ int max = 0;
+ for (int i=0; i<trans.length; ++i) {
+ int len = trans[i].getMaximumContextLength();
+ if (len > max) {
+ max = len;
+ }
+ }
+ return max;
+ }
+
+ /**
+ * DEBUG
+ * Returns the characters of a Replaceable from start up to, not including, limit.
+ */
+ private static final String substring(Replaceable str, int start, int limit) {
+ StringBuffer buf = new StringBuffer();
+ while (start < limit) {
+ buf.append(str.charAt(start++));
+ }
+ return buf.toString();
+ }
+
+ /**
+ * DEBUG
+ * Escapes characters outside U+0020..U+007F as four-digit Unicode escapes.
+ */
+ private static final String escape(String s) {
+ StringBuffer buf = new StringBuffer();
+ for (int i=0; i<s.length(); ++i) {
+ char c = s.charAt(i);
+ if (c >= ' ' && c <= 0x007F) {
+ buf.append(c);
+ } else {
+ buf.append("\\u");
+ if (c < 0x1000) { // zero-pad the hex value to four digits
+ buf.append('0');
+ if (c < 0x100) {
+ buf.append('0');
+ if (c < 0x10) {
+ buf.append('0');
+ }
+ }
+ }
+ buf.append(Integer.toHexString(c));
+ }
+ }
+ return buf.toString();
+ }
+}
diff --git a/src/com/ibm/text/HexToUnicodeTransliterator.java b/src/com/ibm/text/HexToUnicodeTransliterator.java
new file mode 100755
index 0000000..18673e1
--- /dev/null
+++ b/src/com/ibm/text/HexToUnicodeTransliterator.java
@@ -0,0 +1,130 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that converts hexadecimal Unicode escape
+ * sequences to the characters they represent; for example, "U+0040"
+ * becomes "@". An escape is one of the two-character prefixes "U+",
+ * "u+", "&#92;U", or "&#92;u" followed by four hexadecimal digits,
+ * which may be upper- or lowercase.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class HexToUnicodeTransliterator extends Transliterator {
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+ * Package accessible ID for this transliterator.
+ */
+ static String _ID = "Hex-Unicode";
+
+ /**
+ * Constructs a transliterator with ID <code>_ID</code> and a null filter.
+ */
+ public HexToUnicodeTransliterator() {
+ super(_ID, null);
+ }
+
+ /**
+ * Transliterates a segment of a string. <code>Transliterator</code> API.
+ * Delegates to <code>handleKeyboardTransliterate</code>.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 &lt;= start
+ * &lt;= limit</code>.
+ * @param limit the ending index, exclusive; <code>start &lt;= limit
+ * &lt;= text.length()</code>.
+ * @return the new limit index
+ */
+ public int transliterate(Replaceable text, int start, int limit) {
+ int[] offsets = { start, limit, start }; // NOTE(review): presumably ordered START, LIMIT, CURSOR -- confirm
+ handleKeyboardTransliterate(text, offsets);
+ return offsets[LIMIT];
+ }
+
+ /**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ */
+ protected void handleKeyboardTransliterate(Replaceable text,
+ int[] offsets) {
+ /**
+ * Scans for 6-character hex escapes ("U+hhhh" and friends; see the
+ * class comment) between the cursor and the limit and replaces each
+ * with the single character it names, e.g. "U+0040" -> '@'.
+ */
+ int cursor = offsets[CURSOR];
+ int limit = offsets[LIMIT];
+
+ int maxCursor = limit - 6; // last index at which a complete 6-char escape can start
+ loop:
+ while (cursor <= maxCursor) {
+ char c = filteredCharAt(text, cursor + 5); // test the last char of the 6-char window first
+ int digit0 = Character.digit(c, 16);
+ if (digit0 < 0) {
+ if (c == '\\') {
+ cursor += 5; // a backslash can only start an escape: move the window onto it
+ } else if (c == 'U' || c == 'u' || c == '+') {
+ cursor += 4; // may be the 2nd prefix char: shift so it lands at cursor+1
+ } else {
+ cursor += 6; // cannot belong to an escape: skip the whole window
+ }
+ continue;
+ }
+
+ int u = digit0;
+
+ for (int i=4; i>=2; --i) {
+ c = filteredCharAt(text, cursor + i);
+ int digit = Character.digit(c, 16);
+ if (digit < 0) {
+ if (c == 'U' || c == 'u' || c == '+') {
+ cursor += i-1; // shift so this char lands at cursor+1
+ } else {
+ cursor += 6;
+ }
+ continue loop;
+ }
+ u |= digit << (4 * (5-i)); // i=4,3,2 fill bits 4-7, 8-11, 12-15
+ }
+
+ c = filteredCharAt(text, cursor);
+ char d = filteredCharAt(text, cursor + 1);
+ if (((c == 'U' || c == 'u') && d == '+')
+ || (c == '\\' && (d == 'U' || d == 'u'))) {
+
+ // At this point, we have a match; replace cursor..cursor+5
+ // with u.
+ text.replace(cursor, cursor+6, String.valueOf((char) u));
+ limit -= 5; // six characters became one
+ maxCursor -= 5;
+
+ ++cursor;
+ } else {
+ cursor += 6;
+ }
+ }
+
+ offsets[LIMIT] = limit;
+ offsets[CURSOR] = cursor;
+ }
+
+ private char filteredCharAt(Replaceable text, int i) { // yields U+FFFF for a char the filter rejects
+ char c;
+ UnicodeFilter filter = getFilter();
+ return (filter == null) ? text.charAt(i) :
+ (filter.isIn(c = text.charAt(i)) ? c : '\uFFFF');
+ }
+
+ /**
+ * Return the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context. This implementation always
+ * returns zero.
+ * @return maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+ protected int getMaximumContextLength() {
+ return 0;
+ }
+}
diff --git a/src/com/ibm/text/NullTransliterator.java b/src/com/ibm/text/NullTransliterator.java
new file mode 100755
index 0000000..cf469e8
--- /dev/null
+++ b/src/com/ibm/text/NullTransliterator.java
@@ -0,0 +1,43 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that performs no conversion: text is left unchanged.
+ */
+public class NullTransliterator extends Transliterator {
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 2000. All rights reserved.";
+
+    /**
+     * ID under which this transliterator is constructed; package accessible.
+     */
+    static String _ID = "Null";
+
+    /**
+     * Constructs a transliterator with ID {@link #_ID} and a null filter.
+     */
+    public NullTransliterator() {
+        super(_ID, null);
+    }
+
+    /**
+     * Transliterates a segment of a string. <code>Transliterator</code> API.
+     * This implementation does not modify <code>text</code>.
+     * @param text the string to be transliterated; not touched here
+     * @param start the beginning index, inclusive; not used here
+     * @param limit the ending index, exclusive
+     * @return <code>limit</code>, unchanged
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        return limit;
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}: moves
+     * the cursor offset up to the limit offset without touching the text.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text, int[] offsets) {
+        final int limit = offsets[LIMIT];
+        offsets[CURSOR] = limit;
+    }
+}
diff --git a/src/com/ibm/text/Replaceable.java b/src/com/ibm/text/Replaceable.java
new file mode 100755
index 0000000..b4c8519
--- /dev/null
+++ b/src/com/ibm/text/Replaceable.java
@@ -0,0 +1,77 @@
+package com.ibm.text;
+
+/**
+ * <code>Replaceable</code> is an interface that supports the
+ * operation of replacing a substring with another piece of text.
+ * <code>Replaceable</code> is needed in order to change a piece of
+ * text while retaining style attributes. For example, if the string
+ * "the <b>bold</b> font" has range (4, 8) replaced with "strong",
+ * then it becomes "the <b>strong</b> font".
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Replaceable.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public interface Replaceable {
+ /**
+ * Return the number of characters in the text.
+ * @return number of characters in text
+ */
+ int length();
+
+ /**
+ * Return the character at the given offset into the text.
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return character of text at given offset
+ */
+ char charAt(int offset);
+
+ /**
+ * Copies characters from this object into the destination
+ * character array. The first character to be copied is at index
+ * <code>srcStart</code>; the last character to be copied is at
+ * index <code>srcLimit-1</code> (thus the total number of
+ * characters to be copied is <code>srcLimit-srcStart</code>). The
+ * characters are copied into the subarray of <code>dst</code>
+ * starting at index <code>dstStart</code> and ending at index
+ * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+ *
+ * @param srcStart the beginning index to copy, inclusive; <code>0
+ * &lt;= srcStart &lt;= srcLimit</code>.
+ * @param srcLimit the ending index to copy, exclusive;
+ * <code>srcStart &lt;= srcLimit &lt;= length()</code>.
+ * @param dst the destination array.
+ * @param dstStart the start offset in the destination array.
+ */
+ void getChars(int srcStart, int srcLimit, char dst[], int dstStart);
+
+ /**
+ * Replace a substring of this object with the given text.
+ * @param start the beginning index, inclusive; <code>0 &lt;= start
+ * &lt;= limit</code>.
+ * @param limit the ending index, exclusive; <code>start &lt;= limit
+ * &lt;= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ */
+ void replace(int start, int limit, String text);
+
+ /**
+ * Replace a substring of this object with characters from an array.
+ * @param start the beginning index, inclusive; <code>0 &lt;= start
+ * &lt;= limit</code>.
+ * @param limit the ending index, exclusive; <code>start &lt;= limit
+ * &lt;= length()</code>.
+ * @param chars the array holding the text that replaces characters
+ * <code>start</code> to <code>limit - 1</code>
+ * @param charsStart the index into <code>chars</code> of the first
+ * replacement character, inclusive.
+ * @param charsLen the number of characters of <code>chars</code> to use.
+ */
+ void replace(int start, int limit, char[] chars,
+ int charsStart, int charsLen);
+ // Note: We use length rather than limit to conform to StringBuffer
+ // and System.arraycopy.
+}
diff --git a/src/com/ibm/text/ReplaceableString.java b/src/com/ibm/text/ReplaceableString.java
new file mode 100755
index 0000000..d6a7df0
--- /dev/null
+++ b/src/com/ibm/text/ReplaceableString.java
@@ -0,0 +1,159 @@
+package com.ibm.text;
+
+/**
+ * <code>ReplaceableString</code> is an adapter that exposes an ordinary
+ * <code>StringBuffer</code> through the <code>Replaceable</code> API.
+ *
+ * <p><em>Note:</em> This class does not support attributes and is not
+ * intended for general use. Most clients will need to implement
+ * {@link Replaceable} in their text representation class.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @see Replaceable
+ * @author Alan Liu
+ * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class ReplaceableString implements Replaceable {
+    private StringBuffer buf;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a new object whose contents are a copy of <code>str</code>.
+     * @param str initial contents
+     */
+    public ReplaceableString(String str) {
+        this.buf = new StringBuffer(str);
+    }
+
+    /**
+     * Construct a new object that wraps <code>buf</code> directly, without
+     * copying it. The contents of <code>buf</code> at the time of
+     * construction are the initial contents. <em>Note! Because the
+     * storage is shared, modifications to <code>buf</code> will modify
+     * this object, and vice versa.</em>
+     * @param buf object to be used as internal storage
+     */
+    public ReplaceableString(StringBuffer buf) {
+        this.buf = buf;
+    }
+
+    /**
+     * Construct a new empty object.
+     */
+    public ReplaceableString() {
+        this.buf = new StringBuffer();
+    }
+
+    /**
+     * Return the current contents of this object as a <code>String</code>.
+     * @return string contents of this object
+     */
+    public String toString() {
+        return buf.toString();
+    }
+
+    /**
+     * Return the buffer this object stores its text in. <em>Note! The
+     * buffer is not copied: any changes made to the returned object
+     * affect this object's contents, and vice versa.</em>
+     * @return internal buffer used by this object
+     */
+    public StringBuffer getStringBuffer() {
+        return buf;
+    }
+
+    /**
+     * Return the number of characters contained in this object.
+     * <code>Replaceable</code> API.
+     */
+    public int length() {
+        return buf.length();
+    }
+
+    /**
+     * Return the character at the given position in this object.
+     * <code>Replaceable</code> API.
+     * @param offset offset into the contents, from 0 to
+     * <code>length()</code> - 1
+     */
+    public char charAt(int offset) {
+        return buf.charAt(offset);
+    }
+
+    /**
+     * Copies the characters at indexes <code>srcStart</code> through
+     * <code>srcLimit-1</code> of this object into <code>dst</code>,
+     * starting at <code>dst[dstStart]</code>; that is,
+     * <code>srcLimit-srcStart</code> characters are copied.
+     * @param srcStart the beginning index to copy, inclusive
+     * @param srcLimit the ending index to copy, exclusive
+     * @param dst the destination array
+     * @param dstStart the start offset in the destination array
+     */
+    public void getChars(int srcStart, int srcLimit, char dst[], int dstStart) {
+        buf.getChars(srcStart, srcLimit, dst, dstStart);
+    }
+
+    /**
+     * Replace zero or more characters with new characters.
+     * <code>Replaceable</code> API.
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= length()</code>.
+     * @param text new text to replace characters <code>start</code> to
+     * <code>limit - 1</code>
+     */
+    public void replace(int start, int limit, String text) {
+        if (start == limit) {
+            // Empty range: a plain insertion, nothing is removed.
+            buf.insert(start, text);
+            return;
+        }
+        char[] suffix = suffixFrom(limit);
+        buf.setLength(start);
+        buf.append(text);
+        if (suffix != null) {
+            buf.append(suffix);
+        }
+    }
+
+    /**
+     * Replace a substring of this object with a run of characters
+     * taken from an array. <code>Replaceable</code> API.
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= length()</code>.
+     * @param chars array holding the text that replaces characters
+     * <code>start</code> to <code>limit - 1</code>
+     * @param charsStart index in <code>chars</code> of the first character to use
+     * @param charsLen the number of characters of <code>chars</code> to use
+     */
+    public void replace(int start, int limit, char[] chars,
+                        int charsStart, int charsLen) {
+        char[] suffix = suffixFrom(limit);
+        buf.setLength(start);
+        buf.append(chars, charsStart, charsLen);
+        if (suffix != null) {
+            buf.append(suffix);
+        }
+    }
+
+    /**
+     * Returns a copy of the characters from <code>limit</code> to the end
+     * of the buffer, or <code>null</code> if there are none.
+     */
+    private char[] suffixFrom(int limit) {
+        int total = buf.length();
+        if (limit >= total) {
+            return null;
+        }
+        char[] suffix = new char[total - limit];
+        buf.getChars(limit, total, suffix, 0);
+        return suffix;
+    }
+}
diff --git a/src/com/ibm/text/RuleBasedTransliterator.java b/src/com/ibm/text/RuleBasedTransliterator.java
new file mode 100755
index 0000000..aac3011
--- /dev/null
+++ b/src/com/ibm/text/RuleBasedTransliterator.java
@@ -0,0 +1,1116 @@
+package com.ibm.text;
+
+import java.util.Hashtable;
+import java.util.Vector;
+import java.text.ParsePosition;
+
+/**
+ * A transliterator that reads a set of rules in order to determine how to perform
+ * translations. Rules are stored in resource bundles indexed by name. Rules are separated by
+ * semicolons (';'). To include a literal semicolon, prefix it with a backslash ('\;').
+ * Whitespace, as defined by <code>Character.isWhitespace()</code>, is ignored. If the first
+ * non-blank character on a line is '#', the entire line is ignored as a comment.
+ *
+ * <p>Each set of rules consists of two groups, one forward, and one reverse. This is a
+ * convention that is not enforced; rules for one direction may be omitted, with the result
+ * that translations in that direction will not modify the source text. </p>
+ *
+ * <p><b>Rule syntax</b> </p>
+ *
+ * <p>Rule statements take one of the following forms:
+ *
+ * <dl>
+ * <dt><code>alefmadda=\u0622</code></dt>
+ * <dd><strong>Variable definition.</strong> The name on the left is assigned the character or
+ * expression on the right. Names may not contain any special characters (see list below).
+ * Duplicate names (including duplicates of simple variables or category names) cause an
+ * exception to be thrown. If the right hand side consists of one character, then the
+ * variable stands for that character. In this example, after this statement, instances of
+ * the left hand name surrounded by braces, "<code>{alefmadda}</code>", will be
+ * replaced by the Unicode character U+0622. If the right hand side is longer than one
+ * character, then it is interpreted as a character category expression; see below for
+ * details.</dd>
+ * <dt> </dt>
+ * <dt><code>softvowel=[eiyEIY]</code></dt>
+ * <dd><strong>Category definition.</strong> The name on the left is assigned to stand for a
+ * set of characters. The same rules for names of simple variables apply. After this
+ * statement, the left hand variable will be interpreted as indicating a set of characters in
+ * appropriate contexts. The pattern syntax defining sets of characters is defined by {@link
+ * UnicodeSet}. Examples of valid patterns are:<table>
+ * <tr valign="top">
+ * <td nowrap><code>[abc]</code></td>
+ * <td>The set containing the characters 'a', 'b', and 'c'.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>[^abc]</code></td>
+ * <td>The set of all characters <em>except</em> 'a', 'b', and 'c'.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>[A-Z]</code></td>
+ * <td>The set of all characters from 'A' to 'Z' in Unicode order.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>[:Lu:]</code></td>
+ * <td>The set of Unicode uppercase letters. See <a href="http://www.unicode.org">www.unicode.org</a>
+ * for a complete list of categories and their two-letter codes.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>[^a-z[:Lu:][:Ll:]]</code></td>
+ * <td>The set of all characters <em>except</em> 'a' through 'z' and uppercase or lowercase
+ * letters.</td>
+ * </tr>
+ * </table>
+ * <p>See {@link UnicodeSet} for more documentation and examples. </p>
+ * </dd>
+ * <dt><code>ai>{alefmadda}</code></dt>
+ * <dd><strong>Forward translation rule.</strong> This rule states that the string on the left
+ * will be changed to the string on the right when performing forward transliteration.</dd>
+ * <dt> </dt>
+ * <dt><code>ai<{alefmadda}</code></dt>
+ * <dd><strong>Reverse translation rule.</strong> This rule states that the string on the right
+ * will be changed to the string on the left when performing reverse transliteration.</dd>
+ * </dl>
+ *
+ * <dl>
+ * <dt><code>ai<>{alefmadda}</code></dt>
+ * <dd><strong>Bidirectional translation rule.</strong> This rule states that the string on the
+ * left will be changed to the string on the right when performing forward transliteration,
+ * and vice versa when performing reverse transliteration.</dd>
+ * </dl>
+ *
+ * <p>Forward and reverse translation rules consist of a <em>match pattern</em> and an <em>output
+ * string</em>. The match pattern consists of literal characters, optionally preceded by
+ * context, and optionally followed by context. Context characters, like literal pattern
+ * characters, must be matched in the text being transliterated. However, unlike literal
+ * pattern characters, they are not replaced by the output text. For example, the pattern
+ * "<code>(abc)def</code>" indicates the characters "<code>def</code>"
+ * must be preceded by "<code>abc</code>" for a successful match. If there is a
+ * successful match, "<code>def</code>" will be replaced, but not "<code>abc</code>".
+ * The initial '<code>(</code>' is optional, so "<code>abc)def</code>" is
+ * equivalent to "<code>(abc)def</code>". Another example is "<code>123(456)</code>"
+ * (or "<code>123(456</code>") in which the literal pattern "<code>123</code>"
+ * must be followed by "<code>456</code>". </p>
+ *
+ * <p>The output string of a forward or reverse rule consists of characters to replace the
+ * literal pattern characters. If the output string contains the character '<code>|</code>',
+ * this is taken to indicate the location of the <em>cursor</em> after replacement. The
+ * cursor is the point in the text at which the next replacement, if any, will be applied. </p>
+ *
+ * <p>In addition to being defined in variables, <code>UnicodeSet</code> patterns may be
+ * embedded directly into rule strings. Thus, the following two rules are equivalent:</p>
+ *
+ * <blockquote>
+ * <p><code>vowel=[aeiou]; {vowel}>*; # One way to do this<br>
+ * [aeiou]>*;
+ * #
+ * Another way</code></p>
+ * </blockquote>
+ *
+ * <p><b>Example</b> </p>
+ *
+ * <p>The following example rules illustrate many of the features of the rule language. </p>
+ *
+ * <table cellpadding="4">
+ * <tr valign="top">
+ * <td>Rule 1.</td>
+ * <td nowrap><code>(abc)def>x|y</code></td>
+ * </tr>
+ * <tr valign="top">
+ * <td>Rule 2.</td>
+ * <td nowrap><code>xyz>r</code></td>
+ * </tr>
+ * <tr valign="top">
+ * <td>Rule 3.</td>
+ * <td nowrap><code>yz>q</code></td>
+ * </tr>
+ * </table>
+ *
+ * <p>Applying these rules to the string "<code>adefabcdefz</code>" yields the
+ * following results: </p>
+ *
+ * <table cellpadding="4">
+ * <tr valign="top">
+ * <td nowrap><code>|adefabcdefz</code></td>
+ * <td>Initial state, no rules match. Advance cursor.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>a|defabcdefz</code></td>
+ * <td>Still no match. Rule 1 does not match because the preceding context is not present.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>ad|efabcdefz</code></td>
+ * <td>Still no match. Keep advancing until there is a match...</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>ade|fabcdefz</code></td>
+ * <td>...</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>adef|abcdefz</code></td>
+ * <td>...</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>adefa|bcdefz</code></td>
+ * <td>...</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>adefab|cdefz</code></td>
+ * <td>...</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>adefabc|defz</code></td>
+ * <td>Rule 1 matches; replace "<code>def</code>" with "<code>xy</code>"
+ * and back up the cursor to before the '<code>y</code>'.</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>adefabcx|yz</code></td>
+ * <td>Although "<code>xyz</code>" is present, rule 2 does not match because the
+ * cursor is before the '<code>y</code>', not before the '<code>x</code>'. Rule 3 does match.
+ * Replace "<code>yz</code>" with "<code>q</code>".</td>
+ * </tr>
+ * <tr valign="top">
+ * <td nowrap><code>adefabcxq|</code></td>
+ * <td>The cursor is at the end; transliteration is complete.</td>
+ * </tr>
+ * </table>
+ *
+ * <p>The order of rules is significant. If multiple rules may match at some point, the first
+ * matching rule is applied. </p>
+ *
+ * <p>Forward and reverse rules may have an empty output string. Otherwise, an empty left or
+ * right hand side of any statement is a syntax error. </p>
+ *
+ * <p>Single quotes are used to quote the special characters <code>=><{}[]()|</code>.
+ * To specify a single quote itself, inside or outside of quotes, use two single quotes in a
+ * row. For example, the rule "<code>'>'>o''clock</code>" changes the string
+ * "<code>></code>" to the string "<code>o'clock</code>". </p>
+ *
+ * <p><b>Notes</b> </p>
+ *
+ * <p>While a RuleBasedTransliterator is being built, it checks that the rules are added in
+ * proper order. For example, if the rule "a>x" is followed by the rule
+ * "ab>y", then the second rule will throw an exception. The reason is that the
+ * second rule can never be triggered, since the first rule always matches anything it
+ * matches. In other words, the first rule <em>masks</em> the second rule. </p>
+ *
+ * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
+ *
+ * @author Alan Liu
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.10 $ $Date: 2000/01/13 23:53:23 $
+ *
+ * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.10 2000/01/13 23:53:23 Alan
+ * Fix bugs found during ICU port
+ *
+ * Revision 1.9 2000/01/11 04:12:06 Alan
+ * Cleanup, embellish comments
+ *
+ * Revision 1.8 2000/01/11 02:25:03 Alan
+ * Rewrite UnicodeSet and RBT parsers for better performance and new syntax
+ *
+ * Revision 1.7 2000/01/06 01:36:36 Alan
+ * Allow string arrays in rule resource bundles
+ *
+ * Revision 1.6 2000/01/04 21:43:57 Alan
+ * Add rule indexing, and move masking check to TransliterationRuleSet.
+ *
+ * Revision 1.5 1999/12/22 01:40:54 Alan
+ * Consolidate rule pattern anteContext, key, and postContext into one string.
+ *
+ * Revision 1.4 1999/12/22 01:05:54 Alan
+ * Improve masking checking; turn it off by default, for better performance
+ */
+public class RuleBasedTransliterator extends Transliterator {
+ /**
+ * Direction constant passed to constructor to create a transliterator
+ * using the forward rules.
+ */
+ public static final int FORWARD = 0;
+
+ /**
+ * Direction constant passed to constructor to create a transliterator
+ * using the reverse rules.
+ */
+ public static final int REVERSE = 1;
+
+ private Data data; // parsed rules (data.ruleSet) and set stand-ins (data.setVariables) consulted on every match
+
+ static final boolean DEBUG = false; // when true, transliterate(String, ...) and handleKeyboardTransliterate() print a trace
+
+ private static final String COPYRIGHT = // NOTE(review): no use of this constant is visible nearby
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Builds a transliterator by parsing <code>rules</code> for one direction.
+     * @param ID identifier passed through to the superclass constructor
+     * @param rules rules, separated by ';'
+     * @param direction either FORWARD or REVERSE.
+     * @param filter filter passed through to the superclass constructor
+     * @exception IllegalArgumentException if rules are malformed or direction is invalid.
+     */
+    public RuleBasedTransliterator(String ID, String rules, int direction, UnicodeFilter filter) {
+        super(ID, filter);
+        if (!(direction == FORWARD || direction == REVERSE)) {
+            throw new IllegalArgumentException("Invalid direction");
+        }
+        data = parse(rules, direction);
+    }
+
+ /**
+ * Constructs a new transliterator from the given rules in the
+ * <code>FORWARD</code> direction, with <code>null</code> as the filter.
+ * @param ID identifier for the new transliterator; passed on unchanged
+ * @param rules rules, separated by ';'
+ * @exception IllegalArgumentException if rules are malformed.
+ */
+ public RuleBasedTransliterator(String ID, String rules) {
+ this(ID, rules, FORWARD, null);
+ }
+
+ RuleBasedTransliterator(String ID, Data data, UnicodeFilter filter) { // package-private: adopts rule data that was parsed elsewhere
+ super(ID, filter);
+ this.data = data; // stored by reference, not copied
+ }
+
+ static Data parse(String[] rules, int direction) { // parses every element of the array into one Data
+ return new Parser(rules, direction).getData(); // the Parser constructor does all of the parsing
+ }
+
+ static Data parse(String rules, int direction) { // convenience overload for a single rule string
+ return parse(new String[] { rules }, direction);
+ }
+
+    /**
+     * Transliterates a segment of a string. <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    public void transliterate(String text, int start, int limit,
+                              StringBuffer result) {
+        /* The loop below works on a virtual buffer: the text transliterated
+         * so far (result) followed by the source text not yet consumed
+         * (text.substring(start, limit)).  The cursor is an index into that
+         * virtual buffer; it may sit inside result or just before the
+         * unconsumed text.
+         *
+         * Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * []|eabcd  start - no match, copy e to translated buffer
+         * [e]|abcd  match rule 1 - copy output & adjust cursor
+         * [ex|y]cd  match rule 2 - copy output & adjust cursor
+         * [exz]|d   no match, copy d to transliterated buffer
+         * [exzd]|   done
+         *
+         * Matching is always attempted at the cursor.  Only start moves as
+         * source characters are consumed; limit stays fixed.
+         */
+        result.setLength(0);
+        int cursor = 0;
+        while (start < limit || cursor < result.length()) {
+            TransliterationRule r = data.ruleSet.findMatch(text, start, limit, result,
+                                        cursor, data.setVariables, getFilter());
+            if (DEBUG) {
+                System.err.print((r == null ? "nomatch:" : ("match:" + r + ", "))
+                                 + traceState(text, start, limit, result, cursor));
+            }
+
+            if (r != null) {
+                final int keyLength = r.getKeyLength();
+                // The key may straddle result (right of the cursor) and the unconsumed source
+                final int inResult = result.length() - cursor; // expected to be >= 0
+                char[] kept = null;
+                if (keyLength > inResult) {
+                    // Key reaches into the source: consume that part
+                    start += keyLength - inResult;
+                } else if (keyLength < inResult) {
+                    // Key ends inside result: keep what follows it
+                    kept = new char[inResult - keyLength];
+                    result.getChars(cursor + keyLength, result.length(), kept, 0);
+                }
+                result.setLength(cursor);
+                result.append(r.getOutput());
+                if (kept != null) {
+                    result.append(kept);
+                }
+                cursor += r.getCursorPos();
+            } else {
+                // No rule applies here: step over one character, pulling
+                // it in from the source if the cursor is at the end of result
+                if (cursor == result.length()) {
+                    result.append(text.charAt(start++));
+                }
+                ++cursor;
+            }
+
+            if (DEBUG) {
+                System.err.println(" => " + traceState(text, start, limit, result, cursor));
+            }
+        }
+    }
+
+    /**
+     * FOR DEBUGGING: Render the virtual buffer as result + '#' + unconsumed
+     * text, with '|' inserted at the cursor.
+     */
+    private static StringBuffer traceState(String text, int start, int limit,
+                                           StringBuffer result, int cursor) {
+        StringBuffer buf = new StringBuffer(
+            result.toString() + '#' + text.substring(start, limit));
+        buf.insert(cursor <= result.length() ? cursor : (cursor + 1), '|');
+        return buf;
+    }
+
+    /**
+     * Transliterates a segment of a string. <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return The new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        /* With a Replaceable there is only one buffer, so the algorithm is
+         * simpler than the String version.  start never changes; limit
+         * keeps designating the same place in the text, so its numeric
+         * value shifts whenever a replacement changes the text length.
+         * The cursor walks from start to limit, with replacements happening under it.
+         *
+         * Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * |eabcd  start - no match, advance cursor
+         * e|abcd  match rule 1 - change text & adjust cursor
+         * ex|ycd  match rule 2 - change text & adjust cursor
+         * exz|d   no match, advance cursor
+         * exzd|   done
+         */
+        for (int cursor = start; cursor < limit; ) {
+            TransliterationRule r = data.ruleSet.findMatch(text, start, limit,
+                                        cursor, data.setVariables, getFilter());
+            if (r != null) {
+                final int keyLength = r.getKeyLength();
+                text.replace(cursor, cursor + keyLength, r.getOutput());
+                limit += r.getOutput().length() - keyLength;
+                cursor += r.getCursorPos();
+            } else {
+                ++cursor;
+            }
+        }
+        return limit;
+    }
+
+ /**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ */
+ protected void handleKeyboardTransliterate(Replaceable text,
+ int[] index) {
+ int start = index[START];
+ int limit = index[LIMIT];
+ int cursor = index[CURSOR];
+
+ if (DEBUG) {
+ System.out.print("\"" +
+ escape(rsubstring(text, start, cursor)) + '|' +
+ escape(rsubstring(text, cursor, limit)) + "\"");
+ }
+
+ boolean partial[] = new boolean[1];
+
+ while (cursor < limit) {
+ TransliterationRule r = data.ruleSet.findIncrementalMatch(
+ text, start, limit, cursor, data.setVariables, partial, getFilter());
+ /* If we match a rule then apply it by replacing the key
+ * with the rule output and repositioning the cursor
+ * appropriately. If we get a partial match, then we
+ * can't do anything without more text; return with the
+ * cursor at the current position. If we get null, then
+ * there is no match at this position, and we can advance
+ * the cursor.
+ */
+ if (r == null) {
+ if (partial[0]) {
+ break;
+ } else {
+ ++cursor;
+ }
+ } else {
+ text.replace(cursor, cursor + r.getKeyLength(), r.getOutput());
+ limit += r.getOutput().length() - r.getKeyLength();
+ cursor += r.getCursorPos();
+ }
+ }
+
+ if (DEBUG) {
+ System.out.println(" -> \"" +
+ escape(rsubstring(text, start, cursor)) + '|' +
+ escape(rsubstring(text, cursor, cursor)) + '|' +
+ escape(rsubstring(text, cursor, limit)) + "\"");
+ }
+
+ index[LIMIT] = limit;
+ index[CURSOR] = cursor;
+ }
+
+ /**
+ * Returns the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context, as reported by the rule set in <code>data</code>.
+ * @return Maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+ protected int getMaximumContextLength() {
+ return data.ruleSet.getMaximumContextLength();
+ }
+
+
+    /**
+     * FOR DEBUGGING: Copy the characters <code>[start, limit)</code> of a Replaceable into a String.
+     */
+    private static String rsubstring(Replaceable r, int start, int limit) {
+        StringBuffer chars = new StringBuffer();
+        for (int i = start; i < limit; ++i) {
+            chars.append(r.charAt(i));
+        }
+        return chars.toString();
+    }
+
+    /**
+     * FOR DEBUGGING: Escape non-ASCII characters as Unicode.
+     * Characters below U+0020 are escaped as well, and a backslash is doubled.
+     * @param s the string to escape
+     * @return <code>s</code> with the escapes applied
+     */
+    private static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        final int length = s.length();
+        for (int i = 0; i < length; ++i) {
+            final char c = s.charAt(i);
+            // U+0020..U+007F is copied through, except that a backslash is doubled
+            final boolean passThrough = c >= ' ' && c <= 0x007F;
+            if (passThrough && c != '\\') {
+                out.append(c);
+            } else if (passThrough) {
+                out.append("\\\\"); // That is, "\\"
+            } else {
+                // Emit a backslash-u escape, left-padding the hex digits to four places
+                final String hex = Integer.toHexString(c);
+                out.append("\\u");
+                for (int pad = hex.length(); pad < 4; ++pad) {
+                    out.append('0');
+                }
+                out.append(hex);
+            }
+        }
+        return out.toString();
+    }
+
+
+
+
+
+    static class Data {
+        /**
+         * Rule table. May be empty.
+         */
+        public TransliterationRuleSet ruleSet;
+
+        /**
+         * Maps a variable name (String) to a Character.  For a variable
+         * defined as a single literal character, that character is the
+         * value.  For a variable defined as a UnicodeSet, the value is a
+         * stand-in character: it is the key under which the set is filed
+         * in <code>setVariables</code>, and it is also what appears in the
+         * stored rules in place of the set.
+         */
+        public Hashtable variableNames;
+
+        /**
+         * Maps a stand-in character (Character) to the set it represents
+         * (UnicodeSet).  Stand-ins are what the stored rule text contains
+         * wherever a set of characters is meant; this table turns a
+         * stand-in back into the actual UnicodeSet object.
+         */
+        public Hashtable setVariables;
+
+        /**
+         * Creates a Data with a new rule set and new, empty variable tables.
+         */
+        public Data() {
+            this.variableNames = new Hashtable();
+            this.setVariables = new Hashtable();
+            this.ruleSet = new TransliterationRuleSet();
+        }
+    }
+
+
+
+
+
+
+ private static class Parser {
+ /**
+ * Current rule being parsed. NOTE(review): no assignment to this field is visible nearby; confirm it is still used.
+ */
+ private String rules;
+
+ private int direction; // FORWARD or REVERSE; parseRule() keeps only the rules for this direction
+
+ private Data data; // filled in by parseRule() and registerSet(); handed out by getData()
+
+ /**
+ * The next available stand-in for variables. This starts at some point in
+ * the private use area (discovered dynamically) and increments up toward
+ * <code>variableLimit</code>. At any point during parsing, available
+ * variables are <code>variableNext..variableLimit-1</code>.
+ */
+ private char variableNext;
+
+ /**
+ * One past the last available stand-in for variables. This is discovered
+ * dynamically. At any point during parsing, available variables are
+ * <code>variableNext..variableLimit-1</code>.
+ */
+ private char variableLimit;
+
+ // Operators
+ private static final char VARIABLE_DEF_OP = '=';
+ private static final char FORWARD_RULE_OP = '>';
+ private static final char REVERSE_RULE_OP = '<';
+ private static final char FWDREV_RULE_OP = '~'; // internal rep of <> op
+
+ private static final String OPERATORS = "=><"; // characters that begin an operator; "<>" is detected by lookahead
+
+ // Other special characters
+ private static final char QUOTE = '\'';
+ private static final char ESCAPE = '\\';
+ private static final char END_OF_RULE = ';';
+ private static final char RULE_COMMENT_CHAR = '#';
+
+ private static final char VARIABLE_REF_OPEN = '{';
+ private static final char VARIABLE_REF_CLOSE = '}';
+ private static final char CONTEXT_OPEN = '(';
+ private static final char CONTEXT_CLOSE = ')';
+ private static final char SET_OPEN = '[';
+ private static final char SET_CLOSE = ']';
+ private static final char CURSOR_POS = '|'; // at most one per side of a rule; parseRule() records its offset
+
+        /** Parses <code>ruleArray</code> immediately; getData() then returns the result.
+         * @param ruleArray list of rules, separated by semicolon characters
+         * @param direction which rules to keep: FORWARD or REVERSE
+         * @exception IllegalArgumentException if parseRules() reports an error
+         */
+        public Parser(String[] ruleArray, int direction) {
+            this.data = new Data();
+            this.direction = direction;
+            parseRules(ruleArray);
+        }
+
+ public Data getData() { // the Data object the constructor created and parseRules() filled in
+ return data;
+ }
+
+        /**
+         * Parse an array of zero or more rules. The strings in the array are
+         * parsed one after another, in order, into <code>data</code>.
+         *
+         * Parsing stops at the first rule with a syntax error.  The rules parsed
+         * up to that point are still indexed, and the messages of the syntax
+         * error and of any indexing error are combined in the exception thrown.
+         * @exception IllegalArgumentException if there is a syntax error in the
+         * rules, or if indexing the rules fails
+         */
+        private void parseRules(String[] ruleArray) {
+            determineVariableRange(ruleArray);
+
+            StringBuffer errors = null;
+
+            try {
+                for (int i=0; i<ruleArray.length; ++i) {
+                    String rule = ruleArray[i];
+                    int pos = 0;
+                    int limit = rule.length();
+                    while (pos < limit) {
+                        char c = rule.charAt(pos++);
+                        if (Character.isWhitespace(c)) {
+                            // Ignore leading whitespace. Note that this is Java
+                            // whitespace, not the full set of Unicode spaces.
+                            continue;
+                        }
+                        // Skip lines starting with the comment character
+                        if (c == RULE_COMMENT_CHAR) {
+                            pos = rule.indexOf("\n", pos) + 1;
+                            if (pos == 0) {
+                                break; // No "\n" found; rest of rule is a comment
+                            }
+                            continue; // Either fall out or restart with next line
+                        }
+                        // We've found the start of a rule. c is its first
+                        // character, and pos points past c. Lexically parse the
+                        // rule into component pieces.
+                        pos = parseRule(rule, --pos, limit);
+                    }
+                }
+            } catch (IllegalArgumentException e) {
+                // Remember the syntax error so that it is reported below
+                errors = new StringBuffer(String.valueOf(e.getMessage()));
+            }
+
+            // Index the rules
+            try {
+                data.ruleSet.freeze(data.setVariables);
+            } catch (IllegalArgumentException e) {
+                if (errors == null) {
+                    errors = new StringBuffer(String.valueOf(e.getMessage()));
+                } else {
+                    errors.append("\n").append(e.getMessage());
+                }
+            }
+
+            if (errors != null) {
+                throw new IllegalArgumentException(errors.toString());
+            }
+        }
+
+        /**
+         * MAIN PARSER. Parse the next rule in the given rule string, handling
+         * quoting, escaping, whitespace removal, context and cursor indicators,
+         * and the end-of-rule character. The result is either a new variable
+         * definition or a new rule object in <code>data</code>.
+         *
+         * The character at pos must be non-whitespace and must not be the comment character.
+         * @param rule string holding one or more rules
+         * @param pos index of the first character of the rule to parse
+         * @param limit index at or after which no characters are parsed
+         * @return the index after the last character parsed
+         * @exception IllegalArgumentException if the rule is malformed
+         */
+        private int parseRule(String rule, int pos, int limit) {
+            // Locate the left side, operator, and right side
+            int start = pos;
+            char operator = 0;
+
+            StringBuffer buf = new StringBuffer();
+            int cursor = -1; // position of cursor in buf
+            int ante = -1; // position of ante context marker ')' in buf
+            int post = -1; // position of post context marker '(' in buf
+            int postClose = -1; // position of post context close ')' in buf
+
+            // Assigned to buf and its adjuncts after the LHS has been
+            // parsed. Thereafter, buf etc. refer to the RHS.
+            String left = null;
+            int leftCursor = -1, leftAnte = -1, leftPost = -1, leftPostClose = -1;
+
+            main:
+            while (pos < limit) {
+                char c = rule.charAt(pos++);
+                if (Character.isWhitespace(c)) {
+                    // Ignore whitespace. Note that this is not Unicode
+                    // spaces, but Java spaces -- a subset, representing
+                    // whitespace likely to be seen in code.
+                    continue;
+                }
+                // Handle escapes
+                if (c == ESCAPE) {
+                    if (pos == limit) {
+                        syntaxError("Trailing backslash", rule, start);
+                    }
+                    buf.append(rule.charAt(pos++));
+                    continue;
+                }
+                // Handle quoted matter
+                if (c == QUOTE) {
+                    int iq = rule.indexOf(QUOTE, pos);
+                    if (iq == pos) {
+                        buf.append(c); // Parse [''] outside quotes as [']
+                        ++pos;
+                    } else {
+                        /* This loop picks up a segment of quoted text of the
+                         * form 'aaaa' each time through. If this segment
+                         * hasn't really ended ('aaaa''bbbb') then it keeps
+                         * looping, each time adding on a new segment. When it
+                         * reaches the final quote it breaks.
+                         */
+                        for (;;) {
+                            if (iq < 0) {
+                                syntaxError("Unterminated quote", rule, start);
+                            }
+                            buf.append(rule.substring(pos, iq));
+                            pos = iq+1;
+                            if (pos < limit && rule.charAt(pos) == QUOTE) {
+                                // Parse [''] inside quotes as [']
+                                iq = rule.indexOf(QUOTE, pos+1);
+                                // Continue looping
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+                    continue;
+                }
+                if (OPERATORS.indexOf(c) >= 0) {
+                    if (operator != 0) {
+                        syntaxError("Unquoted " + c, rule, start);
+                    }
+                    // Found an operator char. Check for forward-reverse operator.
+                    if (c == REVERSE_RULE_OP &&
+                        (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
+                        ++pos;
+                        operator = FWDREV_RULE_OP;
+                    } else {
+                        operator = c;
+                    }
+                    left = buf.toString(); // lhs
+                    leftCursor = cursor;
+                    leftAnte = ante;
+                    leftPost = post;
+                    leftPostClose = postClose;
+
+                    buf.setLength(0);
+                    cursor = ante = post = postClose = -1;
+                    continue;
+                }
+                switch (c) {
+                case END_OF_RULE:
+                    break main;
+                case VARIABLE_REF_OPEN:
+                    {
+                        int j = rule.indexOf(VARIABLE_REF_CLOSE, pos);
+                        if (pos == j || j < 0) { // empty or unterminated
+                            syntaxError("Malformed variable reference", rule, start);
+                        }
+                        String name = rule.substring(pos, j);
+                        pos = j+1;
+                        buf.append(getVariableDef(name));
+                    }
+                    break;
+                case CONTEXT_OPEN:
+                    if (post >= 0) {
+                        syntaxError("Multiple post contexts", rule, start);
+                    }
+                    // Ignore CONTEXT_OPEN if buffer length is zero -- that means
+                    // this is the optional opening delimiter for the ante context.
+                    if (buf.length() > 0) {
+                        post = buf.length();
+                    }
+                    break;
+                case CONTEXT_CLOSE:
+                    if (postClose >= 0) {
+                        syntaxError("Unexpected " + c, rule, start);
+                    }
+                    if (post >= 0) {
+                        // This is probably the optional closing delimiter
+                        // for the post context; save the pos and check later.
+                        postClose = buf.length();
+                    } else if (ante >= 0) {
+                        syntaxError("Multiple ante contexts", rule, start);
+                    } else {
+                        ante = buf.length();
+                    }
+                    break;
+                case SET_OPEN:
+                    ParsePosition pp = new ParsePosition(pos-1); // Backup to opening '['
+                    buf.append(registerSet(new UnicodeSet(rule, pp,
+                                   data.variableNames, data.setVariables)));
+                    pos = pp.getIndex();
+                    break;
+                case VARIABLE_REF_CLOSE:
+                case SET_CLOSE:
+                    syntaxError("Unquoted " + c, rule, start); // always throws, so control never reaches the next case
+                case CURSOR_POS:
+                    if (cursor >= 0) {
+                        syntaxError("Multiple cursors", rule, start);
+                    }
+                    cursor = buf.length();
+                    break;
+                default:
+                    buf.append(c);
+                    break;
+                }
+            }
+            if (operator == 0) {
+                syntaxError("No operator", rule, start);
+            }
+
+            // Check context close parameters: an explicit post context close must be the last thing on its side
+            if ((leftPostClose >= 0 && leftPostClose != left.length()) ||
+                (postClose >= 0 && postClose != buf.length())) {
+                syntaxError("Extra text after )", rule, start);
+            }
+
+            // Context is only allowed on the input side; that is, the left side
+            // for forward rules. Cursors are only allowed on the output side;
+            // that is, the right side for forward rules. Bidirectional rules
+            // ignore elements that do not apply.
+
+            switch (operator) {
+            case VARIABLE_DEF_OP:
+                // LHS is the name. RHS is a single character, either a literal
+                // or a set (already parsed). If RHS is longer than one
+                // character, it is either a multi-character string, or multiple
+                // sets, or a mixture of chars and sets -- syntax error.
+                if (buf.length() != 1) {
+                    syntaxError("Malformed RHS", rule, start);
+                }
+                if (data.variableNames.get(left) != null) {
+                    syntaxError("Duplicate definition of {" +
+                                left + "}", rule, start);
+                }
+                data.variableNames.put(left, new Character(buf.charAt(0)));
+                break;
+
+            case FORWARD_RULE_OP:
+                if (direction == FORWARD) {
+                    if (ante >= 0 || post >= 0 || leftCursor >= 0) {
+                        syntaxError("Malformed rule", rule, start);
+                    }
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             left, leftAnte, leftPost,
+                                             buf.toString(), cursor));
+                } // otherwise ignore the rule; it's not the direction we want
+                break;
+
+            case REVERSE_RULE_OP:
+                if (direction == REVERSE) {
+                    if (leftAnte >= 0 || leftPost >= 0 || cursor >= 0) {
+                        syntaxError("Malformed rule", rule, start);
+                    }
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             buf.toString(), ante, post,
+                                             left, leftCursor));
+                } // otherwise ignore the rule; it's not the direction we want
+                break;
+
+            case FWDREV_RULE_OP:
+                if (direction == FORWARD) {
+                    // The output side is the right; trim off any context. NOTE(review): cursor is passed on as an offset into the untrimmed text -- confirm
+                    String output = buf.toString().substring(ante < 0 ? 0 : ante,
+                                                             post < 0 ? buf.length() : post);
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             left, leftAnte, leftPost,
+                                             output, cursor));
+                } else {
+                    // The output side is the left; trim off any context
+                    String output = left.substring(leftAnte < 0 ? 0 : leftAnte,
+                                                   leftPost < 0 ? left.length() : leftPost);
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             buf.toString(), ante, post,
+                                             output, leftCursor));
+                }
+                break;
+            }
+
+            return pos;
+        }
+
+        /**
+         * Always throws: reports a syntax error in the rule that begins at
+         * <code>start</code>.  The message quotes the rule text from
+         * <code>start</code> up to the first unquoted ';', or up to the end of
+         * the string if there is none.
+         * @param msg error description
+         * @param rule pattern string
+         * @param start position of first character of current rule
+         * @exception IllegalArgumentException always
+         */
+        private static final void syntaxError(String msg, String rule, int start) {
+            // The rule is taken to end at the first unquoted, unescaped ';'
+            final int terminator = quotedIndexOf(rule, start, rule.length(), ";");
+            final int end = (terminator < 0) ? rule.length() : terminator;
+            final String offending = rule.substring(start, end);
+            throw new IllegalArgumentException(msg + " in " + offending);
+        }
+
+        /**
+         * Assigns the next free private-use stand-in character to the given
+         * set, files the set under it in <code>data.setVariables</code>, and
+         * returns the stand-in.
+         * @exception RuntimeException if no stand-in characters are left
+         */
+        private final char registerSet(UnicodeSet set) {
+            if (!(variableNext < variableLimit)) {
+                throw new RuntimeException("Private use variables exhausted");
+            }
+            final char standIn = variableNext++;
+            data.setVariables.put(new Character(standIn), set);
+            return standIn;
+        }
+
+ /**
+ * Returns the single character value of the given variable name. Defined
+ * names are recognized.
+ * @exception IllegalArgumentException if the name is unknown.
+ */
+ private char getVariableDef(String name) {
+ Character ch = (Character) data.variableNames.get(name);
+ if (ch == null) {
+ throw new IllegalArgumentException("Undefined variable: "
+ + name);
+ }
+ return ch.charValue();
+ }
+
+ /**
+ * Determines what part of the private use region of Unicode we can use for
+ * variable stand-ins. The correct way to do this is as follows: Parse each
+ * rule, and for forward and reverse rules, take the FROM expression, and
+ * make a hash of all characters used. The TO expression should be ignored.
+ * When done, everything not in the hash is available for use. In practice,
+ * this method may employ some other algorithm for improved speed.
+ */
+ private final void determineVariableRange(String[] ruleArray) {
+     // Initial implementation: scan every character of every rule string,
+     // ignoring any quoting. This works because the quote mechanisms are
+     // themselves outside the private use area.
+     Range privateUse = new Range('\uE000', 0x1900); // Private use area
+     Range unused = privateUse.largestUnusedSubrange(ruleArray);
+
+     if (unused == null) {
+         throw new RuntimeException(
+             "No private use characters available for variables");
+     }
+
+     // Stand-ins are allocated from [variableNext, variableLimit).
+     char first = unused.start;
+     variableNext = first;
+     variableLimit = (char) (first + unused.length);
+     if (variableNext >= variableLimit) {
+         throw new RuntimeException(
+             "Too few private use characters available for variables");
+     }
+ }
+
+ /**
+ * Returns the index of the first character in a set, ignoring quoted text.
+ * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
+ * found by a search for "h". Unlike String.indexOf(), this method searches
+ * not for a single character, but for any character of the string
+ * <code>setOfChars</code>.
+ * @param text text to be searched
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param setOfChars string with one or more distinct characters
+ * @return Offset of the first character in <code>setOfChars</code>
+ * found, or -1 if not found.
+ * @see #indexOf
+ */
+ private static int quotedIndexOf(String text, int start, int limit, String setOfChars) {
+     int pos = start;
+     while (pos < limit) {
+         char ch = text.charAt(pos);
+         if (ch == ESCAPE) {
+             ++pos; // the escaped character can never be a hit
+         } else if (ch == QUOTE) { // skip to the closing quote, or to limit
+             do { ++pos; } while (pos < limit && text.charAt(pos) != QUOTE);
+         } else if (setOfChars.indexOf(ch) >= 0) {
+             return pos;
+         }
+         ++pos;
+     }
+     return -1;
+ }
+
+
+
+ /**
+ * A range of Unicode characters. Support the operations of testing for
+ * inclusion (does this range contain this character?) and splitting.
+ * Splitting involves breaking a range into two smaller ranges around a
+ * character inside the original range. The split character is not included
+ * in either range. If the split character is at either extreme end of the
+ * range, one of the split products is an empty range.
+ *
+ * This class is used internally to determine the largest available private
+ * use character range for variable stand-ins.
+ */
+ private static class Range implements Cloneable {
+     char start;
+     int length;
+
+     Range(char start, int length) {
+         this.start = start;
+         this.length = length;
+     }
+
+     public Object clone() {
+         return new Range(start, length);
+     }
+
+     boolean contains(char c) {
+         int offset = c - start;
+         return offset >= 0 && offset < length;
+     }
+
+     /**
+      * Removes c from this range; the caller must ensure contains(c).
+      * This range is modified in place to become one of the two pieces
+      * left around c, and the other piece is returned. When c is the
+      * first or last character there is no second piece, so this range
+      * simply shrinks by one and null is returned.
+      */
+     Range split(char c) {
+         int offset = c - start;
+         if (offset == 0) { // c is the first character: trim the front
+             ++start;
+             --length;
+             return null;
+         }
+         if (offset == length - 1) { // c is the last character: trim the back
+             --length;
+             return null;
+         }
+         // c is interior: keep [start, c) here and return the part after c.
+         char next = (char) (c + 1);
+         Range upper = new Range(next, start + length - next);
+         length = offset;
+         return upper;
+     }
+
+     /**
+      * Finds the largest subrange of this range that none of the given
+      * strings uses, that is, which contains no character occurring in
+      * any of the strings. This range itself is not modified; when no
+      * string contains a character of this range, a copy of the whole
+      * range is returned. The result may be an empty range.
+      */
+     Range largestUnusedSubrange(String[] strings) {
+         Vector pieces = new Vector(1); // parts of this range still unused
+         pieces.addElement(clone());
+
+         for (int k = 0; k < strings.length; ++k) {
+             String str = strings[k];
+             for (int i = 0; i < str.length(); ++i) {
+                 char c = str.charAt(i);
+                 if (!contains(c)) {
+                     continue;
+                 }
+                 for (int j = 0; j < pieces.size(); ++j) {
+                     Range piece = (Range) pieces.elementAt(j);
+                     if (piece.contains(c)) {
+                         Range rest = piece.split(c);
+                         if (rest != null) {
+                             pieces.addElement(rest);
+                         }
+                         break;
+                     }
+                 }
+             }
+         }
+         Range best = (Range) pieces.elementAt(0); // first piece wins ties
+         for (int j = 1; j < pieces.size(); ++j) {
+             Range candidate = (Range) pieces.elementAt(j);
+             if (candidate.length > best.length) {
+                 best = candidate;
+             }
+         }
+         return best;
+     }
+ }
+ }
+}
diff --git a/src/com/ibm/text/TransliterationRule.java b/src/com/ibm/text/TransliterationRule.java
new file mode 100755
index 0000000..518b385
--- /dev/null
+++ b/src/com/ibm/text/TransliterationRule.java
@@ -0,0 +1,552 @@
+package com.ibm.text;
+
+import java.util.Dictionary;
+
+/**
+ * A transliteration rule used by
+ * <code>RuleBasedTransliterator</code>.
+ * <code>TransliterationRule</code> is an immutable object.
+ *
+ * <p>A rule consists of an input pattern and an output string. When
+ * the input pattern is matched, the output string is emitted. The
+ * input pattern consists of zero or more characters which are matched
+ * exactly (the key) and optional context. Context must match if it
+ * is specified. Context may be specified before the key, after the
+ * key, or both. The key, preceding context, and following context
+ * may contain variables. Variables represent a set of Unicode
+ * characters, such as the letters <i>a</i> through <i>z</i>.
+ * Variables are detected by looking up each character in a supplied
+ * variable list to see if it has been so defined.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.8 $ $Date: 2000/01/13 23:53:23 $
+ *
+ * $Log: TransliterationRule.java,v $
+ * Revision 1.8 2000/01/13 23:53:23 Alan
+ * Fix bugs found during ICU port
+ *
+ * Revision 1.7 2000/01/11 04:12:06 Alan
+ * Cleanup, embellish comments
+ *
+ * Revision 1.6 2000/01/11 02:25:03 Alan
+ * Rewrite UnicodeSet and RBT parsers for better performance and new syntax
+ *
+ * Revision 1.5 2000/01/04 21:43:57 Alan
+ * Add rule indexing, and move masking check to TransliterationRuleSet.
+ *
+ * Revision 1.4 1999/12/22 01:40:54 Alan
+ * Consolidate rule pattern anteContext, key, and postContext into one string.
+ *
+ * Revision 1.3 1999/12/22 01:05:54 Alan
+ * Improve masking checking; turn it off by default, for better performance
+ *
+ * Revision 1.2 1999/12/21 23:58:44 Alan
+ * Detect a>x masking a>y
+ *
+ */
+class TransliterationRule {
+ /**
+ * Constant returned by <code>getMatchDegree()</code> indicating a mismatch
+ * between the text and this rule. One or more characters of the context or
+ * key do not match the text.
+ * @see #getMatchDegree
+ */
+ public static final int MISMATCH = 0;
+
+ /**
+ * Constant returned by <code>getMatchDegree()</code> indicating a partial
+ * match between the text and this rule. All characters of the text match
+ * the corresponding context or key, but more characters are required for a
+ * complete match. There are some key or context characters at the end of
+ * the pattern that remain unmatched because the text isn't long enough.
+ * @see #getMatchDegree
+ */
+ public static final int PARTIAL_MATCH = 1;
+
+ /**
+ * Constant returned by <code>getMatchDegree()</code> indicating a complete
+ * match between the text and this rule. The text matches all context and
+ * key characters.
+ * @see #getMatchDegree
+ */
+ public static final int FULL_MATCH = 2;
+
+ /**
+ * The string that must be matched, consisting of the anteContext, key,
+ * and postContext, concatenated together, in that order. Some components
+ * may be empty (zero length).
+ * @see #anteContextLength
+ * @see #keyLength
+ */
+ private String pattern;
+
+ /**
+ * The string that is emitted if the key, anteContext, and postContext
+ * are matched.
+ */
+ private String output;
+
+ /**
+ * The length of the string that must match before the key. If
+ * zero, then there is no matching requirement before the key.
+ * Substring [0,anteContextLength) of pattern is the anteContext.
+ */
+ private int anteContextLength;
+
+ /**
+ * The length of the key. Substring [anteContextLength,
+ * anteContextLength + keyLength) is the key.
+ */
+ private int keyLength;
+
+ /**
+ * The position of the cursor after emitting the output string, from 0 to
+ * output.length(). For most rules with no special cursor specification,
+ * the cursorPos is output.length().
+ */
+ private int cursorPos;
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+ * Construct a new rule with the given input, output text, and other
+ * attributes. A cursor position may be specified for the output text.
+ * @param input input string, including key and optional ante and
+ * post context
+ * @param anteContextPos offset into input to end of ante context, or -1 if
+ * none. Must be <= input.length() if not -1.
+ * @param postContextPos offset into input to start of post context, or -1
+ * if none. Must be <= input.length() if not -1, and must be >=
+ * anteContextPos.
+ * @param output output string
+ * @param cursorPos offset into output at which cursor is located, or -1 if
+ * none. If less than zero, then the cursor is placed after the
+ * <code>output</code>; that is, -1 is equivalent to
+ * <code>output.length()</code>. If greater than
+ * <code>output.length()</code> then an exception is thrown.
+ */
+ public TransliterationRule(String input,
+                            int anteContextPos, int postContextPos,
+                            String output,
+                            int cursorPos) {
+     // A negative position means "not specified"; range checks are only
+     // needed for positions that were actually supplied.
+     int inputLength = input.length();
+     // Ante context: pattern[0, anteContextLength).
+     if (anteContextPos > inputLength) {
+         throw new IllegalArgumentException("Invalid ante context");
+     }
+     anteContextLength = (anteContextPos < 0) ? 0 : anteContextPos;
+
+     // Key: runs from the end of the ante context to the start of the
+     // post context, or to the end of the input if there is none.
+     if (postContextPos >= 0
+             && (postContextPos < anteContextLength
+                 || postContextPos > inputLength)) {
+         throw new IllegalArgumentException("Invalid post context");
+     }
+     int keyEnd = (postContextPos < 0) ? inputLength : postContextPos;
+     keyLength = keyEnd - anteContextLength;
+
+     // Cursor: defaults to just after the output.
+     int outputLength = output.length();
+     if (cursorPos > outputLength) {
+         throw new IllegalArgumentException("Invalid cursor position");
+     }
+     this.cursorPos = (cursorPos < 0) ? outputLength : cursorPos;
+
+     pattern = input;
+     this.output = output;
+ }
+
+ /**
+ * Return the length of the key. Equivalent to <code>getKey().length()</code>.
+ * @return the length of the match key.
+ */
+ public int getKeyLength() {
+     return this.keyLength;
+ }
+
+ /**
+ * Return the output string.
+ * @return the output string.
+ */
+ public String getOutput() {
+     return this.output;
+ }
+
+ /**
+ * Return the position of the cursor within the output string.
+ * @return a value from 0 to <code>getOutput().length()</code>, inclusive.
+ */
+ public int getCursorPos() {
+     return this.cursorPos;
+ }
+
+ /**
+ * Return the preceding context length. This method is needed to
+ * support the <code>Transliterator</code> method
+ * <code>getMaximumContextLength()</code>.
+ */
+ public int getAnteContextLength() {
+     return this.anteContextLength;
+ }
+
+ /**
+ * Internal method. Returns 8-bit index value for this rule.
+ * This is the low byte of the first character of the key,
+ * unless the first character of the key is a set. If it's a
+ * set, or otherwise can match multiple keys, the index value is -1.
+ */
+ final int getIndexValue(Dictionary variables) {
+     // Nothing follows the ante context {as in foo)>bar}, so any key can match.
+     if (anteContextLength == pattern.length()) {
+         return -1;
+     }
+     char first = pattern.charAt(anteContextLength);
+     boolean isSet = variables.get(new Character(first)) != null;
+     return isSet ? -1 : (first & 0xFF);
+ }
+
+ /**
+ * Internal method. Returns true if this rule matches the given
+ * index value. The index value is an 8-bit integer, 0..255,
+ * representing the low byte of the first character of the key.
+ * It matches this rule if it matches the first character of the
+ * key, or if the first character of the key is a set, and the set
+ * contains any character with a low byte equal to the index
+ * value. If the rule contains only ante context, as in foo)>bar,
+ * then it will match any key.
+ */
+ final boolean matchesIndexValue(int v, Dictionary variables) {
+     // Nothing follows the ante context {as in foo)>bar}, so any key can match.
+     if (anteContextLength == pattern.length()) {
+         return true;
+     }
+     char first = pattern.charAt(anteContextLength);
+     UnicodeSet set = (UnicodeSet) variables.get(new Character(first));
+     // A literal is compared by its low byte; a set is asked about the index value.
+     return set != null ? set.containsIndexValue(v) : (first & 0xFF) == v;
+ }
+
+ /**
+ * Return true if this rule masks another rule. If r1 masks r2 then
+ * r1 matches any input string that r2 matches. If r1 masks r2 and r2 masks
+ * r1 then r1 == r2. Examples: "a>x" masks "ab>y". "a>x" masks "a[b]>y".
+ * "[c]a>x" masks "[dc]a>y".
+ */
+ public boolean masks(TransliterationRule r2) {
+     /* Rule r1 (this rule) masks rule r2 when the strings formed of
+      * ante context, key, and post context overlap like this:
+      *
+      *    r1:      aakkkpppp
+      *    r2:     aaakkkkkpppp
+      *                ^
+      *
+      * The strings are aligned at the first character of the key.
+      * r1 must not extend further than r2 on either side of the
+      * alignment point, and every character of r1 must equal the
+      * corresponding character of r2.
+      * LIMITATION: characters are compared literally. Ideally a
+      * UnicodeSet in r1 would also mask anything it is a superset of,
+      * so that "{Lu}]a>x" masks "A]a>y"; such maskings are currently
+      * not detected. TODO
+      */
+     int before = anteContextLength;
+     int before2 = r2.anteContextLength;
+     if (before > before2) {
+         return false;
+     }
+     int after = pattern.length() - before;
+     int after2 = r2.pattern.length() - before2;
+     if (after > after2) {
+         return false;
+     }
+     // Compare r1 against r2 starting at the offset that aligns the keys.
+     return r2.pattern.startsWith(pattern, before2 - before);
+ }
+
+ /**
+ * Return a string representation of this object.
+ * @return string representation of this object
+ */
+ public String toString() {
+     int keyEnd = anteContextLength + keyLength;
+     // Rendered as "(ante) key (post) > output", with '|' marking a cursor inside the output.
+     StringBuffer rule = new StringBuffer(anteContextLength > 0
+         ? "(" + pattern.substring(0, anteContextLength) + ") " : "");
+     rule.append(pattern.substring(anteContextLength, keyEnd));
+     if (keyEnd < pattern.length()) {
+         rule.append(" (").append(pattern.substring(keyEnd)).append(')');
+     }
+     rule.append(" > ").append(output.substring(0, cursorPos));
+     rule.append(cursorPos < output.length() ? "|" + output.substring(cursorPos) : "");
+     return getClass().getName() + '{' + escape(rule.toString()) + '}';
+ }
+
+ /**
+ * Return true if this rule matches the given text. The text being matched
+ * occupies a virtual buffer consisting of the contents of
+ * <code>result</code> concatenated to a substring of <code>text</code>.
+ * The substring is specified by <code>start</code> and <code>limit</code>.
+ * The value of <code>cursor</code> is an index into this virtual buffer,
+ * from 0 to the length of the buffer. In terms of the parameters,
+ * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+ * start</code>.
+ * @param text the untranslated text
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result translated text so far
+ * @param cursor position at which to translate next, an offset into result.
+ * If greater than or equal to result.length(), represents offset start +
+ * cursor - result.length() into text.
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ */
+ public final boolean matches(String text, int start, int limit,
+                              StringBuffer result, int cursor,
+                              Dictionary variables,
+                              UnicodeFilter filter) {
+     // pattern starts with the ante context, so begin that far before the cursor.
+     int patternStart = cursor - anteContextLength;
+     return regionMatches(text, start, limit, result, patternStart,
+                          pattern, variables, filter);
+ }
+
+ /**
+ * Return true if this rule matches the given text.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text. This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ */
+ public final boolean matches(Replaceable text, int start, int limit,
+                              int cursor, Dictionary variables,
+                              UnicodeFilter filter) {
+     // pattern starts with the ante context, so begin that far before the cursor.
+     int patternStart = cursor - anteContextLength;
+     return regionMatches(text, start, limit, patternStart,
+                          pattern, variables, filter);
+ }
+
+ /**
+ * Return the degree of match between this rule and the given text. The
+ * degree of match may be mismatch, a partial match, or a full match. A
+ * mismatch means at least one character of the text does not match the
+ * context or key. A partial match means some context and key characters
+ * match, but the text is not long enough to match all of them. A full
+ * match means all context and key characters match.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text. This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
+ * <code>FULL_MATCH</code>.
+ * @see #MISMATCH
+ * @see #PARTIAL_MATCH
+ * @see #FULL_MATCH
+ */
+ public int getMatchDegree(Replaceable text, int start, int limit,
+                           int cursor, Dictionary variables,
+                           UnicodeFilter filter) {
+     int matched = getRegionMatchLength(text, start, limit,
+         cursor - anteContextLength, pattern, variables, filter);
+     boolean complete = matched >= pattern.length();
+     return matched < anteContextLength ? MISMATCH : (complete ? FULL_MATCH : PARTIAL_MATCH);
+ }
+
+ /**
+ * Return true if a template matches the text. The entire length of the
+ * template is compared to the text at the cursor. As in
+ * <code>matches()</code>, the text being matched occupies a virtual buffer
+ * consisting of the contents of <code>result</code> concatenated to a
+ * substring of <code>text</code>. See <code>matches()</code> for details.
+ * @param text the untranslated text
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result translated text so far
+ * @param cursor position at which to translate next, an offset into result.
+ * If greater than or equal to result.length(), represents offset start +
+ * cursor - result.length() into text.
+ * @param template the text to match against. All characters must match.
+ * @param variables a dictionary of variables mapping <code>Character</code>
+ * to <code>UnicodeSet</code>
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if there is a match
+ */
+ protected static boolean regionMatches(String text, int start, int limit,
+                                        StringBuffer result, int cursor,
+                                        String template,
+                                        Dictionary variables,
+                                        UnicodeFilter filter) {
+     int resultLength = result.length();
+     int bufferLength = resultLength + limit - start; // result followed by text[start, limit)
+     if (cursor < 0 || cursor + template.length() > bufferLength) {
+         return false;
+     }
+     for (int i = 0; i < template.length(); ++i) {
+         int pos = cursor + i;
+         char textChar = pos < resultLength ? result.charAt(pos)
+                                            : text.charAt(pos - resultLength + start);
+         if (!charMatches(template.charAt(i), textChar, variables, filter)) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+ * Return true if a template matches the text. The entire length of the
+ * template is compared to the text at the cursor.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text. This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param template the text to match against. All characters must match.
+ * @param variables a dictionary of variables mapping <code>Character</code>
+ * to <code>UnicodeSet</code>
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if there is a match
+ */
+ protected static boolean regionMatches(Replaceable text, int start, int limit,
+                                        int cursor,
+                                        String template, Dictionary variables,
+                                        UnicodeFilter filter) {
+     boolean fits = cursor >= start && cursor + template.length() <= limit; // within [start, limit)
+     if (!fits) {
+         return false;
+     }
+     for (int i = 0; i < template.length(); ++i) {
+         char textChar = text.charAt(cursor + i);
+         if (!charMatches(template.charAt(i), textChar, variables, filter)) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+ * Return the number of characters of the text that match this rule. If
+ * there is a mismatch, return -1. If the text is not long enough to match
+ * any characters, return 0.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text. This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param template the text to match against. All characters must match.
+ * @param variables a dictionary of variables mapping <code>Character</code>
+ * to <code>UnicodeSet</code>
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return -1 if there is a mismatch, 0 if the text is not long enough to
+ * match any characters, otherwise the number of characters of text that
+ * match this rule.
+ */
+ protected static int getRegionMatchLength(Replaceable text, int start,
+                                           int limit, int cursor,
+                                           String template,
+                                           Dictionary variables,
+                                           UnicodeFilter filter) {
+     if (cursor < start) {
+         return -1;
+     }
+     // Stop at the end of the template or of the text, whichever comes first.
+     int matched = 0; // characters of template matched so far
+     for (int pos = cursor; matched < template.length() && pos < limit; ++pos, ++matched) {
+         if (!charMatches(template.charAt(matched), text.charAt(pos), variables, filter)) {
+             return -1;
+         }
+     }
+     return matched;
+ }
+
+ /**
+ * Return true if the given key matches the given text. This method
+ * accounts for the fact that the key character may represent a character
+ * set. Note that the key and text characters may not be interchanged
+ * without altering the results.
+ * @param keyChar a character in the match key
+ * @param textChar a character in the text being transliterated
+ * @param variables a dictionary of variables mapping <code>Character</code>
+ * to <code>UnicodeSet</code>
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ */
+ protected static final boolean charMatches(char keyChar, char textChar,
+                                            Dictionary variables, UnicodeFilter filter) {
+     UnicodeSet set = null;
+     // Parenthesized ?: -- && binds tighter, so a filtered-out char used to reach set.contains() on a null set.
+     return (filter == null || filter.isIn(textChar)) &&
+         (((set = (UnicodeSet) variables.get(new Character(keyChar))) == null)
+             ? keyChar == textChar : set.contains(textChar));
+ }
+
+ /**
+ * Escape non-ASCII characters as Unicode.
+ */
+ public static final String escape(String s) {
+     StringBuffer escaped = new StringBuffer();
+     int n = s.length();
+     for (int i = 0; i < n; ++i) {
+         char ch = s.charAt(i);
+         // Characters from space through 0x7F are copied unchanged.
+         boolean keep = ch >= ' ' && ch <= 0x007F;
+         if (keep) {
+             escaped.append(ch);
+             continue;
+         }
+         // Everything else becomes a backslash, 'u', and exactly four
+         // lowercase hex digits, left-padded with zeros.
+         String hex = Integer.toHexString(ch);
+         escaped.append("\\u");
+         for (int pad = hex.length(); pad < 4; ++pad) {
+             escaped.append('0');
+         }
+         escaped.append(hex);
+     }
+     return escaped.toString();
+ }
+}
diff --git a/src/com/ibm/text/TransliterationRuleSet.java b/src/com/ibm/text/TransliterationRuleSet.java
new file mode 100755
index 0000000..b4640bb
--- /dev/null
+++ b/src/com/ibm/text/TransliterationRuleSet.java
@@ -0,0 +1,316 @@
+package com.ibm.text;
+
+import java.util.*;
+
+/**
+ * A set of rules for a <code>RuleBasedTransliterator</code>. This set encodes
+ * the transliteration in one direction from one set of characters or short
+ * strings to another. A <code>RuleBasedTransliterator</code> consists of up to
+ * two such sets, one for the forward direction, and one for the reverse.
+ *
+ * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
+ * finding a matching rule at a given point in the text. This is accomplished
+ * by the <code>findMatch()</code> method.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.5 $ $Date: 2000/01/04 21:43:57 $
+ *
+ * $Log: TransliterationRuleSet.java,v $
+ * Revision 1.5 2000/01/04 21:43:57 Alan
+ * Add rule indexing, and move masking check to TransliterationRuleSet.
+ *
+ * Revision 1.4 1999/12/22 01:40:54 Alan
+ * Consolidate rule pattern anteContext, key, and postContext into one string.
+ *
+ * Revision 1.3 1999/12/22 01:05:54 Alan
+ * Improve masking checking; turn it off by default, for better performance
+ *
+ * Revision 1.2 1999/12/22 00:01:36 Alan
+ * Detect a>x masking a>y
+ *
+ */
+class TransliterationRuleSet {
+ /**
+ * Vector of rules, in the order added. This is only used while the rule
+ * set is getting built. After that, freeze() reorders and indexes the
+ * rules, and this Vector is freed.
+ */
+ private Vector ruleVector;
+
+ /**
+ * Length of the longest preceding context
+ */
+ private int maxContextLength;
+
+ /**
+ * Sorted and indexed table of rules. This is created by freeze() from
+ * the rules in ruleVector.
+ */
+ private TransliterationRule[] rules;
+
+ /**
+ * Index table. For text having a first character c, compute x = c&0xFF.
+ * Now use rules[index[x]..index[x+1]-1]. This index table is created by
+ * freeze().
+ */
+ private int[] index;
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+ * Construct a new empty rule set.
+ */
+ public TransliterationRuleSet() {
+     this.maxContextLength = 0;
+     this.ruleVector = new Vector();
+ }
+
+ /**
+ * Return the maximum context length.
+ * @return the length of the longest preceding context.
+ */
+ public int getMaximumContextLength() {
+     return this.maxContextLength;
+ }
+
+ /**
+ * Add a rule to this set. Rules are added in order, and order is
+ * significant.
+ * @param rule the rule to add
+ */
+ public void addRule(TransliterationRule rule) {
+     // ruleVector is discarded by freeze(), which is how a frozen set is recognized.
+     if (ruleVector == null) {
+         throw new IllegalArgumentException("Cannot add rules after freezing");
+     }
+     ruleVector.addElement(rule);
+     // Keep track of the longest ante context seen so far.
+     int anteLength = rule.getAnteContextLength();
+     maxContextLength = Math.max(maxContextLength, anteLength);
+ }
+
+ /**
+ * Close this rule set to further additions, check it for masked rules,
+ * and index it to optimize performance. Once this method is called,
+ * addRule() can no longer be called.
+ * @exception IllegalArgumentException if some rules are masked
+ */
+ public void freeze(Dictionary variables) {
+     /* Build the rule array and its index table by sorting the rules
+      * into 256 bins, one per index value. A rule belongs in a bin if
+      * a string whose first key character has a low byte equal to the
+      * bin's index value can match the rule.
+      *
+      * Within a bin, rules keep the order in which they were added.
+      * A rule whose first key character is a variable will generally
+      * land in more than one bin, so the bins together may hold more
+      * entries than the original vector holds rules.
+      *
+      * Put differently, a bin holds every rule whose first key
+      * character either has that low byte or is a set containing a
+      * character with that low byte.
+      */
+     int ruleCount = ruleVector.size();
+     index = new int[257]; // 256 bins, plus one slot so index[x+1] ends bin x
+     Vector binned = new Vector(2*ruleCount); // heuristic; adjust as needed
+
+     /* Precompute the index values. This saves a LOT of time.
+      */
+     int[] indexValue = new int[ruleCount];
+     for (int j = 0; j < ruleCount; ++j) {
+         indexValue[j] =
+             ((TransliterationRule) ruleVector.elementAt(j)).getIndexValue(variables);
+     }
+
+     // Fill the bins in index order; index[x] records where bin x starts.
+     for (int x = 0; x < 256; ++x) {
+         index[x] = binned.size();
+         for (int j = 0; j < ruleCount; ++j) {
+             TransliterationRule rule = (TransliterationRule) ruleVector.elementAt(j);
+             boolean inBin;
+             if (indexValue[j] >= 0) {
+                 inBin = indexValue[j] == x;
+             } else {
+                 // A negative index value means the rule can match more
+                 // than one first key character, so the more
+                 // time-consuming matchesIndexValue check is needed. In
+                 // practice this happens rarely.
+                 inBin = rule.matchesIndexValue(x, variables);
+             }
+             if (inBin) {
+                 binned.addElement(rule);
+             }
+         }
+     }
+     index[256] = binned.size();
+
+     /* Freeze things into an array.
+      */
+     rules = new TransliterationRule[binned.size()];
+     binned.copyInto(rules);
+     ruleVector = null;
+
+     /* Check for masking. Only rules that share a bin need to be
+      * compared, which is MUCH faster than the old check of each rule
+      * against each following rule: 256*O(n2^2) instead of O(n1^2),
+      * where n1 is the total rule count and n2 is the per-bin rule
+      * count. Since n2<<n1, it's a big win.
+      */
+     StringBuffer errors = null;
+     for (int x = 0; x < 256; ++x) {
+         int binEnd = index[x+1];
+         for (int j = index[x]; j < binEnd - 1; ++j) {
+             for (int k = j + 1; k < binEnd; ++k) {
+                 if (!rules[j].masks(rules[k])) {
+                     continue;
+                 }
+                 // Collect every masking found, one per line, so that
+                 // they can all be reported together.
+                 if (errors == null) {
+                     errors = new StringBuffer();
+                 } else {
+                     errors.append("\n");
+                 }
+                 errors.append("Rule " + rules[j] + " masks " + rules[k]);
+             }
+         }
+     }
+
+     if (errors != null) {
+         throw new IllegalArgumentException(errors.toString());
+     }
+ }
+
+ /**
+ * Attempt to find a matching rule at the specified point in the text. The
+ * text being matched occupies a virtual buffer consisting of the contents
+ * of <code>result</code> concatenated to a substring of <code>text</code>.
+ * The substring is specified by <code>start</code> and <code>limit</code>.
+ * The value of <code>cursor</code> is an index into this virtual buffer,
+ * from 0 to the length of the buffer. In terms of the parameters,
+ * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+ * start</code>.
+ * @param text the untranslated text
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result translated text so far
+ * @param cursor position at which to translate next, an offset into result.
+ * If greater than or equal to result.length(), represents offset start +
+ * cursor - result.length() into text.
+ * @param variables a dictionary mapping variables to the sets they
+ * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+ * @param filter the filter. Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator. If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return the matching rule, or null if none found.
+
+ */
+ public TransliterationRule findMatch(String text, int start, int limit,
+ StringBuffer result, int cursor,
+ Dictionary variables,
+ UnicodeFilter filter) {
+ /* We only need to check our indexed bin of the rule table,
+ * based on the low byte of the first key character.
+ */
+ int rlen = result.length();
+ int x = 0xFF & (cursor < rlen ? result.charAt(cursor)
+ : text.charAt(cursor - rlen + start));
+ for (int i=index[x]; i<index[x+1]; ++i) {
+ if (rules[i].matches(text, start, limit, result, cursor, variables, filter)) {
+ return rules[i];
+ }
+ }
+ return null;
+ }
+
+ /**
+  * Searches, within one buffer of translated and untranslated text, for
+  * the first rule that matches at <code>cursor</code>. Only the rules
+  * binned under the low byte of that character are tested, in order.
+  * @param text the text, both translated and untranslated
+  * @param start the beginning index, inclusive; <code>0 &lt;= start
+  * &lt;= limit</code>.
+  * @param limit the ending index, exclusive; <code>start &lt;= limit
+  * &lt;= text.length()</code>.
+  * @param cursor position at which to translate next, an offset into
+  * text; must be between <code>start</code> and <code>limit</code>.
+  * @param variables a dictionary mapping variables to the sets they
+  * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+  * @param filter the filter. Characters for which <tt>filter.isIn()</tt>
+  * returns <tt>false</tt> will not be altered by this transliterator;
+  * <tt>null</tt> means that no filtering is applied.
+  * @return the first rule in the bin that matches, or null if none does.
+  */
+ public TransliterationRule findMatch(Replaceable text, int start, int limit,
+                                      int cursor,
+                                      Dictionary variables,
+                                      UnicodeFilter filter) {
+     // Only the bin selected by the low byte of the character at the
+     // cursor is searched.
+     int bin = 0xFF & text.charAt(cursor);
+     for (int pos = index[bin]; pos < index[bin + 1]; ++pos) {
+         TransliterationRule candidate = rules[pos];
+         if (candidate.matches(text, start, limit, cursor, variables, filter)) {
+             return candidate;
+         }
+     }
+     return null;
+ }
+
+ /**
+  * Incremental variant of <code>findMatch()</code>, for use when more
+  * text may still be appended to the buffer. The rules binned under the
+  * low byte of the character at <code>cursor</code> are examined in
+  * index order and the search stops at the first rule that reports
+  * either a full or a partial match. A full match is returned; a
+  * partial match means later input could still pre-empt any full match
+  * found further on, so null is returned and <code>partial[0]</code>
+  * is set.
+  * @param text the text, both translated and untranslated
+  * @param start the beginning index, inclusive; <code>0 &lt;= start
+  * &lt;= limit</code>.
+  * @param limit the ending index, exclusive; <code>start &lt;= limit
+  * &lt;= text.length()</code>.
+  * @param cursor position at which to translate next, an offset into
+  * text; must be between <code>start</code> and <code>limit</code>.
+  * @param variables a dictionary mapping variables to the sets they
+  * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+  * @param partial output parameter. <code>partial[0]</code> is set to
+  * true if the search stopped at a partial match (null is then
+  * returned), and to false otherwise.
+  * @param filter the filter. Characters for which <tt>filter.isIn()</tt>
+  * returns <tt>false</tt> will not be altered by this transliterator;
+  * <tt>null</tt> means that no filtering is applied.
+  * @return the fully matching rule, or null if none is found or if the
+  * buffer does not yet hold enough text to match a rule unambiguously.
+  */
+ public TransliterationRule findIncrementalMatch(Replaceable text, int start,
+                                                 int limit, int cursor,
+                                                 Dictionary variables,
+                                                 boolean partial[],
+                                                 UnicodeFilter filter) {
+     // Only the bin selected by the low byte of the character at the
+     // cursor is searched.
+     partial[0] = false;
+     int bin = 0xFF & text.charAt(cursor);
+     for (int pos = index[bin]; pos < index[bin + 1]; ++pos) {
+         TransliterationRule candidate = rules[pos];
+         int degree = candidate.getMatchDegree(text, start, limit, cursor,
+                                               variables, filter);
+         if (degree == TransliterationRule.FULL_MATCH) {
+             return candidate;
+         }
+         if (degree == TransliterationRule.PARTIAL_MATCH) {
+             partial[0] = true;
+             return null;
+         }
+     }
+     return null;
+ }
+}
diff --git a/src/com/ibm/text/Transliterator.java b/src/com/ibm/text/Transliterator.java
new file mode 100755
index 0000000..ef159ce
--- /dev/null
+++ b/src/com/ibm/text/Transliterator.java
@@ -0,0 +1,891 @@
+package com.ibm.text;
+
+import java.util.*;
+import java.text.MessageFormat;
+
+/**
+ * <code>Transliterator</code> is an abstract class that
+ * transliterates text from one format to another. The most common
+ * kind of transliterator is a script, or alphabet, transliterator.
+ * For example, a Russian to Latin transliterator changes Russian text
+ * written in Cyrillic characters to phonetically equivalent Latin
+ * characters. It does not <em>translate</em> Russian to English!
+ * Transliteration, unlike translation, operates on characters, without
+ * reference to the meanings of words and sentences.
+ *
+ * <p>Although script conversion is its most common use, a
+ * transliterator can actually perform a more general class of tasks.
+ * In fact, <code>Transliterator</code> defines a very general API
+ * which specifies only that a segment of the input text is replaced
+ * by new text. The particulars of this conversion are determined
+ * entirely by subclasses of <code>Transliterator</code>.
+ *
+ * <p><b>Transliterators are stateless</b>
+ *
+ * <p><code>Transliterator</code> objects are <em>stateless</em>; they
+ * retain no information between calls to
+ * <code>transliterate()</code>. As a result, threads may share
+ * transliterators without synchronizing them. This might seem to
+ * limit the complexity of the transliteration operation. In
+ * practice, subclasses perform complex transliterations by delaying
+ * the replacement of text until it is known that no other
+ * replacements are possible. In other words, although the
+ * <code>Transliterator</code> objects are stateless, the source text
+ * itself embodies all the needed information, and delayed operation
+ * allows arbitrary complexity.
+ *
+ * <p><b>Batch transliteration</b>
+ *
+ * <p>The simplest way to perform transliteration is all at once, on a
+ * string of existing text. This is referred to as <em>batch</em>
+ * transliteration. For example, given a string <code>input</code>
+ * and a transliterator <code>t</code>, the call
+ *
+ * <blockquote><code>String result = t.transliterate(input);
+ * </code></blockquote>
+ *
+ * will transliterate it and return the result. Other methods allow
+ * the client to specify a substring to be transliterated and to use
+ * {@link Replaceable} objects instead of strings, in order to
+ * preserve out-of-band information (such as text styles).
+ *
+ * <p><b>Keyboard transliteration</b>
+ *
+ * <p>Somewhat more involved is <em>keyboard</em>, or incremental
+ * transliteration. This is the transliteration of text that is
+ * arriving from some source (typically the user's keyboard) one
+ * character at a time, or in some other piecemeal fashion.
+ *
+ * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
+ * stores the text. As text is inserted, as much as possible is
+ * transliterated on the fly. This means a GUI that displays the
+ * contents of the buffer may show text being modified as each new
+ * character arrives.
+ *
+ * <p>Consider the simple <code>RuleBasedTransliterator</code>:
+ *
+ * <blockquote><code>
+ * th>{theta}<br>
+ * t>{tau}
+ * </code></blockquote>
+ *
+ * When the user types 't', nothing will happen, since the
+ * transliterator is waiting to see if the next character is 'h'. To
+ * remedy this, we introduce the notion of a cursor, marked by a '|'
+ * in the output string:
+ *
+ * <blockquote><code>
+ * t>|{tau}<br>
+ * {tau}h>{theta}
+ * </code></blockquote>
+ *
+ * Now when the user types 't', tau appears, and if the next character
+ * is 'h', the tau changes to a theta. This is accomplished by
+ * maintaining a cursor position (independent of the insertion point,
+ * and invisible in the GUI) across calls to
+ * <code>keyboardTransliterate()</code>. Typically, the cursor will
+ * be coincident with the insertion point, but in a case like the one
+ * above, it will precede the insertion point.
+ *
+ * <p>Keyboard transliteration methods maintain a set of three indices
+ * that are updated with each call to
+ * <code>keyboardTransliterate()</code>, including the cursor, start,
+ * and limit. Since these indices are changed by the method, they are
+ * passed in an <code>int[]</code> array. The <code>START</code> index
+ * marks the beginning of the substring that the transliterator will
+ * look at. It is advanced as text becomes committed (but it is not
+ * the committed index; that's the <code>CURSOR</code>). The
+ * <code>CURSOR</code> index, described above, marks the point at
+ * which the transliterator last stopped, either because it reached
+ * the end, or because it required more characters to disambiguate
+ * between possible inputs. The <code>CURSOR</code> can also be
+ * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
+ * Any characters before the <code>CURSOR</code> index are frozen;
+ * future keyboard transliteration calls within this input sequence
+ * will not change them. New text is inserted at the
+ * <code>LIMIT</code> index, which marks the end of the substring that
+ * the transliterator looks at.
+ *
+ * <p>Because keyboard transliteration assumes that more characters
+ * are to arrive, it is conservative in its operation. It only
+ * transliterates when it can do so unambiguously. Otherwise it waits
+ * for more characters to arrive. When the client code knows that no
+ * more characters are forthcoming, perhaps because the user has
+ * performed some input termination operation, then it should call
+ * <code>finishKeyboardTransliteration()</code> to complete any
+ * pending transliterations.
+ *
+ * <p><b>Inverses</b>
+ *
+ * <p>Pairs of transliterators may be inverses of one another. For
+ * example, if transliterator <b>A</b> transliterates characters by
+ * incrementing their Unicode value (so "abc" -> "def"), and
+ * transliterator <b>B</b> decrements character values, then <b>A</b>
+ * is an inverse of <b>B</b> and vice versa. If we compose <b>A</b>
+ * with <b>B</b> in a compound transliterator, the result is the
+ * identity transliterator, that is, a transliterator that does not
+ * change its input text.
+ *
+ * <p>The <code>Transliterator</code> method <code>getInverse()</code>
+ * returns a transliterator's inverse, if one exists, or
+ * <code>null</code> otherwise. However, the result of
+ * <code>getInverse()</code> usually will <em>not</em> be a true
+ * mathematical inverse. This is because true inverse transliterators
+ * are difficult to formulate. For example, consider two
+ * transliterators: <b>AB</b>, which transliterates the character 'A'
+ * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'. It might
+ * seem that these are exact inverses, since
+ *
+ * <blockquote>"A" x <b>AB</b> -> "B"<br>
+ * "B" x <b>BA</b> -> "A"</blockquote>
+ *
+ * where 'x' represents transliteration. However,
+ *
+ * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br>
+ * "BBCD" x <b>BA</b> -> "AACD"</blockquote>
+ *
+ * so <b>AB</b> composed with <b>BA</b> is not the
+ * identity. Nonetheless, <b>BA</b> may be usefully considered to be
+ * <b>AB</b>'s inverse, and it is on this basis that
+ * <b>AB</b><code>.getInverse()</code> could legitimately return
+ * <b>BA</b>.
+ *
+ * <p><b>IDs and display names</b>
+ *
+ * <p>A transliterator is designated by a short identifier string or
+ * <em>ID</em>. IDs follow the format <em>source-destination</em>,
+ * where <em>source</em> describes the entity being replaced, and
+ * <em>destination</em> describes the entity replacing
+ * <em>source</em>. The entities may be the names of scripts,
+ * particular sequences of characters, or whatever else it is that the
+ * transliterator converts to or from. For example, a transliterator
+ * from Russian to Latin might be named "Russian-Latin". A
+ * transliterator from keyboard escape sequences to Latin-1 characters
+ * might be named "KeyboardEscape-Latin1". By convention, system
+ * entity names are in English, with the initial letters of words
+ * capitalized; user entity names may follow any format so long as
+ * they do not contain dashes.
+ *
+ * <p>In addition to programmatic IDs, transliterator objects have
+ * display names for presentation in user interfaces, returned by
+ * {@link #getDisplayName}.
+ *
+ * <p><b>Factory methods and registration</b>
+ *
+ * <p>In general, client code should use the factory method
+ * <code>getInstance()</code> to obtain an instance of a
+ * transliterator given its ID. Valid IDs may be enumerated using
+ * <code>getAvailableIDs()</code>. Since transliterators are
+ * stateless, multiple calls to <code>getInstance()</code> with the
+ * same ID will return the same object.
+ *
+ * <p>In addition to the system transliterators registered at startup,
+ * user transliterators may be registered by calling
+ * <code>registerInstance()</code> at run time. To register a
+ * transliterator subclass without instantiating it (until it is
+ * needed), users may call <code>registerClass()</code>.
+ *
+ * <p><b>Subclassing</b>
+ *
+ * <p>Subclasses must implement the abstract
+ * <code>transliterate()</code> method. They should also override the
+ * <code>transliterate()</code> method taking a <code>String</code>
+ * and <code>StringBuffer</code> if the performance of these methods
+ * can be improved over the performance obtained by the default
+ * implementations in this class. Subclasses must also implement
+ * <code>handleKeyboardTransliterate()</code>.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.6 $ $Date: 2000/01/06 17:38:25 $
+ */
+public abstract class Transliterator {
+ /**
+ * Position in the <code>index[]</code> array passed to
+ * <code>keyboardTransliterate()</code> of the beginning index, inclusive.
+ * @see #keyboardTransliterate
+ */
+ public static final int START = 0;
+
+ /**
+ * Position in the <code>index[]</code> array passed to
+ * <code>keyboardTransliterate()</code> of the ending index, exclusive.
+ * @see #keyboardTransliterate
+ */
+ public static final int LIMIT = 1;
+
+ /**
+ * Position in the <code>index[]</code> array passed to
+ * <code>keyboardTransliterate()</code> of the cursor, i.e. the index
+ * of the next character to be considered for transliteration.
+ * @see #keyboardTransliterate
+ */
+ public static final int CURSOR = 2;
+
+ /**
+ * Programmatic name, e.g., "Latin-Arabic"; returned by <code>getID()</code>.
+ */
+ private String ID;
+
+ /**
+ * This transliterator's filter, read and replaced through
+ * <code>getFilter()</code> and <code>setFilter()</code>. Any character
+ * for which <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered; if <tt>null</tt> then no filtering is applied.
+ */
+ private UnicodeFilter filter;
+
+ /**
+ * Dictionary of known transliterators. Keys are <code>String</code>
+ * names, values are one of the following:
+ *
+ * <ul><li><code>Transliterator</code> objects
+ *
+ * <li><code>Class</code> objects. Such objects must represent
+ * subclasses of <code>Transliterator</code>, and must satisfy the
+ * constraints described in <code>registerClass()</code>
+ *
+ * <li><code>RULE_BASED_PLACEHOLDER</code>, in which case the ID
+ * will have its first '-' removed and be appended to
+ * RB_RULE_BASED_PREFIX to form a resource bundle name from which
+ * the RB_RULE key is looked up to obtain the rule.
+ *
+ * <li><code>REVERSE_RULE_BASED_PLACEHOLDER</code>. Like
+ * <code>RULE_BASED_PLACEHOLDER</code>, except the entity names in
+ * the ID are reversed, and the argument
+ * RuleBasedTransliterator.REVERSE is passed to the
+ * RuleBasedTransliterator constructor.
+ * </ul>
+ */
+ private static Hashtable cache;
+
+ /**
+ * Sentinel stored in <code>cache</code> in place of a
+ * <code>RuleBasedTransliterator</code> that has not been
+ * constructed yet. When a <code>getInstance()</code> call
+ * retrieves this object, it is replaced by the actual
+ * <code>RuleBasedTransliterator</code>. This allows
+ * <code>Transliterator</code> to delay instantiation of such
+ * transliterators until they are needed.
+ */
+ private static final Object RULE_BASED_PLACEHOLDER = new Object();
+
+ /**
+ * Sentinel stored in <code>cache</code> in place of a
+ * <code>RuleBasedTransliterator</code> that has not been
+ * constructed yet and that is to be constructed with the
+ * argument <code>RuleBasedTransliterator.REVERSE</code>, using
+ * the ID with its entity names reversed.
+ */
+ private static final Object REVERSE_RULE_BASED_PLACEHOLDER = new Object();
+
+ /**
+ * Prefix for resource bundle key for the display name for a
+ * transliterator. The ID is appended to this to form the key.
+ * The resource bundle value should be a String.
+ */
+ private static final String RB_DISPLAY_NAME_PREFIX = "%Translit%%";
+
+ /**
+ * Prefix for resource bundle key for the display name for a
+ * transliterator SCRIPT. An entity name taken from the ID is
+ * appended to this to form the key. The value should be a String.
+ */
+ private static final String RB_SCRIPT_DISPLAY_NAME_PREFIX = "%Translit%";
+
+ /**
+ * Resource bundle key for display name pattern.
+ * The resource bundle value should be a String forming a
+ * MessageFormat pattern, e.g.:
+ * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}".
+ */
+ private static final String RB_DISPLAY_NAME_PATTERN = "TransliteratorNamePattern";
+
+ /**
+ * Resource bundle key for the list of RuleBasedTransliterator IDs.
+ * The resource bundle value should be a String[] of valid IDs. Each
+ * ID, with its first '-' removed, is appended to RB_RULE_BASED_PREFIX
+ * to obtain the class name in which the RB_RULE key will be sought.
+ */
+ private static final String RB_RULE_BASED_IDS = "RuleBasedTransliteratorIDs";
+
+ /**
+ * Resource bundle containing display name keys and the
+ * RB_RULE_BASED_IDS array.
+ *
+ * <p>If we ever integrate this with the Sun JDK, the resource bundle
+ * root will change to java.text.resources.LocaleElements
+ */
+ private static final String RB_LOCALE_ELEMENTS =
+ "com.ibm.text.resources.LocaleElements";
+
+ /**
+ * Prefix for resource bundle containing RuleBasedTransliterator
+ * RB_RULE string. The ID is munged to remove the first '-' then appended
+ * to this String to obtain the class name.
+ */
+ private static final String RB_RULE_BASED_PREFIX =
+ "com.ibm.text.resources.TransliterationRule$";
+
+ /**
+ * Resource bundle key for the RuleBasedTransliterator rule.
+ */
+ private static final String RB_RULE = "Rule";
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+  * Stores the identifier and filter of a new transliterator.
+  * @param ID the string identifier for this transliterator
+  * @param filter the filter. Characters for which <tt>filter.isIn()</tt>
+  * returns <tt>false</tt> will not be altered by this transliterator;
+  * <tt>null</tt> means that no filtering is applied.
+  * @exception NullPointerException if <code>ID</code> is null
+  */
+ protected Transliterator(String ID, UnicodeFilter filter) {
+     if (null == ID) {
+         throw new NullPointerException();
+     }
+     this.filter = filter;
+     this.ID = ID;
+ }
+
+ /**
+ * Transliterates the segment of a string that begins at the
+ * character at offset <code>start</code> and extends to the
+ * character at offset <code>limit - 1</code>, with optional
+ * filtering. A default implementaion is provided here;
+ * subclasses should provide a more efficient implementation if
+ * possible.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result buffer to receive the transliterated text; previous
+ * contents are discarded
+ */
+ public void transliterate(String text, int start, int limit,
+ StringBuffer result) {
+ /* This is a default implementation that should be replaced by
+ * a more efficient subclass implementation if possible.
+ */
+ result.setLength(0);
+ result.append(text.substring(start, limit));
+ transliterate(new ReplaceableString(result),
+ 0, result.length());
+ }
+
+ /**
+ * Transliterates a segment of a string in place. Subclasses must
+ * override this abstract method.
+ *
+ * <p>This method has no filter parameter. NOTE(review): presumably
+ * implementations are expected to honor the filter returned by
+ * <code>getFilter()</code>; confirm against the subclasses.
+ *
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @return The new limit index. The text previously occupying <code>[start,
+ * limit)</code> has been transliterated, possibly to a string of a different
+ * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
+ * <em>new-limit</em> is the return value.
+ */
+ public abstract int transliterate(Replaceable text, int start, int limit);
+
+ /**
+  * Convenience method: transliterates all of <code>text</code>.
+  * @param text the string to be transliterated
+  * @param result buffer receiving the output; old contents are discarded
+  */
+ public final void transliterate(String text, StringBuffer result) {
+     final int length = text.length();
+     transliterate(text, 0, length, result);
+ }
+
+ /**
+  * Convenience method: transliterates all of <code>text</code> into a
+  * freshly allocated buffer and returns the buffer's contents.
+  * @param text the string to be transliterated
+  * @return The transliterated text
+  */
+ public final String transliterate(String text) {
+     StringBuffer buffer = new StringBuffer();
+     transliterate(text, buffer);
+     return buffer.toString();
+ }
+
+ /**
+ * Transliterates all of <code>text</code> in place; the new limit is discarded.
+ * @param text the string to be transliterated
+ */
+ public final void transliterate(Replaceable text) {
+ transliterate(text, 0, text.length());
+ }
+
+ /**
+  * Inserts new text into a buffer, typically in response to a keyboard
+  * event, and transliterates as much of the buffer as can be
+  * transliterated unambiguously. When <code>insertion</code> is not
+  * null it is placed into <code>text</code> at
+  * <code>index[LIMIT]</code>, and <code>index[LIMIT]</code> grows by
+  * <code>insertion.length()</code>. The transliterator then tries to
+  * transliterate the characters of <code>text</code> between
+  * <code>index[CURSOR]</code> and <code>index[LIMIT]</code>;
+  * characters before <code>index[CURSOR]</code> are left alone.
+  *
+  * <p>The values in <code>index[]</code> are updated before this
+  * method returns. <code>index[START]</code> becomes the larger of its
+  * value on entry and <code>index[CURSOR]</code> less the maximum
+  * context length; <code>index[CURSOR]</code> and
+  * <code>index[LIMIT]</code> are left as set by
+  * <code>handleKeyboardTransliterate()</code>.
+  *
+  * <p>A typical input sequence begins with a call in which
+  * <code>index[START]</code> and <code>index[LIMIT]</code> delimit the
+  * portion of <code>text</code> to be transliterated and
+  * <code>index[CURSOR] == index[START]</code>. The same
+  * <code>index[]</code> array can then be passed, unmodified, to
+  * later calls, provided that every change to <code>text</code> is
+  * made through this method.
+  *
+  * <p>Because later calls may insert more text into the buffer,
+  * this method performs only unambiguous transliterations. After
+  * the last call there may therefore be untransliterated text
+  * that is still waiting for input that would resolve an
+  * ambiguity. To carry out these pending transliterations,
+  * clients should call {@link #finishKeyboardTransliteration}
+  * with the same <code>index[]</code> array once the last call to
+  * this method has been made.
+  *
+  * @param text the buffer holding transliterated and untransliterated text
+  * @param index an array of at least three integers.
+  *
+  * <ul><li><code>index[START]</code>: the beginning index,
+  * inclusive; <code>0 &lt;= index[START] &lt;= index[LIMIT]</code>.
+  *
+  * <li><code>index[LIMIT]</code>: the ending index, exclusive;
+  * <code>index[START] &lt;= index[LIMIT] &lt;= text.length()</code>.
+  * <code>insertion</code> is inserted at
+  * <code>index[LIMIT]</code>.
+  *
+  * <li><code>index[CURSOR]</code>: the next character to be
+  * considered for transliteration; <code>index[START] &lt;=
+  * index[CURSOR] &lt;= index[LIMIT]</code>. Characters before
+  * <code>index[CURSOR]</code> will not be changed by future calls
+  * to this method.</ul>
+  *
+  * @param insertion text to be inserted into the translation buffer
+  * at <code>index[LIMIT]</code> and possibly transliterated. If
+  * <code>null</code> then no text is inserted and only existing text
+  * is processed.
+  * @see #START
+  * @see #LIMIT
+  * @see #CURSOR
+  * @see #handleKeyboardTransliterate
+  * @exception IllegalArgumentException if <code>index[]</code>
+  * is invalid
+  */
+ public final void keyboardTransliterate(Replaceable text, int[] index,
+                                         String insertion) {
+     boolean valid = index.length >= 3
+         && index[START] >= 0
+         && index[LIMIT] <= text.length()
+         && index[CURSOR] >= index[START]
+         && index[CURSOR] <= index[LIMIT];
+     if (!valid) {
+         throw new IllegalArgumentException("Invalid index array");
+     }
+
+     final int floor = index[START];
+     if (insertion != null) {
+         final int at = index[LIMIT];
+         text.replace(at, at, insertion);
+         index[LIMIT] = at + insertion.length();
+     }
+     handleKeyboardTransliterate(text, index);
+     // START may move forward, but never behind its value on entry.
+     int contextStart = index[CURSOR] - getMaximumContextLength();
+     index[START] = Math.max(contextStart, floor);
+ }
+
+ /**
+  * Inserts a single character into the buffer and transliterates
+  * whatever can then be transliterated unambiguously, typically in
+  * response to a keyboard event. Convenience method; see {@link
+  * #keyboardTransliterate(Replaceable, int[], String)} for details.
+  * @param text the buffer holding transliterated and
+  * untransliterated text
+  * @param index an array of three integers. See {@link
+  * #keyboardTransliterate(Replaceable, int[], String)}.
+  * @param insertion character to be inserted and possibly
+  * transliterated into the translation buffer at
+  * <code>index[LIMIT]</code>.
+  * @see #keyboardTransliterate(Replaceable, int[], String)
+  */
+ public final void keyboardTransliterate(Replaceable text, int[] index,
+                                         char insertion) {
+     String inserted = String.valueOf(insertion);
+     keyboardTransliterate(text, index, inserted);
+ }
+
+ /**
+  * Transliterates, without inserting anything, the portion of the
+  * text buffer that can be transliterated unambiguously. Convenience
+  * method that passes a null insertion; see {@link
+  * #keyboardTransliterate(Replaceable, int[], String)} for details.
+  * @param text the buffer holding transliterated and
+  * untransliterated text
+  * @param index an array of three integers. See {@link
+  * #keyboardTransliterate(Replaceable, int[], String)}.
+  * @see #keyboardTransliterate(Replaceable, int[], String)
+  */
+ public final void keyboardTransliterate(Replaceable text, int[] index) {
+     keyboardTransliterate(text, index, (String) null);
+ }
+
+ /**
+  * Completes any transliterations left pending for want of more
+  * characters by transliterating <code>text</code> from
+  * <code>index[START]</code> to <code>index[LIMIT]</code>. Call this
+  * last, after one or more calls to <code>keyboardTransliterate()</code>.
+  * The new limit is discarded, so <code>index[]</code> is not updated.
+  * @param text the buffer holding transliterated and untransliterated text
+  * @param index the indices previously passed to {@link #keyboardTransliterate}
+  */
+ public final void finishKeyboardTransliteration(Replaceable text,
+                                                 int[] index) {
+     final int from = index[START], to = index[LIMIT];
+     transliterate(text, from, to);
+ }
+
+ /**
+ * Abstract method that concrete subclasses define to implement
+ * keyboard transliteration; <code>keyboardTransliterate()</code>
+ * calls it after validating <code>index[]</code> and inserting any
+ * new text. This method should transliterate all characters between
+ * <code>index[CURSOR]</code> and <code>index[LIMIT]</code> that can
+ * be unambiguously transliterated, regardless of future insertions
+ * of text at <code>index[LIMIT]</code>. <code>index[CURSOR]</code>
+ * should be advanced past committed characters (those that will not
+ * change in future calls to this method). <code>index[LIMIT]</code>
+ * should be updated to reflect text replacements that shorten or
+ * lengthen the text between <code>index[CURSOR]</code> and
+ * <code>index[LIMIT]</code>. Upon return, neither
+ * <code>index[CURSOR]</code> nor <code>index[LIMIT]</code> should be
+ * less than the initial value of <code>index[CURSOR]</code>.
+ * <code>index[START]</code> should <em>not</em> be changed.
+ *
+ * @param text the buffer holding transliterated and
+ * untransliterated text
+ * @param index an array of three integers. See {@link
+ * #keyboardTransliterate(Replaceable, int[], String)}.
+ * @see #keyboardTransliterate
+ */
+ protected abstract void handleKeyboardTransliterate(Replaceable text,
+ int[] index);
+
+ /**
+ * Returns the length of the longest <em>preceding</em> context required
+ * by this transliterator. <code>keyboardTransliterate()</code> uses it
+ * to keep that many characters before <code>index[CURSOR]</code> in
+ * view when it advances <code>index[START]</code>. This default
+ * implementation returns zero; subclasses that use preceding context
+ * should override it. For example, if "ddd" (d being any digit) becomes
+ * "555" when preceded by "(ddd)", the context length is 5.
+ *
+ * @return The maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+ protected int getMaximumContextLength() {
+ return 0;
+ }
+
+ /**
+ * Returns a programmatic identifier for this transliterator. Passing it
+ * to <code>getInstance()</code> returns this object, if it is registered.
+ * @return the value of this transliterator's <code>ID</code> field
+ * @see #registerInstance
+ * @see #registerClass
+ * @see #getAvailableIDs
+ */
+ public final String getID() {
+ return ID;
+ }
+
+ /**
+ * Returns a name for this transliterator that is appropriate for
+ * display to the user in the default locale. See {@link
+ * #getDisplayName(Locale)} for details.
+ */
+ public final static String getDisplayName(String ID) {
+ return getDisplayName(ID, Locale.getDefault());
+ }
+
+ /**
+  * Returns a name, appropriate for display to the user in the given
+  * locale, for the transliterator with the given ID. A name stored
+  * in the locale resource data for the whole ID is used if present.
+  *
+  * <p>Otherwise a name is synthesized with a localized
+  * <code>MessageFormat</code> pattern from the resource data. Its
+  * arguments are the number of strings (1 or 2) followed by the
+  * strings themselves, which are formed by splitting the ID at the
+  * first '-' (the whole ID if there is no '-'). Each string is
+  * replaced by its localized script name when one exists.
+  * @param ID the transliterator ID
+  * @param inLocale the Locale in which the display name should be
+  * localized.
+  * @return the localized or synthesized display name
+  * @exception RuntimeException if the pattern resource is missing
+  * @see java.text.MessageFormat
+  */
+ public static String getDisplayName(String ID, Locale inLocale) {
+     ResourceBundle bundle =
+         ResourceBundle.getBundle(RB_LOCALE_ELEMENTS, inLocale);
+
+     // Prefer a display name registered for the entire ID.
+     try {
+         return bundle.getString(RB_DISPLAY_NAME_PREFIX + ID);
+     } catch (MissingResourceException ignored) {
+         // No whole-ID name; synthesize one from the pattern below.
+     }
+
+     try {
+         // Construct the formatter first; if getString() fails
+         // we'll land in the catch clause below.
+         MessageFormat format = new MessageFormat(
+             bundle.getString(RB_DISPLAY_NAME_PATTERN));
+         // Split the ID at the first '-' into one or two entity names.
+         int dash = ID.indexOf('-');
+         Object[] args = (dash < 0)
+             ? new Object[] { new Integer(1), ID }
+             : new Object[] { new Integer(2), ID.substring(0, dash),
+                              ID.substring(dash + 1) };
+         // Use display names for the scripts, if they exist
+         for (int j = 1; j < args.length; ++j) {
+             try {
+                 args[j] = bundle.getString(
+                     RB_SCRIPT_DISPLAY_NAME_PREFIX + (String) args[j]);
+             } catch (MissingResourceException ignored) {
+                 // Keep the raw entity name from the ID.
+             }
+         }
+         return format.format(args);
+     } catch (MissingResourceException e) {
+         // Reached only if the name pattern itself is missing, i.e. a
+         // broken build or a damaged root RB_LOCALE_ELEMENTS resource.
+         throw new RuntimeException("Missing resource \""
+             + RB_DISPLAY_NAME_PATTERN + "\" in " + RB_LOCALE_ELEMENTS);
+     }
+ }
+
+ /**
+ * Returns the filter used by this transliterator, or <tt>null</tt>
+ * if this transliterator uses no filter.
+ */
+ public UnicodeFilter getFilter() {
+ return filter;
+ }
+
+ /**
+ * Changes the filter used by this transliterator. If the filter
+ * is set to <tt>null</tt> then no filtering will occur.
+ *
+ * <p>Callers must take care if a transliterator is in use by
+ * multiple threads. The filter should not be changed by one
+ * thread while another thread may be transliterating.
+ */
+ public void setFilter(UnicodeFilter filter) {
+ this.filter = filter;
+ }
+
+ /**
+ * Returns this transliterator's inverse. See the class
+ * documentation for details. This implementation simply inverts
+ * the two entities in the ID and attempts to retrieve the
+ * resulting transliterator. That is, if <code>getID()</code>
+ * returns "A-B", then this method will return the result of
+ * <code>getInstance("B-A")</code>, or <code>null</code> if that
+ * call fails.
+ *
+ * <p>This method does not take filtering into account. The
+ * returned transliterator will have no filter.
+ *
+ * <p>Subclasses with knowledge of their inverse may wish to
+ * override this method.
+ *
+ * @return a transliterator that is an inverse, not necessarily
+ * exact, of this transliterator, or <code>null</code> if no such
+ * transliterator is registered.
+ * @see #registerInstance
+ */
+    public Transliterator getInverse() {
+        int dash = ID.indexOf('-');
+        if (dash < 0) {
+            return null; // a one-part ID has nothing to swap
+        }
+        String swapped = ID.substring(dash+1) + '-' + ID.substring(0, dash);
+        return internalGetInstance(swapped);
+    }
+
+ /**
+ * Returns a <code>Transliterator</code> object given its ID.
+ * The ID must be either a system transliterator ID or an ID registered
+ * using <code>registerInstance()</code>.
+ *
+ * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
+ * @return A <code>Transliterator</code> object with the given ID
+ * @exception IllegalArgumentException if the given ID is invalid.
+ * @see #registerInstance
+ * @see #getAvailableIDs
+ * @see #getID
+ */
+    public static Transliterator getInstance(String ID) {
+        Transliterator result = internalGetInstance(ID);
+        if (result == null) {
+            // The lookup helper signals "cannot use this ID" with null
+            throw new IllegalArgumentException("Unsupported transliterator: " + ID);
+        }
+        return result;
+    }
+
+ /**
+ * Returns a transliterator object given its ID. Unlike getInstance(),
+ * this method returns null if it cannot make use of the given ID.
+ */
+ private static Transliterator internalGetInstance(String ID) {
+ Object obj = cache.get(ID); // entries stored in this file: parsed Data, a Class, or a placeholder
+ RuleBasedTransliterator.Data data = null;
+
+ if (obj instanceof RuleBasedTransliterator.Data) {
+ data = (RuleBasedTransliterator.Data) obj;
+ // Fall through to construct transliterator from cached Data object.
+ } else if (obj instanceof Class) {
+ try {
+ return (Transliterator) ((Class) obj).newInstance();
+ } catch (InstantiationException e) { // data stays null, so null is returned below
+ } catch (IllegalAccessException e2) {} // likewise
+ } else { // placeholder entry, or no usable entry at all (isReverse is then false)
+ synchronized (cache) { // NOTE(review): entry is not re-checked under the lock; concurrent callers may each parse
+ boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
+ String resourceName = RB_RULE_BASED_PREFIX; // bundle name: prefix + ID with '-' written as '$'
+ int i = ID.indexOf('-');
+ if (i < 0) {
+ resourceName += ID;
+ } else {
+ String IDLeft = ID.substring(0, i);
+ String IDRight = ID.substring(i+1);
+ resourceName += isReverse ? (IDRight + '$' + IDLeft) // reverse IDs read the opposite ID's bundle
+ : (IDLeft + '$' + IDRight);
+ }
+ try {
+ ResourceBundle resource = ResourceBundle.getBundle(resourceName);
+
+ // We allow the resource bundle to contain either an array
+ // of rules, or a single rule string.
+ String[] ruleArray;
+ try {
+ ruleArray = resource.getStringArray(RB_RULE);
+ } catch (Exception e) {
+ // This is a ClassCastException under JDK 1.1.8
+ ruleArray = new String[] { resource.getString(RB_RULE) };
+ }
+
+ data = RuleBasedTransliterator.parse(ruleArray,
+ isReverse
+ ? RuleBasedTransliterator.REVERSE
+ : RuleBasedTransliterator.FORWARD);
+
+ cache.put(ID, data); // replaces any placeholder so later calls take the Data branch
+ // Fall through to construct transliterator from Data object.
+ } catch (MissingResourceException e) {} // no such bundle or rule entry: data stays null
+ }
+ }
+
+ if (data != null) {
+ return new RuleBasedTransliterator(ID, data, null);
+ }
+
+ return null;
+ }
+
+ /**
+ * Registers a subclass of <code>Transliterator</code> with the
+ * system. This subclass must have a public constructor taking no
+ * arguments. When that constructor is called, the resulting
+ * object must return the <code>ID</code> passed to this method if
+ * its <code>getID()</code> method is called.
+ *
+ * @param ID the result of <code>getID()</code> for this
+ * transliterator
+ * @param transClass a subclass of <code>Transliterator</code>
+ * @see #registerInstance
+ * @see #unregister
+ */
+ public static void registerClass(String ID, Class transClass) {
+ cache.put(ID, transClass);
+ }
+
+ /**
+ * Unregisters a transliterator or class. This may be either
+ * a system transliterator or a user transliterator or class.
+ *
+ * @param ID the ID of the transliterator or class
+ * @return the <code>Object</code> that was registered with
+ * <code>ID</code>, or <code>null</code> if none was
+ * @see #registerInstance
+ * @see #registerClass
+ */
+ public static Object unregister(String ID) {
+ return cache.remove(ID);
+ }
+
+ /**
+ * Returns an enumeration over the programmatic names of registered
+ * <code>Transliterator</code> objects. This includes both system
+ * transliterators and user transliterators registered using
+ * <code>registerInstance()</code>. The enumerated names may be
+ * passed to <code>getInstance()</code>.
+ *
+ * @return An <code>Enumeration</code> over <code>String</code> objects
+ * @see #getInstance
+ * @see #registerInstance
+ */
+ public static final Enumeration getAvailableIDs() {
+ return cache.keys();
+ }
+
+    static {
+        // Create the registry before anything can fail: registerClass()
+        // below uses it even when no rule-based IDs are listed.
+        cache = new Hashtable();
+        ResourceBundle bundle = ResourceBundle.getBundle(RB_LOCALE_ELEMENTS);
+        try {
+            String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
+            for (int i=0; i<ruleBasedIDs.length; ++i) {
+                String ID = ruleBasedIDs[i];
+                // A leading '*' selects the reverse placeholder for the ID.
+                boolean isReverse = ID.startsWith("*");
+                if (isReverse) {
+                    ID = ID.substring(1);
+                }
+                cache.put(ID, isReverse ? REVERSE_RULE_BASED_PLACEHOLDER
+                                        : RULE_BASED_PLACEHOLDER);
+            }
+        } catch (MissingResourceException e) { /* no rule-based IDs listed */ }
+
+        // Register non-rule-based transliterators
+        registerClass(HexToUnicodeTransliterator._ID,
+                      HexToUnicodeTransliterator.class);
+        registerClass(UnicodeToHexTransliterator._ID,
+                      UnicodeToHexTransliterator.class);
+        registerClass(NullTransliterator._ID,
+                      NullTransliterator.class);
+    }
+}
diff --git a/src/com/ibm/text/UnicodeFilter.java b/src/com/ibm/text/UnicodeFilter.java
new file mode 100755
index 0000000..3753883
--- /dev/null
+++ b/src/com/ibm/text/UnicodeFilter.java
@@ -0,0 +1,22 @@
+package com.ibm.text;
+
+/**
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * @see UnicodeFilterLogic
+ */
+
+public interface UnicodeFilter {
+
+ /**
+ * Returns <tt>true</tt> for characters that are in the selected
+ * subset. In other words, if a character is <b>to be
+ * filtered</b>, then <tt>isIn()</tt> returns
+ * <b><tt>false</tt></b>.
+ */
+ public boolean isIn(char c);
+}
diff --git a/src/com/ibm/text/UnicodeFilterLogic.java b/src/com/ibm/text/UnicodeFilterLogic.java
new file mode 100755
index 0000000..f9e6ec1
--- /dev/null
+++ b/src/com/ibm/text/UnicodeFilterLogic.java
@@ -0,0 +1,112 @@
+package com.ibm.text;
+
+/**
+ * <code>UnicodeFilterLogic</code> provides logical operators on
+ * {@link UnicodeFilter} objects. This class cannot be instantiated;
+ * it consists only of static methods. The static methods return
+ * filter objects that perform logical inversion (<tt>not</tt>),
+ * intersection (<tt>and</tt>), or union (<tt>or</tt>) of the given
+ * filter objects.
+ */
+public final class UnicodeFilterLogic {
+
+    // Static methods only; never instantiated.
+    private UnicodeFilterLogic() {}
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that inverts the given filter.
+     */
+    public static UnicodeFilter not(final UnicodeFilter f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return !f.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the two given filters.  That is,
+     * if <tt>f.isIn()</tt> is <tt>false</tt>, then <tt>g.isIn()</tt>
+     * is not called, and <tt>isIn()</tt> returns <tt>false</tt>.
+     * <p>If one filter is <tt>null</tt>, the other (which must not be) is returned.
+     */
+    public static UnicodeFilter and(final UnicodeFilter f,
+                                    final UnicodeFilter g) {
+        if (f == null) {
+            return g;
+        }
+        if (g == null) {
+            return f;
+        }
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return f.isIn(c) && g.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>false</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>false</tt>.
+     */
+    public static UnicodeFilter and(final UnicodeFilter[] f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                for (int i=0; i<f.length; ++i) {
+                    if (!f[i].isIn(c)) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the two given filters.  That is, if
+     * <tt>f.isIn()</tt> is <tt>true</tt>, then <tt>g.isIn()</tt> is
+     * not called, and <tt>isIn()</tt> returns <tt>true</tt>.
+     * <p>If one filter is <tt>null</tt>, the other (which must not be) is returned.
+     */
+    public static UnicodeFilter or(final UnicodeFilter f,
+                                   final UnicodeFilter g) {
+        if (f == null) {
+            return g;
+        }
+        if (g == null) {
+            return f;
+        }
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return f.isIn(c) || g.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>true</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>true</tt>.
+     */
+    public static UnicodeFilter or(final UnicodeFilter[] f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                for (int i=0; i<f.length; ++i) {
+                    if (f[i].isIn(c)) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        };
+    }
+
+    // TODO: Add nand() & nor() for convenience, if needed.
+}
diff --git a/src/com/ibm/text/UnicodeSet.java b/src/com/ibm/text/UnicodeSet.java
new file mode 100755
index 0000000..c2d0bf5
--- /dev/null
+++ b/src/com/ibm/text/UnicodeSet.java
@@ -0,0 +1,1384 @@
+package com.ibm.text;
+
+import java.text.*;
+import java.util.Dictionary;
+
+/**
+ * A mutable set of Unicode characters. Objects of this class
+ * represent <em>character classes</em> used in regular expressions.
+ * Such classes specify a subset of the set of all Unicode characters,
+ * which in this implementation is the characters from U+0000 to
+ * U+FFFF, ignoring surrogates.
+ *
+ * <p>This class supports two APIs. The first is modeled after Java 2's
+ * <code>java.util.Set</code> interface, although this class does not
+ * implement that interface. All methods of <code>Set</code> are
+ * supported, with the modification that they take a character range
+ * or single character instead of an <code>Object</code>, and they
+ * take a <code>UnicodeSet</code> instead of a <code>Collection</code>.
+ *
+ * <p>The second API is the
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
+ * <code>java.text.Format</code>-derived classes. Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method <code>applyPattern()</code> sets all
+ * attributes of a <code>UnicodeSet</code> at once, based on a
+ * string pattern.
+ *
+ * <p>In addition, the set complement operation is supported through
+ * the <code>complement()</code> method.
+ *
+ * <p><b>Pattern syntax</b></p>
+ *
+ * Patterns are accepted by the constructors and the
+ * <code>applyPattern()</code> methods and returned by the
+ * <code>toPattern()</code> method. These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes:
+ *
+ * <blockquote>
+ * <table>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>pattern := </code></td>
+ * <td valign="top"><code>('[' '^'? item* ']') |
+ * ('[:' '^'? category ':]')</code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>item := </code></td>
+ * <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>pattern-expr := </code></td>
+ * <td valign="top"><code>pattern | pattern-expr pattern |
+ * pattern-expr op pattern<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>op := </code></td>
+ * <td valign="top"><code>'&' | '-'<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>special := </code></td>
+ * <td valign="top"><code>'[' | ']' | '-'<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>char := </code></td>
+ * <td valign="top"><em>any character that is not</em><code> special<br>
+ * | ('\u005C' </code><em>any character</em><code>)<br>
+ * | ('\u005Cu' hex hex hex hex)<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>hex := </code></td>
+ * <td valign="top"><em>any character for which
+ * </em><code>Character.digit(c, 16)</code><em>
+ * returns a non-negative result</em></td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="right"><code>category := </code></td>
+ * <td valign="top"><code>'M' | 'N' | 'Z' | 'C' | 'L' | 'P' |
+ * 'S' | 'Mn' | 'Mc' | 'Me' | 'Nd' | 'Nl' | 'No' | 'Zs' | 'Zl' |
+ * 'Zp' | 'Cc' | 'Cf' | 'Cs' | 'Co' | 'Cn' | 'Lu' | 'Ll' | 'Lt'
+ * | 'Lm' | 'Lo' | 'Pc' | 'Pd' | 'Ps' | 'Pe' | 'Po' | 'Sm' |
+ * 'Sc' | 'Sk' | 'So'</code></td>
+ * </tr>
+ * </table>
+ * <br>
+ * <table border="1">
+ * <tr>
+ * <td>Legend: <table>
+ * <tr>
+ * <td nowrap valign="top"><code>a := b</code></td>
+ * <td width="20" valign="top"> </td>
+ * <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a?</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">zero or one instance of <code>a</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a*</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">zero or more instances of <code>a</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a | b</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">either <code>a</code> or <code>b</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>'a'</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">the literal string between the quotes </td>
+ * </tr>
+ * </table>
+ * </td>
+ * </tr>
+ * </table>
+ * </blockquote>
+ *
+ * Any character may be preceded by a backslash in order to remove any special
+ * meaning. White space characters, as defined by Character.isWhitespace(), are
+ * ignored, unless they are escaped.
+ *
+ * Patterns specify individual characters, ranges of characters, and
+ * Unicode character categories. When elements are concatenated, they
+ * specify their union. To complement a set, place a '^' immediately
+ * after the opening '[' or '[:'. In any other location, '^' has no
+ * special meaning.
+ *
+ * <p>Ranges are indicated by placing a '-' between two
+ * characters, as in "a-z". This specifies the range of all
+ * characters from the left to the right, in Unicode order. If the
+ * left and right characters are the same, then the range consists of
+ * just that character. If the left character is greater than the
+ * right character it is a syntax error. If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal. Thus "[a\u005C-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\u005Cu0000-\u005Cu0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096. Operators ('&' and '-') have equal
+ * precedence and bind left-to-right. Thus
+ * "[[:L:]-[a-z]-[\u005Cu0100-\u005Cu01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\u005Cu0100-\u005Cu01FF]]". This only really matters for
+ * difference; intersection is commutative.
+ *
+ * <table>
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+FFFF
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
+ * <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[:Lu:]</code>
+ * <td>The set of characters belonging to the given
+ * Unicode category, as defined by <code>Character.getType()</code>; in
+ * this case, Unicode uppercase letters
+ * <tr valign=top><td nowrap><code>[:L:]</code>
+ * <td>The set of characters belonging to all Unicode categories
+ * starting with 'L', that is, <code>[[:Lu:][:Ll:][:Lt:][:Lm:][:Lo:]]</code>.
+ * </table>
+ *
+ * <p><b>Character categories.</b>
+ *
+ * Character categories are specified using the POSIX-like syntax
+ * '[:Lu:]'. The complement of a category is specified by inserting
+ * '^' after the opening '[:'. The following category names are
+ * recognized. Actual determination of category data uses
+ * <code>Character.getType()</code>, so it reflects the underlying
+ * implementation used by <code>Character</code>. As of Java 2 and
+ * JDK 1.1.8, this is Unicode 2.1.2.
+ *
+ * <pre>
+ * Normative
+ * Mn = Mark, Non-Spacing
+ * Mc = Mark, Spacing Combining
+ * Me = Mark, Enclosing
+ *
+ * Nd = Number, Decimal Digit
+ * Nl = Number, Letter
+ * No = Number, Other
+ *
+ * Zs = Separator, Space
+ * Zl = Separator, Line
+ * Zp = Separator, Paragraph
+ *
+ * Cc = Other, Control
+ * Cf = Other, Format
+ * Cs = Other, Surrogate
+ * Co = Other, Private Use
+ * Cn = Other, Not Assigned
+ *
+ * Informative
+ * Lu = Letter, Uppercase
+ * Ll = Letter, Lowercase
+ * Lt = Letter, Titlecase
+ * Lm = Letter, Modifier
+ * Lo = Letter, Other
+ *
+ * Pc = Punctuation, Connector
+ * Pd = Punctuation, Dash
+ * Ps = Punctuation, Open
+ * Pe = Punctuation, Close
+ * *Pi = Punctuation, Initial quote
+ * *Pf = Punctuation, Final quote
+ * Po = Punctuation, Other
+ *
+ * Sm = Symbol, Math
+ * Sc = Symbol, Currency
+ * Sk = Symbol, Modifier
+ * So = Symbol, Other
+ * </pre>
+ * *Unsupported by Java (and hence unsupported by UnicodeSet).
+ *
+ * @author Alan Liu
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.5 $ $Date: 2000/01/13 23:53:23 $
+ */
+public class UnicodeSet {
+ /**
+ * The internal representation is a StringBuffer of even length.
+ * Each pair of characters represents a range that is included in
+ * the set. A single character c is represented as cc. Thus, the
+ * ranges in the set are (a,b), a and b inclusive, where a =
+ * pairs.charAt(i) and b = pairs.charAt(i+1) for all even i, 0 <=
+ * i <= pairs.length()-2. Pairs are always stored in ascending
+ * Unicode order. Pairs are always stored in shortest form. For
+ * example, if the pair "hh", representing the single character
+ * 'h', is added to the pairs list "agik", representing the ranges
+ * 'a'-'g' and 'i'-'k', the result is "ak", not "aghhik".
+ *
+ * This representation format was originally used in Richard
+ * Gillam's CharSet class.
+ */
+ private StringBuffer pairs;
+
+ private static final String CATEGORY_NAMES =
+ //                   1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2
+ //0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8
+ "CnLuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCf--CoCsPdPsPePcPoSmScSkSo";
+
+ private static final int UNSUPPORTED_CATEGORY = 17;
+
+ private static final char VARIABLE_REF_OPEN = '{';
+ private static final char VARIABLE_REF_CLOSE = '}';
+
+ private static final int CATEGORY_COUNT = 29;
+
+ /**
+ * A cache mapping character category integers, as returned by
+ * Character.getType(), to pairs strings. Entries are initially
+ * null and are created on demand.
+ */
+ private static final String[] CATEGORY_PAIRS_CACHE =
+ new String[CATEGORY_COUNT];
+
+ //----------------------------------------------------------------
+ // Debugging and testing
+ //----------------------------------------------------------------
+
+ /**
+ * Return the representation of this set as a list of character
+ * ranges. Ranges are listed in ascending Unicode order. For
+ * example, the set [a-zA-M3] is represented as "33AMaz".
+ */
+ public String getPairs() {
+ return pairs.toString();
+ }
+
+ //----------------------------------------------------------------
+ // Public API
+ //----------------------------------------------------------------
+
+ /**
+ * Constructs an empty set.
+ */
+ public UnicodeSet() {
+ pairs = new StringBuffer();
+ }
+
+ /**
+ * Constructs a set from the given pattern. See the class description
+ * for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @exception IllegalArgumentException if the pattern contains
+ * a syntax error.
+ */
+ public UnicodeSet(String pattern) {
+ applyPattern(pattern);
+ }
+
+ /**
+ * Constructs a set from the given pattern. See the class description
+ * for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos on input, the position in pattern at which to start parsing.
+ * On output, the position after the last character parsed.
+ * @param varNameToChar a mapping from variable names (String) to characters
+ * (Character). May be null. If varCharToSet is non-null, then names may
+ * map to either single characters or sets, depending on whether a mapping
+ * exists in varCharToSet. If varCharToSet is null then all names map to
+ * single characters.
+ * @param varCharToSet a mapping from characters (Character objects from
+ * varNameToChar) to UnicodeSet objects. May be null. Is only used if
+ * varNameToChar is also non-null.
+ * @exception IllegalArgumentException if the pattern
+ * contains a syntax error.
+ */
+ public UnicodeSet(String pattern, ParsePosition pos,
+ Dictionary varNameToChar, Dictionary varCharToSet) {
+ applyPattern(pattern, pos, varNameToChar, varCharToSet);
+ }
+
+ /**
+ * Constructs a set from the given Unicode character category.
+ * @param category an integer indicating the character category as
+ * returned by <code>Character.getType()</code>.
+ * @exception IllegalArgumentException if the given
+ * category is invalid.
+ */
+ public UnicodeSet(int category) {
+ if (category < 0 || category >= CATEGORY_COUNT ||
+ category == UNSUPPORTED_CATEGORY) {
+ throw new IllegalArgumentException("Invalid category");
+ }
+ pairs = new StringBuffer(getCategoryPairs(category));
+ }
+
+ /**
+ * Modifies this set to represent the set specified by the given pattern.
+ * See the class description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @exception IllegalArgumentException if the pattern
+ * contains a syntax error.
+ */
+    public void applyPattern(String pattern) {
+        ParsePosition pos = new ParsePosition(0);
+        pairs = parse(pattern, pos, null, null);
+
+        // Only whitespace may follow the parsed pattern.  Scan the
+        // remainder and report the index of the first character that
+        // is anything else.
+        int end = pattern.length();
+        for (int at = pos.getIndex(); at < end; ++at) {
+            if (Character.isWhitespace(pattern.charAt(at))) {
+                continue;
+            }
+            throw new IllegalArgumentException("Parse of \"" + pattern +
+                                               "\" failed at " + at);
+        }
+    }
+
+ /**
+ * Modifies this set to represent the set specified by the given pattern.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos on input, the position in pattern at which to start parsing.
+ * On output, the position after the last character parsed.
+ * @param varNameToChar a mapping from variable names (String) to characters
+ * (Character). May be null. If varCharToSet is non-null, then names may
+ * map to either single characters or sets, depending on whether a mapping
+ * exists in varCharToSet. If varCharToSet is null then all names map to
+ * single characters.
+ * @param varCharToSet a mapping from characters (Character objects from
+ * varNameToChar) to UnicodeSet objects. May be null. Is only used if
+ * varNameToChar is also non-null.
+ * @exception IllegalArgumentException if the pattern
+ * contains a syntax error.
+ */
+ private void applyPattern(String pattern, ParsePosition pos,
+ Dictionary varNameToChar, Dictionary varCharToSet) {
+ pairs = parse(pattern, pos, varNameToChar, varCharToSet);
+ }
+
+ /**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a UnicodeSet constructor, it
+ * will produce another set that is equal to this one.
+ */
+ public String toPattern() {
+ StringBuffer result = new StringBuffer();
+ result.append('[');
+
+ // iterate through the ranges in the UnicodeSet
+ for (int i=0; i<pairs.length(); i+=2) {
+ // emit a lone character for a one-element range, otherwise
+ // "start-end". NOTE(review): characters are appended unescaped, so
+ // sets holding syntax characters such as ']' may not round-trip.
+ result.append(pairs.charAt(i));
+ if (pairs.charAt(i) != pairs.charAt(i+1)) {
+ result.append('-').append(pairs.charAt(i+1));
+ }
+ }
+
+ return result.append(']').toString();
+ }
+
+ /**
+ * Returns the number of elements in this set (its cardinality),
+ * <em>n</em>, where <code>0 <= </code><em>n</em><code> <= 65536</code>.
+ *
+ * @return the number of elements in this set (its cardinality).
+ */
+    public int size() {
+        int count = 0;
+        for (int hi = pairs.length() - 1; hi > 0; hi -= 2) {
+            count += 1 + pairs.charAt(hi) - pairs.charAt(hi-1);
+        }
+        return count;
+    }
+
+ /**
+ * Returns <tt>true</tt> if this set contains no elements.
+ *
+ * @return <tt>true</tt> if this set contains no elements.
+ */
+ public boolean isEmpty() {
+ return pairs.length() == 0;
+ }
+
+ /**
+ * Returns <tt>true</tt> if this set contains the specified range
+ * of chars.
+ *
+ * @return <tt>true</tt> if this set contains the specified range
+ * of chars.
+ */
+    public boolean contains(char first, char last) {
+        // Walk the range end points (odd indices) to the first one that
+        // is >= last; that range must also begin at or before first.
+        int end = 1, n = pairs.length();
+        while (end < n && pairs.charAt(end) < last) { end += 2; }
+        return end < n && pairs.charAt(end-1) <= first;
+    }
+
+ /**
+ * Returns <tt>true</tt> if this set contains the specified char.
+ *
+ * @return <tt>true</tt> if this set contains the specified char.
+ */
+ public boolean contains(char c) {
+ return contains(c, c);
+ }
+
+ /**
+ * Returns <tt>true</tt> if this set contains any character whose low byte
+ * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
+ * indexing.
+ */
+    public boolean containsIndexValue(int v) {
+        /* The index value v, in the range [0,255], is contained in this set if
+         * it is contained in any pair aaxx..bbyy of this set, where aa and bb
+         * are the high bytes.  If bb == aa, v is contained if xx <= v <= yy.  If
+         * bb == aa+1, v is contained if xx <= v || v <= yy (identical to the
+         * time zone month containment logic).  If bb > aa+1 the pair spans a
+         * whole 256-character page, so every v is contained.
+         */
+        for (int i=0; i<pairs.length(); i+=2) {
+            char low = pairs.charAt(i);
+            char high = pairs.charAt(i+1);
+            int pageSpan = (high >> 8) - (low >> 8);
+            if (pageSpan == 0) {
+                if ((low & 0xFF) <= v && v <= (high & 0xFF)) {
+                    return true;
+                }
+            } else if (pageSpan > 1 || (low & 0xFF) <= v || v <= (high & 0xFF)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+ /**
+ * Adds the specified range to this set if it is not already
+ * present. If this set already contains the specified range,
+ * the call leaves this set unchanged. If <code>first > last</code>
+ * then an empty range is added, leaving the set unchanged.
+ *
+ * @param first first character, inclusive, of range to be added
+ * to this set.
+ * @param last last character, inclusive, of range to be added
+ * to this set.
+ */
+ public void add(char first, char last) {
+ if (first <= last) {
+ addPair(pairs, first, last);
+ }
+ }
+
+ /**
+ * Adds the specified character to this set if it is not already
+ * present. If this set already contains the specified character,
+ * the call leaves this set unchanged.
+ */
+ public final void add(char c) {
+ add(c, c);
+ }
+
+ /**
+ * Removes the specified range from this set if it is present.
+ * The set will not contain the specified range once the call
+ * returns. If <code>first > last</code> then an empty range is
+ * removed, leaving the set unchanged.
+ *
+ * @param first first character, inclusive, of range to be removed
+ * from this set.
+ * @param last last character, inclusive, of range to be removed
+ * from this set.
+ */
+ public void remove(char first, char last) {
+ if (first <= last) {
+ removePair(pairs, first, last);
+ }
+ }
+
+ /**
+ * Removes the specified character from this set if it is present.
+ * The set will not contain the specified character once the call
+ * returns.
+ */
+ public final void remove(char c) {
+ remove(c, c);
+ }
+
+ /**
+ * Returns <tt>true</tt> if the specified set is a <i>subset</i>
+ * of this set.
+ *
+ * @param c set to be checked for containment in this set.
+ * @return <tt>true</tt> if this set contains all of the elements of the
+ * specified set.
+ */
+    public boolean containsAll(UnicodeSet c) {
+        // Pairs are kept in ascending order, so one forward scan suffices:
+        // every range of c must lie inside a single range of this set.
+        int end = 1, limit = pairs.length();
+        for (int j=0; j<c.pairs.length(); j+=2) {
+            char first = c.pairs.charAt(j);
+            char last = c.pairs.charAt(j+1);
+            while (end < limit && pairs.charAt(end) < last) {
+                end += 2;
+            }
+            if (end > limit || first < pairs.charAt(end-1)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+ /**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * set is modified while the operation is in progress.
+ *
+ * @param c set whose elements are to be added to this set.
+ * @see #add(char, char)
+ */
+ public void addAll(UnicodeSet c) {
+ doUnion(pairs, c.pairs.toString());
+ }
+
+ /**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * @param c set that defines which elements this set will retain.
+ * @see #removeAll(UnicodeSet)
+ */
+ public void retainAll(UnicodeSet c) {
+ doIntersection(pairs, c.pairs.toString());
+ }
+
+ /**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * @param c set that defines which elements will be removed from
+ * this set.
+ * @see #retainAll(UnicodeSet)
+ */
+ public void removeAll(UnicodeSet c) {
+ doDifference(pairs, c.pairs.toString());
+ }
+
+ /**
+ * Inverts this set. This operation modifies this set so that
+ * its value is its complement. This is equivalent to the pseudo code:
+ * <code>this = new UnicodeSet("[&#92;u0000-&#92;uFFFF]").removeAll(this)</code>.
+ */
+ public void complement() {
+ doComplement(pairs);
+ }
+
+ /**
+ * Removes all of the elements from this set. This set will be
+ * empty (its pairs list has length zero) after this call returns.
+ */
+ public void clear() {
+ pairs.setLength(0);
+ }
+
+ /**
+ * Compares the specified object with this set for equality. Returns
+ * <tt>true</tt> if the specified object is also a <code>UnicodeSet</code>
+ * whose pairs list has exactly the same contents as this set's.
+ *
+ * @param o Object to be compared for equality with this set.
+ * @return <tt>true</tt> if the specified Object is equal to this set.
+ */
+ public boolean equals(Object o) {
+ // StringBuffer does not override equals(), so comparing the buffers
+ // directly would test identity; compare their contents instead.
+ return o instanceof UnicodeSet &&
+ pairs.toString().equals(((UnicodeSet)o).pairs.toString());
+ }
+
+ /**
+ * Returns the hash code value for this set, derived from the contents
+ * of the pairs list (a StringBuffer only has an identity hash code).
+ * @return the hash code value for this set.
+ * @see Object#hashCode()
+ */
+ public int hashCode() {
+ return pairs.toString().hashCode();
+ }
+
+ /**
+ * Returns the class name followed by this set's pattern in braces.
+ */
+ public String toString() {
+ return getClass().getName() + '{' + toPattern() + '}';
+ }
+
+ //----------------------------------------------------------------
+ // Implementation: Pattern parsing
+ //----------------------------------------------------------------
+
+ /**
+ * Parses the given pattern, starting at the given position. The character
+ * at pattern.charAt(pos.getIndex()) must be '[', or the parse fails.
+ * Parsing continues until the corresponding closing ']'. If a syntax error
+ * is encountered between the opening and closing bracket, the parse fails.
+ * Upon return from a successful parse, the ParsePosition is updated to
+ * point to the character following the closing ']', and a StringBuffer
+ * containing a pairs list for the parsed pattern is returned. This method
+ * calls itself recursively to parse embedded subpatterns.
+ *
+ * @param pattern the string containing the pattern to be parsed, from the
+ * '[' at pos.getIndex() through the corresponding closing ']'.
+ * @param pos upon entry, the position at which to begin parsing. Upon
+ * return from a successful parse, pos.getIndex() is the index just after
+ * the closing ']', which is pattern.length() if that ']' ends the string.
+ * @param varNameToChar maps variable names (String) to Character; if null,
+ * variable references are not parsed.
+ * @param varCharToSet maps a variable's Character to a UnicodeSet; if
+ * null, set variables are not looked up.
+ * @return a StringBuffer containing a pairs list for the parsed substring
+ * @exception IllegalArgumentException if the parse fails.
+ */
+ private static StringBuffer parse(String pattern, ParsePosition pos,
+ Dictionary varNameToChar, Dictionary varCharToSet) {
+ StringBuffer pairsBuf = new StringBuffer();
+ boolean invert = false;
+
+ int lastChar = -1; // This is either a char (0..FFFF) or -1
+ char lastOp = 0;
+
+ /* This loop iterates over the characters in the pattern. We start at
+ * the position specified by pos. We exit the loop when either a
+ * matching closing ']' is seen, or we read all characters of the
+ * pattern. In the latter case an error will be thrown.
+ */
+ /* Pattern syntax:
+ * pat := '[' '^'? elem* ']'
+ * elem := a | a '-' a | set | set op set
+ * set := pat | (a set variable)
+ * op := '&' | '-'
+ * a := (a character, possibly defined by a var)
+ */
+
+ // mode 0: No chars parsed yet; next must be '['
+ // mode 1: '[' seen; if next is '^' or ':' then special
+ // mode 2: '[' '^'? seen; parse pattern and close with ']'
+ // mode 3: '[:' seen; parse category and close with ':]'
+ int mode = 0;
+ int openPos = 0; // offset to opening '['
+ int i = pos.getIndex();
+ int limit = pattern.length();
+ for (; i<limit; ++i) {
+ /* If the next element is a single character, c will be set to it,
+ * and nestedPairs will be null. In this case isLiteral indicates
+ * whether the character should assume special meaning if it has
+ * one. If the next element is a nested set, either via a variable
+ * reference, or via an embedded "[..]" or "[:..:]" pattern, then
+ * nestedPairs will be set to the pairs list for the nested set, and
+ * c's value should be ignored.
+ */
+ char c = pattern.charAt(i);
+ String nestedPairs = null;
+ boolean isLiteral = false;
+
+ // Ignore whitespace. This is not Unicode whitespace, but Java
+ // whitespace, a subset of Unicode whitespace.
+ if (Character.isWhitespace(c)) {
+ continue;
+ }
+
+ // Parse the opening '[' and optional following '^'
+ switch (mode) {
+ case 0:
+ if (c == '[') {
+ mode = 1; // Next look for '^'
+ openPos = i;
+ continue;
+ } else {
+ throw new IllegalArgumentException("Missing opening '['");
+ }
+ case 1:
+ mode = 2;
+ switch (c) {
+ case '^':
+ invert = true;
+ continue; // Back to top to fetch next character
+ case ':':
+ if (i == openPos+1) {
+ // '[:' cannot have whitespace in it
+ --i;
+ c = '[';
+ mode = 3;
+ // Fall through and parse category normally
+ }
+ break; // Fall through
+ case '-':
+ isLiteral = true; // Treat leading '-' as a literal
+ break; // Fall through
+ }
+ // else fall through and parse this character normally
+ }
+
+ // After the opening matter ("[", "[^", or "[:") is parsed, mode is 2 if
+ // we want a closing ']', or 3 if we parse a category closed by ":]".
+
+ /* Handle escapes. If a character is escaped, then it assumes its
+ * literal value. This is true for all characters, both special
+ * characters and characters with no special meaning. We also
+ * interpret '\\uxxxx' Unicode escapes here (as literals).
+ */
+ if (c == '\\') {
+ ++i;
+ if (i < limit) {
+ c = pattern.charAt(i);
+ isLiteral = true;
+ if (c == 'u') {
+ if ((i+4) >= limit) {
+ throw new IllegalArgumentException("Invalid \\u escape");
+ }
+ c = '\u0000';
+ for (int j=(++i)+4; i<j; ++i) { // [sic]
+ int digit = Character.digit(pattern.charAt(i), 16);
+ if (digit<0) {
+ throw new IllegalArgumentException("Invalid \\u escape");
+ }
+ c = (char) ((c << 4) | digit);
+ }
+ --i; // Move i back to last parsed character
+ }
+ } else {
+ throw new IllegalArgumentException("Trailing '\\'");
+ }
+ }
+
+ /* Parse variable references. These are treated as literals. If a
+ * variable refers to a UnicodeSet, nestedPairs is assigned here.
+ * Variable names are only parsed if varNameToChar is not null.
+ * Set variables are only looked up if varCharToSet is not null.
+ */
+ else if (varNameToChar != null && !isLiteral && c == VARIABLE_REF_OPEN) {
+ ++i;
+ int j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
+ if (i == j || j < 0) { // empty or unterminated
+ throw new IllegalArgumentException("Illegal variable reference");
+ }
+ String name = pattern.substring(i, j);
+ i = j; // Make i point to the closing delimiter; the name is consumed
+ Character ch = (Character) varNameToChar.get(name);
+ if (ch == null) {
+ throw new IllegalArgumentException("Undefined variable: "
+ + name);
+ }
+ c = ch.charValue();
+ isLiteral = true;
+
+ if (varCharToSet != null) {
+ UnicodeSet set = (UnicodeSet) varCharToSet.get(ch);
+ if (set != null) {
+ nestedPairs = set.pairs.toString();
+ }
+ }
+ }
+
+ /* An opening bracket indicates the first bracket of a nested
+ * subpattern, either a normal pattern or a category pattern. We
+ * recognize these here and set nestedPairs accordingly.
+ */
+ else if (!isLiteral && c == '[') {
+ // Handle "[:...:]", representing a character category
+ char d = charAfter(pattern, i);
+ if (d == ':') {
+ i += 2;
+ int j = pattern.indexOf(":]", i);
+ if (j < 0) {
+ throw new IllegalArgumentException("Missing \":]\"");
+ }
+ nestedPairs = getCategoryPairs(pattern.substring(i, j));
+ i = j+1; // Make i point to ']'
+ if (mode == 3) {
+ // Entire pattern is a category; leave parse loop
+ pairsBuf.append(nestedPairs);
+ break;
+ }
+ } else {
+ // Recurse to get the pairs for this nested set.
+ pos.setIndex(i); // The nested parse starts at this '['
+ nestedPairs = parse(pattern, pos, varNameToChar, varCharToSet).toString();
+ i = pos.getIndex() - 1; // - 1 to point at ']'
+ }
+ }
+
+ /* At this point we have either a character c, or a nested set. If
+ * we have encountered a nested set, either embedded in the pattern,
+ * or as a variable, we have a non-null nestedPairs, and c should be
+ * ignored. Otherwise c is the current character, and isLiteral
+ * indicates whether it is an escaped literal (or variable) or a
+ * normal unescaped character. Unescaped characters '-', '&', and
+ * ']' have special meanings.
+ */
+ if (nestedPairs != null) {
+ if (lastChar >= 0) {
+ if (lastOp != 0) {
+ throw new IllegalArgumentException("Illegal rhs for " + (char)lastChar + lastOp);
+ }
+ addPair(pairsBuf, (char)lastChar, (char)lastChar);
+ lastChar = -1;
+ }
+ switch (lastOp) {
+ case '-':
+ doDifference(pairsBuf, nestedPairs);
+ break;
+ case '&':
+ doIntersection(pairsBuf, nestedPairs);
+ break;
+ case 0:
+ doUnion(pairsBuf, nestedPairs);
+ break;
+ }
+ lastOp = 0;
+ } else if (!isLiteral && c == ']') {
+ // Final closing delimiter. This is the only way we leave this
+ // loop if the pattern is well-formed.
+ break;
+ } else if (lastOp == 0 && !isLiteral && (c == '-' || c == '&')) {
+ lastOp = c;
+ } else if (lastOp == '-') {
+ if (lastChar < 0 || lastChar > c) { // no start char, or reversed
+ throw new IllegalArgumentException("Invalid range ending at " + c);
+ }
+ addPair(pairsBuf, (char)lastChar, c);
+ lastOp = 0;
+ lastChar = -1;
+ } else if (lastOp != 0) {
+ // We have <set>&<char> or <char>&<char>
+ throw new IllegalArgumentException("Unquoted " + lastOp);
+ } else {
+ if (lastChar >= 0) {
+ // We have <char><char>
+ addPair(pairsBuf, (char)lastChar, (char)lastChar);
+ }
+ lastChar = c;
+ }
+ }
+
+ // Handle unprocessed stuff preceding the closing ']'
+ if (lastOp == '-') {
+ // Trailing '-' is treated as literal
+ addPair(pairsBuf, lastOp, lastOp);
+ } else if (lastOp == '&') {
+ throw new IllegalArgumentException("Unquoted trailing " + lastOp);
+ }
+ if (lastChar >= 0) {
+ addPair(pairsBuf, (char)lastChar, (char)lastChar);
+ }
+
+ /**
+ * If we saw a '^' after the initial '[' of this pattern, then perform
+ * the complement. (Inversion after '[:' is handled elsewhere.)
+ */
+ if (invert) {
+ doComplement(pairsBuf);
+ }
+
+ /**
+ * i indexes the last character we parsed or is pattern.length(). In
+ * the latter case, we have run off the end without finding a closing
+ * ']'. Otherwise, we know i < pattern.length(), and we set the
+ * ParsePosition to the next character to be parsed.
+ */
+ if (i == limit) {
+ throw new IllegalArgumentException("Missing ']'");
+ }
+ pos.setIndex(i+1);
+
+ return pairsBuf;
+ }
+
+ //----------------------------------------------------------------
+ // Implementation: Efficient in-place union & difference
+ //----------------------------------------------------------------
+
+ /**
+ * Performs a union operation: adds the range 'c'-'d' to the given
+ * pairs list. The pairs list is modified in place. The result
+ * is normalized (in order and as short as possible). For
+ * example, addPair("am", 'l', 'q') => "aq". addPair("ampz", 'n',
+ * 'o') => "az". addPair("acegik", 'b', 'j') => "ak".
+ *
+ * @param pairs normalized pairs list to be modified in place
+ * @param c first character, inclusive, of the range to add
+ * @param d last character, inclusive, of the range to add (c <= d)
+ */
+ private static void addPair(StringBuffer pairs, char c, char d) {
+ int length = pairs.length();
+
+ // Skip every range that lies entirely before c-d and does not
+ // touch it, that is, whose end point is less than c-1. The
+ // arithmetic is done in int, so 0xFFFF + 1 does not wrap around.
+ int start = 0;
+ while (start < length && (pairs.charAt(start+1) + 1) < c) {
+ start += 2;
+ }
+
+ // Absorb every following range that overlaps or touches the
+ // range being added. There may be more than one: adding 'b'-'j'
+ // to "acegik" has to swallow all three existing ranges.
+ char first = c;
+ char last = d;
+ int limit = start;
+ while (limit < length && pairs.charAt(limit) <= (last + 1)) {
+ first = (char) Math.min(first, pairs.charAt(limit));
+ last = (char) Math.max(last, pairs.charAt(limit+1));
+ limit += 2;
+ }
+
+ // Replace the absorbed ranges [start, limit) with the merged
+ // range. If nothing was absorbed this inserts c-d at start,
+ // which also covers appending to the end of the list.
+ String tail = pairs.toString().substring(limit);
+ pairs.setLength(start);
+ pairs.append(first).append(last).append(tail);
+ }
+
+ /**
+ * Performs an asymmetric difference: removes the range 'c'-'d'
+ * from the pairs list, where c <= d. The pairs list is modified
+ * in place. The result is normalized (in order and as short as
+ * possible). For example, removePair("am", 'l', 'q') => "ak".
+ * removePair("ampz", 'l', 'q') => "akrz".
+ */
+ private static void removePair(StringBuffer pairs, char c, char d) {
+ // Iterate over pairs until we find a pair that overlaps
+ // with the given range.
+ for (int i=0; i<pairs.length(); i+=2) {
+ char b = pairs.charAt(i+1);
+ if (b < c) {
+ // Range at i is entirely before the given range,
+ // since we have a-b < c-d. No overlap yet...keep
+ // iterating.
+ continue;
+ }
+ char a = pairs.charAt(i);
+ if (d < a) {
+ // Range at i is entirely after the given range; c-d <
+ // a-b. Since ranges are in order, nothing else will
+ // overlap.
+ break;
+ }
+ // Once we get here, we know c <= b and d >= a.
+ // rangeEdited is set to true if we have modified the
+ // range a-b (the range at i) in place.
+ boolean rangeEdited = false;
+ if (c > a) {
+ // If c is after a and before b, then we have overlap
+ // of this sort: a--c==b--d or a--c==d--b, where a-b
+ // and c-d are the ranges of interest. We need to
+ // keep the range a,c-1.
+ pairs.setCharAt(i+1, (char)(c-1));
+ // pairs.charAt(i) is already a
+ rangeEdited = true;
+ }
+ if (d < b) {
+ // If d is after a and before b, we overlap like this:
+ // c--a==d--b or a--c==d--b, where a-b is the range at
+ // i and c-d is the range being removed. We need to
+ // keep the range d+1,b.
+ if (rangeEdited) {
+ pairs.insert(i+2, new char[] { (char)(d+1), b });
+ i += 2;
+ } else {
+ pairs.setCharAt(i, (char)(d+1));
+ // pairs.charAt(i+1) is already b
+ rangeEdited = true;
+ }
+ }
+ if (!rangeEdited) {
+ // If we didn't keep any part of it, that means the entire
+ // range a-b must be deleted, since we have
+ // c--a==b--d.
+ stringBufferDelete(pairs, i, i+2);
+ i -= 2; // The next range has moved down to index i
+ }
+ }
+ }
+
+ //----------------------------------------------------------------
+ // Implementation: Fundamental operators
+ //----------------------------------------------------------------
+
+ /**
+ * Changes the pairs list to represent the complement of the set it
+ * currently represents. The pairs list will be normalized (in
+ * order and in shortest possible form) if the original pairs list
+ * was normalized.
+ */
+ private static void doComplement(StringBuffer pairs) {
+ String old = pairs.toString();
+ int length = old.length();
+ pairs.setLength(0);
+
+ // The complement of the empty set is the full range.
+ if (length == 0) {
+ pairs.append('\u0000').append('\uffff');
+ return;
+ }
+
+ // The result consists of the gaps around the old ranges: 3-7 9-12
+ // becomes 0-2 8-8 13-FFFF. Start with the gap before the first
+ // range, which exists unless that range begins at U+0000.
+ char firstStart = old.charAt(0);
+ if (firstStart != '\u0000') {
+ pairs.append('\u0000').append((char) (firstStart - 1));
+ }
+
+ // Each interior gap runs from just after one range's end point
+ // (odd index) to just before the next range's start point.
+ for (int end = 1; end < length - 1; end += 2) {
+ pairs.append((char) (old.charAt(end) + 1));
+ pairs.append((char) (old.charAt(end + 1) - 1));
+ }
+
+ // Finish with the gap after the last range, which exists unless
+ // that range ends at U+FFFF.
+ char lastEnd = old.charAt(length - 1);
+ if (lastEnd != '\uFFFF') {
+ pairs.append((char) (lastEnd + 1)).append('\uFFFF');
+ }
+ }
+
+ /**
+ * Given two pairs lists, changes the first in place to represent
+ * the union of the two sets. The second list, c2, is only read.
+ *
+ * This implementation format was stolen from Richard Gillam's
+ * CharSet class.
+ */
+ private static void doUnion(StringBuffer pairs, String c2) {
+ StringBuffer result = new StringBuffer();
+ String c1 = pairs.toString();
+
+ int i = 0;
+ int j = 0;
+
+ // consider all the characters in both strings
+ while (i < c1.length() && j < c2.length()) {
+ char ub;
+
+ // the first character in the result is the lower of the
+ // starting characters of the two strings, and "ub" gets
+ // set to the upper bound of that range
+ if (c1.charAt(i) < c2.charAt(j)) {
+ result.append(c1.charAt(i));
+ ub = c1.charAt(++i);
+ }
+ else {
+ result.append(c2.charAt(j));
+ ub = c2.charAt(++j);
+ }
+
+ // for as long as one of our two pointers is pointing to a range's
+ // end point, or i is pointing to a character that is no greater than
+ // "ub" plus one (the "plus one" stitches touching ranges together)...
+ while (i % 2 == 1 || j % 2 == 1 || (i < c1.length() && c1.charAt(i)
+ <= ub + 1)) {
+ // advance i to the first character that is greater than
+ // "ub" plus one
+ while (i < c1.length() && c1.charAt(i) <= ub + 1)
+ ++i;
+
+ // if i points to the endpoint of a range, update "ub"
+ // to that character, or if i points to the start of
+ // a range and the endpoint of the preceding range is
+ // greater than "ub", update "ub" to _that_ character
+ if (i % 2 == 1)
+ ub = c1.charAt(i);
+ else if (i > 0 && c1.charAt(i - 1) > ub)
+ ub = c1.charAt(i - 1);
+
+ // now advance j to the first character that is greater
+ // than "ub" plus one
+ while (j < c2.length() && c2.charAt(j) <= ub + 1)
+ ++j;
+
+ // if j points to the endpoint of a range, update "ub"
+ // to that character, or if j points to the start of
+ // a range and the endpoint of the preceding range is
+ // greater than "ub", update "ub" to _that_ character
+ if (j % 2 == 1)
+ ub = c2.charAt(j);
+ else if (j > 0 && c2.charAt(j - 1) > ub)
+ ub = c2.charAt(j - 1);
+ }
+ // when we finally fall out of this loop, we will have stitched
+ // together a series of ranges that overlap or touch, i and j
+ // will both point to starting points of ranges, and "ub" will
+ // be the endpoint of the range we're working on. Write "ub"
+ // to the result
+ result.append(ub);
+
+ // loop back around to create the next range in the result
+ }
+
+ // we fall out to here when we've exhausted all the characters in
+ // one of the operands. We can append all of the remaining characters
+ // in the other operand without doing any extra work.
+ if (i < c1.length())
+ result.append(c1.substring(i));
+ if (j < c2.length())
+ result.append(c2.substring(j));
+
+ pairs.setLength(0);
+ pairs.append(result.toString());
+ }
+
+ /**
+ * Given two pairs lists, changes the first in place so that it holds
+ * the first set minus the second (the asymmetric difference).
+ */
+ private static void doDifference(StringBuffer pairs, String pairs2) {
+ StringBuffer excluded = new StringBuffer(pairs2);
+ doComplement(excluded); // A - B == A & ~B
+ doIntersection(pairs, excluded.toString());
+ }
+
+ /**
+ * Given two pairs lists, changes the first in place to represent
+ * the intersection of the two sets. The second list, c2, is only read.
+ *
+ * This implementation format was stolen from Richard Gillam's
+ * CharSet class.
+ */
+ private static void doIntersection(StringBuffer pairs, String c2) {
+ StringBuffer result = new StringBuffer();
+ String c1 = pairs.toString();
+
+ int i = 0;
+ int j = 0;
+ int oldI;
+ int oldJ;
+
+ // iterate until we've exhausted one of the operands
+ while (i < c1.length() && j < c2.length()) {
+
+ // advance j until it points to a character that is larger than
+ // the one i points to. If this is the beginning of a one-
+ // character range, advance j to point to the end
+ if (i < c1.length() && i % 2 == 0) {
+ while (j < c2.length() && c2.charAt(j) < c1.charAt(i))
+ ++j;
+ if (j < c2.length() && j % 2 == 0 && c2.charAt(j) == c1.charAt(i))
+ ++j;
+ }
+
+ // if j points to the endpoint of a range, save the current
+ // value of i, then advance i until it reaches a character
+ // which is larger than the character pointed at
+ // by j. All of the characters we've advanced over (except
+ // the one currently pointed to by i) are added to the result
+ oldI = i;
+ while (j % 2 == 1 && i < c1.length() && c1.charAt(i) <= c2.charAt(j))
+ ++i;
+ result.append(c1.substring(oldI, i));
+
+ // if i points to the endpoint of a range, save the current
+ // value of j, then advance j until it reaches a character
+ // which is larger than the character pointed at
+ // by i. All of the characters we've advanced over (except
+ // the one currently pointed to by j) are added to the result
+ oldJ = j;
+ while (i % 2 == 1 && j < c2.length() && c2.charAt(j) <= c1.charAt(i))
+ ++j;
+ result.append(c2.substring(oldJ, j));
+
+ // advance i until it points to a character larger than j
+ // If it points at the beginning of a one-character range,
+ // advance it to the end of that range
+ if (j < c2.length() && j % 2 == 0) {
+ while (i < c1.length() && c1.charAt(i) < c2.charAt(j))
+ ++i;
+ if (i < c1.length() && i % 2 == 0 && c2.charAt(j) == c1.charAt(i))
+ ++i;
+ }
+ }
+
+ pairs.setLength(0);
+ pairs.append(result.toString());
+ }
+
+ //----------------------------------------------------------------
+ // Implementation: Generation of pairs for Unicode categories
+ //----------------------------------------------------------------
+
+ /**
+ * Returns a pairs string for the category with the given name. The
+ * name is either a two-letter name, such as "Lu", or a one-letter
+ * name, such as "L", which stands for the union of all two-letter
+ * names that start with that letter. Case is significant. A
+ * leading '^' requests the complement of the named category.
+ *
+ * Individual categories such as "Lu" are cached; single-letter
+ * categories and complements such as "^Lu" are not.
+ *
+ * @param catName category name, optionally preceded by '^'
+ * @return the pairs string for the category
+ * @exception IllegalArgumentException if the name is not recognized
+ */
+ private static String getCategoryPairs(String catName) {
+ boolean invert = false;
+ if (catName.length() > 1 && catName.charAt(0) == '^') {
+ invert = true;
+ catName = catName.substring(1);
+ }
+
+ StringBuffer result = null;
+ switch (catName.length()) {
+ case 2: {
+ // Two-letter name: look it up in the table of names. Only a
+ // match at an even offset lines up with a table entry.
+ int offset = CATEGORY_NAMES.indexOf(catName);
+ if (offset < 0 || offset % 2 != 0
+ || offset / 2 == UNSUPPORTED_CATEGORY) {
+ break;
+ }
+ String pairs = getCategoryPairs(offset / 2);
+ if (!invert) {
+ return pairs; // The cached string can be returned as is
+ }
+ result = new StringBuffer(pairs);
+ break;
+ }
+ case 1: {
+ // One-letter name: union together every supported category
+ // whose two-letter name begins with that letter.
+ char initial = catName.charAt(0);
+ for (int category = 0; category < CATEGORY_COUNT; ++category) {
+ if (category == UNSUPPORTED_CATEGORY
+ || CATEGORY_NAMES.charAt(2 * category) != initial) {
+ continue;
+ }
+ if (result == null) {
+ result = new StringBuffer(getCategoryPairs(category));
+ } else {
+ doUnion(result, getCategoryPairs(category));
+ }
+ }
+ break;
+ }
+ }
+
+ if (result == null) {
+ throw new IllegalArgumentException("Bad category");
+ }
+ if (invert) {
+ doComplement(result);
+ }
+ return result.toString();
+ }
+
+ /**
+ * Returns a pairs string for the given category. This string is
+ * cached and returned again if this method is called again with
+ * the same parameter.
+ */
+ private static String getCategoryPairs(int cat) {
+ String cached = CATEGORY_PAIRS_CACHE[cat];
+ if (cached != null) {
+ return cached;
+ }
+
+ // Scan every char value in ascending order, collecting each
+ // maximal run of characters for which Character.getType(c)
+ // equals the given category. Because the scan is in order,
+ // finished runs can simply be appended to the pairs string.
+ StringBuffer pairs = new StringBuffer();
+ int runStart = -1; // Start of the open run, or -1 if none
+ for (int ch = 0; ch <= 0xFFFF; ++ch) {
+ if (Character.getType((char) ch) == cat) {
+ if (runStart < 0) {
+ runStart = ch;
+ }
+ } else if (runStart >= 0) {
+ pairs.append((char) runStart).append((char) (ch - 1));
+ runStart = -1;
+ }
+ }
+ if (runStart >= 0) {
+ pairs.append((char) runStart).append('\uFFFF');
+ }
+ cached = pairs.toString();
+ CATEGORY_PAIRS_CACHE[cat] = cached;
+ return cached;
+ }
+
+ //----------------------------------------------------------------
+ // Implementation: Utility methods
+ //----------------------------------------------------------------
+
+ /**
+ * Returns the character after the given position, or '\uFFFF' if
+ * there is none.
+ */
+ private static final char charAfter(String str, int i) {
+ return ((++i) < str.length()) ? str.charAt(i) : '\uFFFF';
+ }
+
+ /**
+ * Deletes a range of characters from a StringBuffer, from start to
+ * limit-1. This is not part of JDK 1.1 StringBuffer, but is
+ * present in Java 2, where buf.delete(start, limit) can be used.
+ * @param buf the buffer to be modified in place
+ * @param start inclusive start of range
+ * @param limit exclusive end of range
+ */
+ private static void stringBufferDelete(StringBuffer buf,
+ int start, int limit) {
+ // Save whatever follows the deleted range, if anything.
+ String tail = null;
+ if (limit < buf.length()) {
+ tail = buf.toString().substring(limit);
+ }
+ // Cut the buffer back to the range start, then restore the tail.
+ buf.setLength(start);
+ if (tail != null) {
+ buf.append(tail);
+ }
+ }
+}
diff --git a/src/com/ibm/text/UnicodeToHexTransliterator.java b/src/com/ibm/text/UnicodeToHexTransliterator.java
new file mode 100755
index 0000000..1e688f6
--- /dev/null
+++ b/src/com/ibm/text/UnicodeToHexTransliterator.java
@@ -0,0 +1,172 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that converts from Unicode characters to
+ * hexadecimal Unicode escape sequences. It outputs a
+ * prefix specified in the constructor and optionally converts the hex
+ * digits to uppercase.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class UnicodeToHexTransliterator extends Transliterator {
+
+ /**
+ * Package accessible ID for this transliterator.
+ */
+ static String _ID = "Unicode-Hex";
+
+ private String prefix;
+
+ private boolean uppercase;
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+ * Constructs a transliterator.
+ * @param prefix the string that will precede the four hex
+ * digits for UNICODE_HEX transliterators. Ignored
+ * if direction is HEX_UNICODE.
+ * @param uppercase if true, the four hex digits will be
+ * converted to uppercase; otherwise they will be lowercase.
+ * Ignored if direction is HEX_UNICODE.
+ */
+ public UnicodeToHexTransliterator(String prefix, boolean uppercase,
+ UnicodeFilter filter) {
+ super(_ID, filter);
+ this.prefix = prefix;
+ this.uppercase = uppercase;
+ }
+
+ /**
+ * Constructs a transliterator with the default prefix "\u"
+ * that outputs uppercase hex digits.
+ */
+ public UnicodeToHexTransliterator() {
+ this("\\u", true, null);
+ }
+
+ /**
+ * Returns the string that precedes the four hex digits.
+ * @return prefix string
+ */
+ public String getPrefix() {
+ return prefix;
+ }
+
+ /**
+ * Sets the string that precedes the four hex digits.
+ *
+ * <p>Callers must take care if a transliterator is in use by
+ * multiple threads. The prefix should not be changed by one
+ * thread while another thread may be transliterating.
+ * @param prefix prefix string
+ */
+ public void setPrefix(String prefix) {
+ this.prefix = prefix;
+ }
+
+ /**
+ * Returns true if this transliterator outputs uppercase hex digits.
+ */
+ public boolean isUppercase() {
+ return uppercase;
+ }
+
+ /**
+ * Sets if this transliterator outputs uppercase hex digits.
+ *
+ * <p>Callers must take care if a transliterator is in use by
+ * multiple threads. The uppercase mode should not be changed by
+ * one thread while another thread may be transliterating.
+ * @param outputUppercase if true, then this transliterator
+ * outputs uppercase hex digits.
+ */
+ public void setUppercase(boolean outputUppercase) {
+ uppercase = outputUppercase;
+ }
+
+ /**
+ * Transliterates a segment of a string. <code>Transliterator</code> API.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @return the new limit index
+ */
+ public int transliterate(Replaceable text, int start, int limit) {
+ int[] offsets = { start, limit, start };
+ handleKeyboardTransliterate(text, offsets);
+ return offsets[LIMIT];
+ }
+
+ /**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ */
+ protected void handleKeyboardTransliterate(Replaceable text,
+ int[] offsets) {
+ /**
+ * Performs transliteration changing all characters to
+ * Unicode hexadecimal escapes. For example, '@' -> "U+0040",
+ * assuming the prefix is "U+".
+ */
+ int cursor = offsets[CURSOR];
+ int limit = offsets[LIMIT];
+
+ UnicodeFilter filter = getFilter();
+
+ loop:
+ while (cursor < limit) {
+ char c = text.charAt(cursor);
+ if (filter != null && !filter.isIn(c)) {
+ ++cursor;
+ continue;
+ }
+ String hex = hex(c);
+ text.replace(cursor, cursor+1, hex);
+ int len = hex.length();
+ cursor += len; // Advance cursor by 1 and adjust for new text
+ --len;
+ limit += len;
+ }
+
+ offsets[LIMIT] = limit;
+ offsets[CURSOR] = cursor;
+ }
+
+ /**
+ * Return the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context.
+ * @param direction either <code>FORWARD</code> or <code>REVERSE</code>
+ * @return maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+ protected int getMaximumContextLength() {
+ return 0;
+ }
+
+ /**
+ * Form escape sequence.
+ */
+ private final String hex(char c) {
+ StringBuffer buf = new StringBuffer();
+ buf.append(prefix);
+ if (c < 0x1000) {
+ buf.append('0');
+ if (c < 0x100) {
+ buf.append('0');
+ if (c < 0x10) {
+ buf.append('0');
+ }
+ }
+ }
+ String h = Integer.toHexString(c);
+ buf.append(uppercase ? h.toUpperCase() : h);
+ return buf.toString();
+ }
+}
diff --git a/src/com/ibm/text/components/AppletFrame.java b/src/com/ibm/text/components/AppletFrame.java
new file mode 100755
index 0000000..cf6cc39
--- /dev/null
+++ b/src/com/ibm/text/components/AppletFrame.java
@@ -0,0 +1,126 @@
+package com.ibm.text.components;
+import java.applet.*;
+import java.net.URL;
+import java.util.Enumeration;
+import java.awt.*;
+import java.awt.event.*;
+
+/**
+ * <p>A Frame that runs an Applet within itself, making it possible
+ * for an applet to run as an application. Usage:
+ *
+ * <pre>
+ * public class MyApplet extends Applet {
+ * public static void main(String args[]) {
+ * MyApplet applet = new MyApplet();
+ * new AppletFrame("My Applet Running As An App", applet, 640, 480);
+ * }
+ * ...
+ * }
+ * </pre>
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: AppletFrame.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class AppletFrame extends Frame implements AppletStub, AppletContext {
+
+    Applet applet; // the hosted applet; this frame acts as its stub and its context
+
+    private static final String COPYRIGHT = "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a Frame running the given Applet with the default size
+     * of 640 by 480.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     */
+    public AppletFrame(String name, Applet applet) {
+        this(name, applet, 640, 480);
+    }
+
+    /**
+     * Construct a Frame running the given Applet with the given size.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     * @param width width of the Frame
+     * @param height height of the Frame
+     */
+    public AppletFrame(String name, Applet applet, int width, int height) {
+        super(name);
+        this.applet = applet;
+        applet.setStub(this);
+
+        setSize(width, height); // setSize/setVisible replace the deprecated resize/show
+        add(applet, BorderLayout.CENTER);
+        addWindowListener(new WindowAdapter() { // registered before showing so no close request is missed
+            public void windowClosing(WindowEvent e) {
+                AppletFrame.this.applet.stop();
+                dispose();
+                System.exit(0);
+            }
+        });
+        setVisible(true);
+
+        applet.init();
+        applet.start();
+    }
+
+    // AppletStub API
+    public void appletResize(int width, int height) {
+        setSize(width, height);
+    }
+
+    public AppletContext getAppletContext() {
+        return this;
+    }
+
+    public URL getCodeBase() {
+        return null; // no code base is supplied by this stub
+    }
+
+    public URL getDocumentBase() {
+        return null; // no document base is supplied by this stub
+    }
+
+    public String getParameter(String name) {
+        return "PARAMETER"; // the same placeholder for every parameter name
+    }
+
+    public boolean isActive() {
+        return true;
+    }
+
+    // AppletContext API
+    public Applet getApplet(String name) {
+        return applet; // the single hosted applet, whatever name is asked for
+    }
+
+    public Enumeration getApplets() {
+        java.util.Vector hosted = new java.util.Vector(1); // never null: callers iterate the result
+        hosted.addElement(applet);
+        return hosted.elements();
+    }
+
+    public AudioClip getAudioClip(URL url) {
+        return null;
+    }
+
+    public Image getImage(URL url) {
+        return null;
+    }
+
+    public void showDocument(URL url) {}
+    public void showDocument(URL url, String target) {}
+
+    public void showStatus(String status) {
+        System.out.println(status);
+    }
+}
diff --git a/src/com/ibm/text/components/DumbTextComponent.java b/src/com/ibm/text/components/DumbTextComponent.java
new file mode 100755
index 0000000..a400b9a
--- /dev/null
+++ b/src/com/ibm/text/components/DumbTextComponent.java
@@ -0,0 +1,708 @@
+package com.ibm.text.components;
+import java.awt.*;
+import java.awt.event.*;
+import java.text.*;
+import java.awt.datatransfer.*;
+
+// LIU: Changed from final to non-final
+public class DumbTextComponent extends Canvas
+ implements KeyListener, MouseListener, MouseMotionListener, FocusListener
+ {
+ private transient static final String copyright =
+ "Copyright \u00A9 1998, Mark Davis. All Rights Reserved.";
+ private transient static boolean DEBUG = false;
+
+ private String contents = "";
+ private Selection selection = new Selection();
+ private boolean editable = true;
+
+ private transient Selection tempSelection = new Selection();
+ private transient boolean focus;
+ private transient BreakIterator lineBreaker = BreakIterator.getLineInstance();
+ private transient BreakIterator wordBreaker = BreakIterator.getWordInstance();
+ private transient BreakIterator charBreaker = BreakIterator.getCharacterInstance();
+ private transient int lineAscent;
+ private transient int lineHeight;
+ private transient int lineLeading;
+ private transient int lastHeight = 10;
+ private transient int lastWidth = 50;
+ private static final int MAX_LINES = 200; // LIU: Use symbolic name
+ private transient int[] lineStarts = new int[MAX_LINES]; // LIU
+ private transient int lineCount = 1;
+
+ private transient boolean valid = false;
+ private transient FontMetrics fm;
+ private transient boolean redoLines = true;
+ private transient boolean doubleClick = false;
+ private transient TextListener textListener;
+ private transient ActionListener selectionListener;
+ private transient Image cacheImage;
+ private transient Dimension mySize;
+ private transient int xInset = 5;
+ private transient int yInset = 5;
+ private transient Point startPoint = new Point();
+ private transient Point endPoint = new Point();
+ private transient Point caretPoint = new Point();
+ private transient static String clipBoard;
+
+ private static final char CR = '\015'; // LIU
+
+ // ============================================
+
+ public DumbTextComponent() {
+ addMouseListener(this);
+ addMouseMotionListener(this);
+ addKeyListener(this);
+ addFocusListener(this);
+ setCursor(Cursor.getPredefinedCursor(Cursor.TEXT_CURSOR));
+
+ }
+
+// ================ Events ====================
+
+ public boolean isFocusTraversable() { return true; }
+
+ public void addActionListener(ActionListener l) {
+ selectionListener = AWTEventMulticaster.add(selectionListener, l);
+ }
+
+ public void removeActionListener(ActionListener l) {
+ selectionListener = AWTEventMulticaster.remove(selectionListener, l);
+ }
+
+ public void addTextListener(TextListener l) {
+ textListener = AWTEventMulticaster.add(textListener, l);
+ }
+
+ public void removeTextListener(TextListener l) {
+ textListener = AWTEventMulticaster.remove(textListener, l);
+ }
+
+ private transient boolean pressed;
+
+ public void mousePressed(MouseEvent e) {
+ if (DEBUG) System.out.println("mousePressed");
+ if (pressed) {
+ select(e,false);
+ } else {
+ doubleClick = e.getClickCount() > 1;
+ requestFocus();
+ select(e, true);
+ pressed = true;
+ }
+ }
+
+ public void mouseDragged(MouseEvent e) {
+ if (DEBUG) System.out.println("mouseDragged");
+ select(e, false);
+ }
+
+ public void mouseReleased(MouseEvent e) {
+ if (DEBUG) System.out.println("mouseReleased");
+ pressed = false;
+ }
+
+ public void mouseEntered(MouseEvent e) {
+ //if (pressed) select(e, false);
+ }
+
+ public void mouseExited(MouseEvent e){
+ //if (pressed) select(e, false);
+ }
+
+ public void mouseClicked(MouseEvent e) {}
+ public void mouseMoved(MouseEvent e) {}
+
+
+ public void focusGained(FocusEvent e) {
+ if (DEBUG) System.out.println("focusGained");
+ focus = true;
+ valid = false;
+ repaint(16);
+ }
+ public void focusLost(FocusEvent e) {
+ if (DEBUG) System.out.println("focusLost");
+ focus = false;
+ valid = false;
+ repaint(16);
+ }
+
+ public void select(MouseEvent e, boolean first) {
+ point2Offset(e.getPoint(), tempSelection);
+ if (first) {
+ if ((e.getModifiers() & InputEvent.SHIFT_MASK) == 0) {
+ tempSelection.anchor = tempSelection.caret;
+ }
+ }
+ // fix words
+ if (doubleClick) {
+ tempSelection.expand(wordBreaker);
+ }
+ select(tempSelection);
+ }
+
+ /** Handles the Ctrl-Q/V/C/X/A shortcuts, arrow-key caret movement and Delete. */
+ public void keyPressed(KeyEvent e) {
+     int code = e.getKeyCode();
+     if (DEBUG) System.out.println("keyPressed "
+         + hex((char)code) + ", " + hex((char)e.getModifiers()));
+     int start = selection.getStart();
+     int end = selection.getEnd();
+     boolean shift = (e.getModifiers() & KeyEvent.SHIFT_MASK) != 0;
+     boolean ctrl = (e.getModifiers() & KeyEvent.CTRL_MASK) != 0;
+     switch (code) {
+     case KeyEvent.VK_Q: // Ctrl-Q: hex <-> character conversion before the selection end
+         if (!ctrl || !editable) break;
+         fixHex();
+         break;
+     case KeyEvent.VK_V: // Ctrl-V: paste; a null clipboard means nothing was copied yet
+         if (!ctrl || !editable || clipBoard == null) break;
+         insertText(clipBoard);
+         break;
+     case KeyEvent.VK_C: // Ctrl-C: copy
+         if (!ctrl) break;
+         clipBoard = contents.substring(start, end);
+         break;
+     case KeyEvent.VK_X: // Ctrl-X: cut
+         if (!ctrl) break;
+         clipBoard = contents.substring(start, end);
+         // A read-only component still copies, but must not delete.
+         if (!editable) break;
+         insertText("");
+         break;
+     case KeyEvent.VK_A: // Ctrl-A: select all (the oversized offset is pinned to the text length)
+         if (!ctrl) break;
+         select(Integer.MAX_VALUE, 0, false);
+         break;
+     case KeyEvent.VK_RIGHT:
+         tempSelection.set(selection);
+         tempSelection.nextBound(ctrl ? wordBreaker : charBreaker, +1, shift);
+         select(tempSelection);
+         break;
+     case KeyEvent.VK_LEFT:
+         tempSelection.set(selection);
+         tempSelection.nextBound(ctrl ? wordBreaker : charBreaker, -1, shift);
+         select(tempSelection);
+         break;
+     case KeyEvent.VK_UP: // LIU: Add support for up arrow
+         tempSelection.set(selection);
+         tempSelection.caret = lineDelta(tempSelection.caret, -1);
+         if (!shift) {
+             tempSelection.anchor = tempSelection.caret;
+         }
+         select(tempSelection);
+         break;
+     case KeyEvent.VK_DOWN: // LIU: Add support for down arrow
+         tempSelection.set(selection);
+         tempSelection.caret = lineDelta(tempSelection.caret, +1);
+         if (!shift) {
+             tempSelection.anchor = tempSelection.caret;
+         }
+         select(tempSelection);
+         break;
+     case KeyEvent.VK_DELETE: // LIU: Add delete key support
+         if (!editable) break;
+         if (contents.length() == 0) break;
+         if (start == end) { // no selection: delete the character after the caret
+             ++end;
+             if (end > contents.length()) {
+                 getToolkit().beep();
+                 return;
+             }
+         }
+         replaceRange("", start, end);
+         break;
+     }
+ }
+
+ /**
+ * LIU: Given an offset into contents, moves up or down by lines,
+ * according to lineStarts[].
+ * @param off the offset into contents
+ * @param delta how many lines to move up (&lt; 0) or down (&gt; 0)
+ * @return the new offset into contents
+ */
+ private int lineDelta(int off, int delta) {
+ int line = findLine(off, false);
+ int posInLine = off - lineStarts[line]; // distance from the line start, carried over to the target line
+ // System.out.println("off=" + off + " at " + line + ":" + posInLine);
+ line += delta;
+ if (line < 0) {
+ line = posInLine = 0; // moved above the first line: land on offset 0
+ } else if (line >= lineCount) {
+ return contents.length(); // moved below the last line: land on the end of the text
+ }
+ off = lineStarts[line] + posInLine;
+ if (off >= lineStarts[line+1]) {
+ off = lineStarts[line+1] - 1; // target line is shorter: clamp to one before the next line start
+ }
+ return off;
+ }
+
+ public void keyReleased(KeyEvent e) {
+ int code = e.getKeyCode();
+ if (DEBUG) System.out.println("keyReleased "
+ + hex((char)code) + ", " + hex((char)e.getModifiers()));
+ }
+
+ public void keyTyped(KeyEvent e) {
+ char ch = e.getKeyChar();
+ if (DEBUG) System.out.println("keyTyped "
+ + hex((char)ch) + ", " + hex((char)e.getModifiers()));
+ if ((e.getModifiers() & KeyEvent.CTRL_MASK) != 0) return;
+ switch (ch) {
+ case KeyEvent.CHAR_UNDEFINED:
+ break;
+ case KeyEvent.VK_BACK_SPACE:
+ if (!editable) break;
+ if (contents.length() == 0) break;
+ int start = selection.getStart();
+ int end = selection.getEnd();
+ if (start == end) {
+ --start;
+ if (start < 0) {
+ getToolkit().beep(); // LIU: Add audio feedback of NOP
+ return;
+ }
+ }
+ replaceRange("", start, end);
+ break;
+ default:
+ if (!editable) break;
+ // LIU: Dispatch to subclass API
+ handleKeyTyped(e);
+ break;
+ }
+ }
+
+ // LIU: Subclass API for handling of key typing
+ protected void handleKeyTyped(KeyEvent e) {
+ insertText(String.valueOf(e.getKeyChar()));
+ }
+
+// ===================== Control ======================
+
+ public synchronized void setEditable(boolean b) {
+ editable = b;
+ }
+
+ public boolean isEditable() {
+ return editable;
+ }
+
+ public void select(Selection newSelection) {
+ newSelection.pin(contents);
+ if (!selection.equals(newSelection)) {
+ selection.set(newSelection);
+ if (selectionListener != null) {
+ selectionListener.actionPerformed(
+ new ActionEvent(this, ActionEvent.ACTION_PERFORMED,
+ "Selection Changed", 0));
+ }
+ repaint(10);
+ valid = false;
+ }
+ }
+
+ public void select(int start, int end) {
+ select(start, end, false);
+ }
+
+ public void select(int start, int end, boolean clickAfter) {
+ tempSelection.set(start, end, clickAfter);
+ select(tempSelection);
+ }
+
+ public int getSelectionStart() {
+ return selection.getStart();
+ }
+
+ public int getSelectionEnd() {
+ return selection.getEnd();
+ }
+
+ public void setBounds(int x, int y, int w, int h) {
+ super.setBounds(x,y,w,h);
+ redoLines = true;
+ }
+
+ public Dimension getPreferredSize() {
+ return new Dimension(lastWidth,lastHeight);
+ }
+
+ public Dimension getMaximumSize() {
+ return new Dimension(lastWidth,lastHeight);
+ }
+
+ public Dimension getMinimumSize() {
+ return new Dimension(lastHeight,lastHeight);
+ }
+
+ public void setText(String text) {
+ setText2(text);
+ select(tempSelection.set(selection).pin(contents));
+ }
+
+ public void setText2(String text) {
+ contents = text;
+ charBreaker.setText(text);
+ wordBreaker.setText(text);
+ lineBreaker.setText(text);
+ redoLines = true;
+ if (textListener != null)
+ textListener.textValueChanged(
+ new TextEvent(this, TextEvent.TEXT_VALUE_CHANGED));
+ repaint(16);
+ }
+
+ public void insertText(String text) {
+ replaceRange(text, selection.getStart(), selection.getEnd());
+ }
+
+ public void replaceRange(String s, int start, int end) {
+ setText2(contents.substring(0,start) + s
+ + contents.substring(end));
+ select(tempSelection.set(selection).
+ fixAfterReplace(start, end, s.length()));
+ }
+
+ public String getText() {
+ return contents;
+ }
+
+ public void setFont(Font font) {
+ super.setFont(font);
+ redoLines = true;
+ repaint(16);
+ }
+
+ // ================== Graphics ======================
+
+ public void update(Graphics g) {
+ if (DEBUG) System.out.println("update");
+ paint(g);
+ }
+
+ public void paint(Graphics g) {
+ mySize = getSize();
+ if (cacheImage == null
+ || cacheImage.getHeight(this) != mySize.height
+ || cacheImage.getWidth(this) != mySize.width) {
+ cacheImage = createImage(mySize.width, mySize.height);
+ valid = false;
+ }
+ if (!valid || redoLines) {
+ if (DEBUG) System.out.println("painting");
+ paint2(cacheImage.getGraphics());
+ valid = true;
+ }
+ //getToolkit().sync();
+ if (DEBUG) System.out.println("copying");
+ g.drawImage(cacheImage,
+ 0, 0, mySize.width, mySize.height,
+ 0, 0, mySize.width, mySize.height,
+ this);
+ }
+
+ public void paint2(Graphics g) {
+ g.clearRect(0, 0, mySize.width, mySize.height);
+ if (DEBUG) System.out.println("print");
+ if (focus) g.setColor(Color.black);
+ else g.setColor(Color.gray);
+ g.drawRect(0,0,mySize.width-1,mySize.height-1);
+ g.setClip(1,1,
+ mySize.width-2,mySize.height-2);
+ g.setColor(Color.black);
+ g.setFont(getFont());
+ fm = g.getFontMetrics();
+ lineAscent = fm.getAscent();
+ lineLeading = fm.getLeading();
+ lineHeight = lineAscent + fm.getDescent() + lineLeading;
+ int y = yInset + lineAscent;
+ String lastSubstring = "";
+ if (redoLines) fixLineStarts(mySize.width-xInset-xInset);
+ for (int i = 0; i < lineCount; y += lineHeight, ++i) {
+ // LIU: Don't display terminating ^M characters
+ int lim = lineStarts[i+1];
+ if (lim > 0 && contents.length() > 0 &&
+ contents.charAt(lim-1) == CR) --lim;
+ lastSubstring = contents.substring(lineStarts[i],lim);
+ g.drawString(lastSubstring, xInset, y);
+ }
+ drawSelection(g, lastSubstring);
+ lastHeight = y + yInset - lineHeight + yInset;
+ lastWidth = mySize.width-xInset-xInset;
+ }
+
+ void paintRect(Graphics g, int x, int y, int w, int h) {
+ if (focus) {
+ g.fillRect(x, y, w, h);
+ } else {
+ g.drawRect(x, y, w-1, h-1);
+ }
+ }
+
+ public void drawSelection(Graphics g, String lastSubstring) {
+ g.setXORMode(Color.black);
+ if (selection.isCaret()) {
+ offset2Point(selection.caret, selection.clickAfter, caretPoint);
+ } else {
+ if (focus) g.setColor(Color.blue);
+ else g.setColor(Color.yellow);
+ offset2Point(selection.getStart(), true, startPoint);
+ offset2Point(selection.getEnd(), false, endPoint);
+ if (selection.getStart() == selection.caret)
+ caretPoint.setLocation(startPoint);
+ else caretPoint.setLocation(endPoint);
+ if (startPoint.y == endPoint.y) {
+ paintRect(g, startPoint.x, startPoint.y,
+ Math.max(1,endPoint.x-startPoint.x), lineHeight);
+ } else {
+ paintRect(g, startPoint.x, startPoint.y,
+ (mySize.width-xInset)-startPoint.x, lineHeight);
+ if (startPoint.y + lineHeight < endPoint.y)
+ paintRect(g, xInset, startPoint.y + lineHeight,
+ (mySize.width-xInset)-xInset, endPoint.y - startPoint.y - lineHeight);
+ paintRect(g, xInset, endPoint.y, endPoint.x-xInset, lineHeight);
+ }
+ }
+ if (focus || selection.isCaret()) {
+ if (focus) g.setColor(Color.green);
+ else g.setColor(Color.red);
+ int line = caretPoint.x - (selection.clickAfter ? 0 : 1);
+ g.fillRect(line, caretPoint.y, 1, lineHeight);
+ int w = lineHeight/12 + 1;
+ int braces = line - (selection.clickAfter ? -1 : w);
+ g.fillRect(braces, caretPoint.y, w, 1);
+ g.fillRect(braces, caretPoint.y + lineHeight - 1, w, 1);
+ }
+ }
+
+ public Point offset2Point(int off, boolean start, Point p) {
+ int line = findLine(off, start);
+ int width = 0;
+ try {
+ width = fm.stringWidth(
+ contents.substring(lineStarts[line], off));
+ } catch (Exception e) {
+ System.out.println(e);
+ }
+ p.x = width + xInset;
+ if (p.x > mySize.width - xInset)
+ p.x = mySize.width - xInset;
+ p.y = lineHeight * line + yInset;
+ return p;
+ }
+
+ private int findLine(int off, boolean start) { // returns the index into lineStarts[] of the display line holding off
+ // if it is start, then go to the next line!
+ if (start) ++off;
+ for (int i = 1; i < lineCount; ++i) {
+ // LIU: This was <= ; changed to < to make caret after
+ // final CR in line appear at START of next line.
+ if (off < lineStarts[i]) return i-1; // first line start beyond off: off belongs to the line before it
+ }
+ // LIU: Check for special case; after CR at end of the last line
+ if (off == lineStarts[lineCount] &&
+ off > 0 && contents.length() > 0 && contents.charAt(off-1) == CR) {
+ return lineCount; // one past the last laid-out line
+ }
+ return lineCount-1; // otherwise everything past the last line start is on the last line
+ }
+
+ // offsets on any line will go from start,true to end,false
+ // excluding start,false and end,true
+ public Selection point2Offset(Point p, Selection o) {
+ if (p.y < yInset) {
+ o.caret = 0;
+ o.clickAfter = true;
+ return o;
+ }
+ int line = (p.y - yInset)/lineHeight;
+ if (line >= lineCount) {
+ o.caret = contents.length();
+ o.clickAfter = false;
+ return o;
+ }
+ int target = p.x - xInset;
+ if (target <= 0) {
+ o.caret = lineStarts[line];
+ o.clickAfter = true;
+ return o;
+ }
+ int lowGuess = lineStarts[line];
+ int lowWidth = 0;
+ int highGuess = lineStarts[line+1];
+ int highWidth = fm.stringWidth(contents.substring(lineStarts[line],highGuess));
+ if (target >= highWidth) {
+ o.caret = lineStarts[line+1];
+ o.clickAfter = false;
+ return o;
+ }
+ while (lowGuess < highGuess - 1) {
+ int guess = (lowGuess + highGuess)/2;
+ int width = fm.stringWidth(contents.substring(lineStarts[line],guess));
+ if (width <= target) {
+ lowGuess = guess;
+ lowWidth = width;
+ if (width == target) break;
+ } else {
+ highGuess = guess;
+ highWidth = width;
+ }
+ }
+ // at end, either lowWidth < target < width(low+1), or lowWidth = target
+ int highBound = charBreaker.following(lowGuess);
+ int lowBound = charBreaker.previous();
+ // we are now at character boundaries
+ if (lowBound != lowGuess)
+ lowWidth = fm.stringWidth(contents.substring(lineStarts[line],lowBound));
+ if (highBound != highGuess)
+ highWidth = fm.stringWidth(contents.substring(lineStarts[line],highBound));
+ // we now have the right widths
+ if (target - lowWidth < highWidth - target) {
+ o.caret = lowBound;
+ o.clickAfter = true;
+ } else {
+ o.caret = highBound;
+ o.clickAfter = false;
+ }
+ // we now have the closest!
+ return o;
+ }
+
+ /**
+  * Recomputes lineStarts[] and lineCount for the given wrap width, then
+  * clears redoLines.  At most MAX_LINES - 1 lines are laid out.
+  */
+ private void fixLineStarts(int width) {
+     lineCount = 1;
+     lineStarts[0] = 0;
+     lineStarts[1] = 0; // end of the single line kept for empty contents; overwritten otherwise
+     int end = 0;
+     for (int start = 0; start < contents.length() && lineCount < MAX_LINES;
+             start = end) {
+         end = nextLine(fm, start, width);
+         lineStarts[lineCount++] = end;
+         if (end == start) { // LIU: Assertion
+             throw new RuntimeException("nextLine broken");
+         }
+     }
+     if (contents.length() > 0) --lineCount; // the loop leaves lineCount one past the last line
+     redoLines = false; // also reached for empty contents, so paint() stops re-rendering
+ }
+
+ /**
+  * LIU: Enhanced to wrap long lines. Bug with return of start fixed.
+  * Returns the offset just past the display line starting at start: the next
+  * line separator (CR LF counts as one) or the last break that fits width.
+  */
+ public int nextLine(FontMetrics fm, int start, int width) {
+     int total = contents.length();
+     int len = total;
+     for (int i = start; i < total; ++i) {
+         char ch = contents.charAt(i);
+         if (ch >= 0x000A && ch <= 0x000D || ch == 0x2028 || ch == 0x2029) {
+             len = i + 1;
+             // Bounds-check against the whole text, not the freshly
+             // shortened len, or the LF of a CR LF pair is never taken.
+             if (ch == 0x000D && len < total && contents.charAt(len) == 0x000A)
+                 ++len;
+             break;
+         }
+     }
+     String subject = contents.substring(start, len);
+     if (visibleWidth(fm, subject) <= width)
+         return len;
+
+     // Too wide: try line breaks first, then word breaks, finally char breaks.
+     int n = findFittingBreak(fm, subject, width, lineBreaker);
+     if (n == 0) n = findFittingBreak(fm, subject, width, wordBreaker);
+     if (n == 0) n = findFittingBreak(fm, subject, width, charBreaker);
+     return n > 0 ? start + n : len;
+ }
+
+ /**
+  * LIU: Finds the longest prefix of line that fits a given width and ends
+  * on a boundary returned by a BreakIterator.  If the smallest subunit is
+  * too long, returns 0.  The breaker's text is set back to contents
+  * before returning, since the same iterators serve caret movement.
+  * @param fm metrics to use
+  * @param line the string to be fit into width
+  * @param width line.substring(0, result) must be <= width
+  * @param breaker the BreakIterator that will be used to find subunits
+  * @return maximum characters, at breaker boundaries, that fit; zero on failure
+  */
+ private int findFittingBreak(FontMetrics fm, String line, int width,
+                              BreakIterator breaker) {
+     breaker.setText(line);
+     int last = breaker.first();
+     for (int end = breaker.next();
+             end != BreakIterator.DONE && visibleWidth(fm, line.substring(0, end)) <= width;
+             end = breaker.next()) {
+         last = end;
+     }
+     breaker.setText(contents); // offsets used elsewhere index contents, not this sub-line
+     return last;
+ }
+
+ public int visibleWidth(FontMetrics fm, String s) { // width of s minus trailing spaces and line separators
+     int end = s.length();
+     while (end > 0) {
+         char ch = s.charAt(end - 1);
+         if (!(ch == ' ' || ch >= 0x000A && ch <= 0x000D || ch == 0x2028 || ch == 0x2029)) break;
+         --end;
+     }
+     return end > 0 ? fm.stringWidth(s.substring(0, end)) : 0;
+ }
+
+// =============== Utility ====================
+
+ private void fixHex() { // converts the text just before the selection end: hex digits to characters, or a character to hex
+ if (selection.getEnd() == 0) return; // nothing precedes the selection end
+ int store = 0; // value of the hex digits read so far
+ int places = 1; // weight of the next digit, a power of 16
+ int count = 0; // number of hex digits consumed
+ int min = Math.min(8,selection.getEnd()); // look back at most eight characters
+ for (int i = 0; i < min; ++i) {
+ char ch = contents.charAt(selection.getEnd()-1-i); // walk backwards, least significant digit first
+ int value = Character.getNumericValue(ch);
+ if (value < 0 || value > 15) break; // stop at the first character that is not a hex digit
+ store += places * value;
+ ++count;
+ places *= 16;
+ }
+ String add = "";
+ int bottom = store & 0xFFFF; // low 16 bits
+ if (store >= 0xD8000000 && store < 0xDC000000 // both literals are negative ints, so this is a signed comparison
+ && bottom >= 0xDC00 && bottom < 0xE000) { // surrogates
+ add = "" + (char)(store >> 16) + (char)bottom; // the two 16-bit halves become two chars
+ } else if (store > 0xFFFF && store <= 0x10FFFF) { // value above U+FFFF: encode as a surrogate pair
+ store -= 0x10000;
+ add = "" + (char)(((store >> 10) & 0x3FF) + 0xD800)
+ + (char)((store & 0x3FF) + 0xDC00);
+
+ } else if (count >= 4) { // at least four digits: only the last four are used, as one char
+ count = 4;
+ add = ""+(char)(store & 0xFFFF);
+ } else { // fewer than four hex digits: go the other way, character to hex
+ count = 1;
+ char ch = contents.charAt(selection.getEnd()-1);
+ add = hex(ch);
+ if (ch >= 0xDC00 && ch <= 0xDFFF && selection.getEnd() > 1) { // low surrogate: look for its high surrogate
+ ch = contents.charAt(selection.getEnd()-2);
+ if (ch >= 0xD800 && ch <= 0xDBFF) {
+ count = 2; // replace both halves of the pair
+ add = hex(ch) + add;
+ }
+ }
+ }
+ replaceRange(add, selection.getEnd()-count, selection.getEnd()); // swap the count consumed characters for add
+ }
+
+ public static String hex(char ch) {
+     // 0x10000 | ch always prints as five hex digits; dropping the leading
+     // 1 leaves ch zero-padded to four digits.
+     return Integer.toHexString(0x10000 | ch).substring(1).toUpperCase();
+ }
+}
diff --git a/src/com/ibm/text/components/Selection.java b/src/com/ibm/text/components/Selection.java
new file mode 100755
index 0000000..985b36f
--- /dev/null
+++ b/src/com/ibm/text/components/Selection.java
@@ -0,0 +1,155 @@
+package com.ibm.text.components;
+import java.text.*;
+
+public final class Selection {
+
+    public int anchor;          // fixed end of the selection
+    public int caret;           // moving end of the selection
+    public boolean clickAfter;  // caret affinity flag, interpreted by the owning component
+
+    public int getStart() {
+        return anchor < caret ? anchor : caret;
+    }
+
+    public int getEnd() {
+        return anchor > caret ? anchor : caret;
+    }
+
+    public boolean isCaret() {
+        return anchor == caret;
+    }
+
+    public Selection set(Selection other) {
+        anchor = other.anchor;
+        caret = other.caret;
+        clickAfter = other.clickAfter;
+        return this;
+    }
+
+    public Selection set(int anchor, int caret, boolean clickAfter) {
+        this.anchor = anchor;
+        this.caret = caret;
+        this.clickAfter = clickAfter;
+        return this;
+    }
+
+    /** Equal when all three fields match; a non-Selection (or null) is never equal. */
+    public boolean equals(Object other) {
+        if (!(other instanceof Selection)) return false;
+        Selection that = (Selection)other;
+        return anchor == that.anchor
+            && caret == that.caret
+            && clickAfter == that.clickAfter;
+    }
+
+    /** Built from the same three fields as equals, as the hashCode contract requires. */
+    public int hashCode() {
+        return 31 * (31 * anchor + caret) + (clickAfter ? 1 : 0);
+    }
+
+    public boolean isLessThan(Selection other) {
+        return getStart() < other.getEnd();
+    }
+
+    /** Clamps anchor and caret into [0, text.length()]; clamping the caret also sets clickAfter. */
+    public Selection pin(String text) {
+        if (anchor > text.length()) {
+            anchor = text.length();
+        } else if (anchor < 0) {
+            anchor = 0;
+        }
+        if (caret > text.length()) {
+            caret = text.length();
+            clickAfter = true;
+        } else if (caret < 0) {
+            caret = 0;
+            clickAfter = false;
+        }
+        return this;
+    }
+
+    public Selection swap(Selection after) {
+        int temp = anchor;
+        anchor = after.anchor;
+        after.anchor = temp;
+        temp = caret;
+        caret = after.caret;
+        after.caret = temp;
+        boolean b = clickAfter;
+        clickAfter = after.clickAfter;
+        after.clickAfter = b;
+        return this;
+    }
+
+    /**
+     * Adjusts for [start, end) having been replaced by len characters.
+     */
+    public Selection fixAfterReplace(int start, int end, int len) {
+        if (anchor >= start) {
+            if (anchor < end) anchor = end;
+            anchor = start + len + anchor - end;
+        }
+        if (caret >= start) {
+            if (caret < end) caret = end;
+            caret = start + len + caret - end;
+        }
+        return this;
+    }
+
+    // Mac & Windows considerably different
+    // Mac: end++. If start!=end, start=end
+    //  SHIFT: move end right
+    //  CTL: no different
+    // Windows:
+    //  UNSHIFTED: if start!=end, start = end, else start=end=end+1;
+    //       anchor = tip = start
+    //  SHIFT: tip++
+    //  CTL: if start!=end, start = end = nextbound(end-1),
+    //   else start=end=nextbound(end)
+    //       anchor = tip = start
+    //  CTL/SHIFT: tip = nextbound(tip)
+
+    /** Moves the caret one boundary in direction; the anchor follows unless extend is set. */
+    public Selection nextBound(BreakIterator breaker,
+                               int direction, boolean extend) {
+        if (!extend && anchor != caret) caret -= direction;
+        caret = next(caret, breaker, direction, true);
+        if (!extend) anchor = caret;
+        clickAfter = false;
+        return this;
+    }
+
+    // expand start and end to word breaks--if they are not already on one
+    public void expand(BreakIterator breaker) {
+        if (anchor <= caret) {
+            anchor = next(anchor,breaker,-1,false);
+            caret = next(caret,breaker,1,false);
+        } else {
+            anchor = next(anchor,breaker,1,false);
+            caret = next(caret,breaker,-1,false);
+        }
+    }
+
+    // different = false - move to next boundary, unless on one
+    //             true - move to next boundary, even if on one
+    // When the breaker reports no boundary, the position is left where it is.
+    public static int next(int position, BreakIterator breaker,
+                           int direction, boolean different) {
+        if (!different) position -= direction;
+        try {
+            int found;
+            if (direction > 0) {
+                found = breaker.following(position);
+            } else {
+                breaker.following(position-1);
+                found = breaker.previous();
+            }
+            // DONE is a sentinel (-1), not an offset: never store it.
+            if (found != BreakIterator.DONE) position = found;
+        } catch (IllegalArgumentException ignored) {
+            // position lies outside the breaker's text; leave it as is
+        }
+        return position;
+    }
+}
+
diff --git a/src/com/ibm/text/components/TransliteratingTextComponent.java b/src/com/ibm/text/components/TransliteratingTextComponent.java
new file mode 100755
index 0000000..02bcd59
--- /dev/null
+++ b/src/com/ibm/text/components/TransliteratingTextComponent.java
@@ -0,0 +1,191 @@
+package com.ibm.text.components;
+
+import java.awt.*;
+import java.awt.event.*;
+import java.text.*;
+import java.awt.datatransfer.*;
+import com.ibm.text.*;
+
+/**
+ * A subclass of {@link DumbTextComponent} that passes key events through
+ * a {@link com.ibm.text.Transliterator}.
+ *
+ * <p>Copyright © IBM Corporation 1999. All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliteratingTextComponent.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class TransliteratingTextComponent extends DumbTextComponent {
+
+ private static boolean DEBUG = false;
+
+ private Transliterator translit = null;
+
+ // Index into getText() where the start of transliteration is.
+ // As we commit text during keyboardTransliteration, we advance
+ // this.
+ private int start = 0;
+
+ // Index into getText() where the cursor is; cursor >= start
+ private int cursor = 0;
+
+ private static final String COPYRIGHT =
+ "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+ /**
+ * Constructor.
+ */
+ public TransliteratingTextComponent() {
+ super();
+ addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ // We get an ActionEvent only when the selection changes
+ resetTransliterationStart();
+ }
+ });
+ }
+
+ /**
+ * {@link DumbTextComponent} API. Framework method that is called
+ * when a <code>KeyEvent</code> is received. This implementation
+ * runs the new character through the current
+ * <code>Transliterator</code>, if one is set, and inserts the
+ * transliterated text into the buffer.
+ */
+ protected void handleKeyTyped(KeyEvent e) {
+ char ch = e.getKeyChar();
+
+ if (translit == null) {
+ super.handleKeyTyped(e);
+ return;
+ }
+
+ // ------------------------------------------------------------
+ // The following case motivates the two lines that recompute
+ // start and cursor below.
+
+ // " "
+ // a b c q r|s t u m m
+ // 0 1 2 3 4 5 6 7 8 9
+ // 0 1 2
+
+ // start 3, cursor 5, sel 6 -> { 0, 3, 2 }
+ // : new int[] { 0, sel - start, cursor - start };
+
+ // sz>99|9
+
+ // " { "
+ // a b c q r 9 9|9 t u m m
+ // 0 1 2 3 4 5 6 7 8 9 a b
+ // 0 1 2 3 4
+
+ // { 3, 5, 4 } -> start 6, cursor 7, sel 8
+ // : start += index[0];
+ // : cursor = start + index[2] - index[0];
+ // ------------------------------------------------------------
+
+ // Need to save start because calls to replaceRange will update
+ // start and cursor.
+ int saveStart = start;
+
+ ReplaceableString buf = new ReplaceableString();
+ buf.getStringBuffer().append(getText().substring(start,
+ getSelectionStart()));
+
+ int[] index = new int[] { 0, getSelectionStart() - start,
+ cursor - start};
+
+ StringBuffer log = null;
+ if (DEBUG) {
+ log = new StringBuffer();
+ log.append("start " + start + ", cursor " + cursor);
+ log.append(", sel " + getSelectionStart());
+ log.append(", {" + index[0] + ", " + index[1] + ", " + index[2] + "}, ");
+ log.append('"' + buf.toString() + "\" + '" + ch + "' -> \"");
+ }
+
+ translit.keyboardTransliterate(buf, index, ch);
+ replaceRange(buf.toString(), start, getSelectionEnd());
+ // At this point start has been changed by the callback to
+ // resetTransliteratorStart() via replaceRange() -- so use our
+ // local copy, saveStart.
+
+ // The START index is zero-based. On entry to keyboardTransliterate(),
+ // it was zero. We can therefore just add it to our original
+ // getText()-based index value of start (in saveStart) to get
+ // the new getText()-based start.
+ start = saveStart + index[Transliterator.START];
+
+ // Make the cursor getText()-based. The CURSOR index is zero-based.
+ cursor = start + index[Transliterator.CURSOR]
+ - index[Transliterator.START];
+
+ if (DEBUG) {
+ String out = buf.toString();
+ log.append(out.substring(0, index[Transliterator.START])).
+ append('{').
+ append(out.substring(index[Transliterator.START],
+ index[Transliterator.CURSOR])).
+ append('|').
+ append(out.substring(index[Transliterator.CURSOR])).
+ append('"');
+ log.append(", {" + index[0] + ", " + index[1] + ", " + index[2] + "}, ");
+ log.append("start " + start + ", cursor " + cursor);
+ log.append(", sel " + getSelectionStart());
+ System.out.println(escape(log.toString()));
+ }
+ }
+
+ /**
+ * Set the {@link com.ibm.text.Transliterator} and direction to
+ * use to process incoming <code>KeyEvent</code>s.
+ * @param t the {@link com.ibm.text.Transliterator} to use
+ */
+ public void setTransliterator(Transliterator t) {
+ if (translit != t) { // [sic] pointer compare ok; singletons
+ resetTransliterationStart();
+ }
+ translit = t;
+ }
+
+ /**
+ * Reset the start point at which transliteration begins. This
+ * needs to be done when the user moves the cursor or when the
+ * current {@link com.ibm.text.Transliterator} is changed.
+ */
+ private void resetTransliterationStart() {
+ start = getSelectionStart();
+ cursor = start;
+ }
+
+ /**
+  * Escape non-ASCII characters as Unicode.
+  * JUST FOR DEBUGGING OUTPUT.
+  * Characters from space through U+007F are copied as is, except that a
+  * backslash is doubled; every other character is written as a backslash,
+  * the letter u and four lowercase hex digits.
+  */
+ public static final String escape(String s) {
+     StringBuffer out = new StringBuffer();
+     int n = s.length();
+     for (int i = 0; i < n; ++i) {
+         char c = s.charAt(i);
+         if (c == '\\') {
+             out.append("\\\\"); // That is, "\\"
+             continue;
+         }
+         if (c >= ' ' && c <= 0x007F) {
+             out.append(c);
+             continue;
+         }
+         String digits = Integer.toHexString(c);
+         out.append("\\u");
+         for (int pad = digits.length(); pad < 4; ++pad) {
+             out.append('0');
+         }
+         out.append(digits);
+     }
+     return out.toString();
+ }
+}
diff --git a/src/com/ibm/text/resources/TransliterationRule$Fullwidth$Halfwidth.java b/src/com/ibm/text/resources/TransliterationRule$Fullwidth$Halfwidth.java
new file mode 100755
index 0000000..7c1481f
--- /dev/null
+++ b/src/com/ibm/text/resources/TransliterationRule$Fullwidth$Halfwidth.java
@@ -0,0 +1,276 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+ public class TransliterationRule$Fullwidth$Halfwidth extends ListResourceBundle {
+ /**
+ * Returns the bundle contents: a single "Rule" entry (overrides ListResourceBundle).
+ */
+ public Object[][] getContents() {
+ return new Object[][] {
+ { "Rule", ""
+
+ /* Mechanically generated from Unicode Character Database.
+ * Every rule below has the form "left<>right;" and is appended to one string. */
+
+ // multicharacter
+
+ + "\u30AC<>\uFF76\uFF9E;" // to KATAKANA LETTER GA
+ + "\u30AE<>\uFF77\uFF9E;" // to KATAKANA LETTER GI
+ + "\u30B0<>\uFF78\uFF9E;" // to KATAKANA LETTER GU
+ + "\u30B2<>\uFF79\uFF9E;" // to KATAKANA LETTER GE
+ + "\u30B4<>\uFF7A\uFF9E;" // to KATAKANA LETTER GO
+ + "\u30B6<>\uFF7B\uFF9E;" // to KATAKANA LETTER ZA
+ + "\u30B8<>\uFF7C\uFF9E;" // to KATAKANA LETTER ZI
+ + "\u30BA<>\uFF7D\uFF9E;" // to KATAKANA LETTER ZU
+ + "\u30BC<>\uFF7E\uFF9E;" // to KATAKANA LETTER ZE
+ + "\u30BE<>\uFF7F\uFF9E;" // to KATAKANA LETTER ZO
+ + "\u30C0<>\uFF80\uFF9E;" // to KATAKANA LETTER DA
+ + "\u30C2<>\uFF81\uFF9E;" // to KATAKANA LETTER DI
+ + "\u30C5<>\uFF82\uFF9E;" // to KATAKANA LETTER DU
+ + "\u30C7<>\uFF83\uFF9E;" // to KATAKANA LETTER DE
+ + "\u30C9<>\uFF84\uFF9E;" // to KATAKANA LETTER DO
+ + "\u30D0<>\uFF8A\uFF9E;" // to KATAKANA LETTER BA
+ + "\u30D1<>\uFF8A\uFF9F;" // to KATAKANA LETTER PA
+ + "\u30D3<>\uFF8B\uFF9E;" // to KATAKANA LETTER BI
+ + "\u30D4<>\uFF8B\uFF9F;" // to KATAKANA LETTER PI
+ + "\u30D6<>\uFF8C\uFF9E;" // to KATAKANA LETTER BU
+ + "\u30D7<>\uFF8C\uFF9F;" // to KATAKANA LETTER PU
+ + "\u30D9<>\uFF8D\uFF9E;" // to KATAKANA LETTER BE
+ + "\u30DA<>\uFF8D\uFF9F;" // to KATAKANA LETTER PE
+ + "\u30DC<>\uFF8E\uFF9E;" // to KATAKANA LETTER BO
+ + "\u30DD<>\uFF8E\uFF9F;" // to KATAKANA LETTER PO
+ + "\u30F4<>\uFF73\uFF9E;" // to KATAKANA LETTER VU
+ + "\u30F7<>\uFF9C\uFF9E;" // to KATAKANA LETTER VA
+ + "\u30FA<>\uFF66\uFF9E;" // to KATAKANA LETTER VO
+
+ // single character
+
+ + "\uFF01<>'!';" // from FULLWIDTH EXCLAMATION MARK
+ + "\uFF02<>'\"';" // from FULLWIDTH QUOTATION MARK
+ + "\uFF03<>'#';" // from FULLWIDTH NUMBER SIGN
+ + "\uFF04<>'$';" // from FULLWIDTH DOLLAR SIGN
+ + "\uFF05<>'%';" // from FULLWIDTH PERCENT SIGN
+ + "\uFF06<>'&';" // from FULLWIDTH AMPERSAND
+ + "\uFF07<>'';" // from FULLWIDTH APOSTROPHE
+ + "\uFF08<>'(';" // from FULLWIDTH LEFT PARENTHESIS
+ + "\uFF09<>')';" // from FULLWIDTH RIGHT PARENTHESIS
+ + "\uFF0A<>'*';" // from FULLWIDTH ASTERISK
+ + "\uFF0B<>'+';" // from FULLWIDTH PLUS SIGN
+ + "\uFF0C<>',';" // from FULLWIDTH COMMA
+ + "\uFF0D<>'-';" // from FULLWIDTH HYPHEN-MINUS
+ + "\uFF0E<>'.';" // from FULLWIDTH FULL STOP
+ + "\uFF0F<>'/';" // from FULLWIDTH SOLIDUS
+ + "\uFF10<>'0';" // from FULLWIDTH DIGIT ZERO
+ + "\uFF11<>'1';" // from FULLWIDTH DIGIT ONE
+ + "\uFF12<>'2';" // from FULLWIDTH DIGIT TWO
+ + "\uFF13<>'3';" // from FULLWIDTH DIGIT THREE
+ + "\uFF14<>'4';" // from FULLWIDTH DIGIT FOUR
+ + "\uFF15<>'5';" // from FULLWIDTH DIGIT FIVE
+ + "\uFF16<>'6';" // from FULLWIDTH DIGIT SIX
+ + "\uFF17<>'7';" // from FULLWIDTH DIGIT SEVEN
+ + "\uFF18<>'8';" // from FULLWIDTH DIGIT EIGHT
+ + "\uFF19<>'9';" // from FULLWIDTH DIGIT NINE
+ + "\uFF1A<>':';" // from FULLWIDTH COLON
+ + "\uFF1B<>';';" // from FULLWIDTH SEMICOLON
+ + "\uFF1C<>'<';" // from FULLWIDTH LESS-THAN SIGN
+ + "\uFF1D<>'=';" // from FULLWIDTH EQUALS SIGN
+ + "\uFF1E<>'>';" // from FULLWIDTH GREATER-THAN SIGN
+ + "\uFF1F<>'?';" // from FULLWIDTH QUESTION MARK
+ + "\uFF20<>'@';" // from FULLWIDTH COMMERCIAL AT
+ + "\uFF21<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
+ + "\uFF22<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
+ + "\uFF23<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
+ + "\uFF24<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
+ + "\uFF25<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
+ + "\uFF26<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
+ + "\uFF27<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
+ + "\uFF28<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
+ + "\uFF29<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
+ + "\uFF2A<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
+ + "\uFF2B<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
+ + "\uFF2C<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
+ + "\uFF2D<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
+ + "\uFF2E<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
+ + "\uFF2F<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
+ + "\uFF30<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
+ + "\uFF31<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
+ + "\uFF32<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
+ + "\uFF33<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
+ + "\uFF34<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
+ + "\uFF35<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
+ + "\uFF36<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
+ + "\uFF37<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
+ + "\uFF38<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
+ + "\uFF39<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
+ + "\uFF3A<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
+ + "\uFF3B<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
+ + "\uFF3C<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
+ + "\uFF3D<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
+ + "\uFF3E<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
+ + "\uFF3F<>'_';" // from FULLWIDTH LOW LINE
+ + "\uFF40<>'`';" // from FULLWIDTH GRAVE ACCENT
+ + "\uFF41<>a;" // from FULLWIDTH LATIN SMALL LETTER A
+ + "\uFF42<>b;" // from FULLWIDTH LATIN SMALL LETTER B
+ + "\uFF43<>c;" // from FULLWIDTH LATIN SMALL LETTER C
+ + "\uFF44<>d;" // from FULLWIDTH LATIN SMALL LETTER D
+ + "\uFF45<>e;" // from FULLWIDTH LATIN SMALL LETTER E
+ + "\uFF46<>f;" // from FULLWIDTH LATIN SMALL LETTER F
+ + "\uFF47<>g;" // from FULLWIDTH LATIN SMALL LETTER G
+ + "\uFF48<>h;" // from FULLWIDTH LATIN SMALL LETTER H
+ + "\uFF49<>i;" // from FULLWIDTH LATIN SMALL LETTER I
+ + "\uFF4A<>j;" // from FULLWIDTH LATIN SMALL LETTER J
+ + "\uFF4B<>k;" // from FULLWIDTH LATIN SMALL LETTER K
+ + "\uFF4C<>l;" // from FULLWIDTH LATIN SMALL LETTER L
+ + "\uFF4D<>m;" // from FULLWIDTH LATIN SMALL LETTER M
+ + "\uFF4E<>n;" // from FULLWIDTH LATIN SMALL LETTER N
+ + "\uFF4F<>o;" // from FULLWIDTH LATIN SMALL LETTER O
+ + "\uFF50<>p;" // from FULLWIDTH LATIN SMALL LETTER P
+ + "\uFF51<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
+ + "\uFF52<>r;" // from FULLWIDTH LATIN SMALL LETTER R
+ + "\uFF53<>s;" // from FULLWIDTH LATIN SMALL LETTER S
+ + "\uFF54<>t;" // from FULLWIDTH LATIN SMALL LETTER T
+ + "\uFF55<>u;" // from FULLWIDTH LATIN SMALL LETTER U
+ + "\uFF56<>v;" // from FULLWIDTH LATIN SMALL LETTER V
+ + "\uFF57<>w;" // from FULLWIDTH LATIN SMALL LETTER W
+ + "\uFF58<>x;" // from FULLWIDTH LATIN SMALL LETTER X
+ + "\uFF59<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
+ + "\uFF5A<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
+ + "\uFF5B<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
+ + "\uFF5C<>'|';" // from FULLWIDTH VERTICAL LINE
+ + "\uFF5D<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
+ + "\uFF5E<>'~';" // from FULLWIDTH TILDE
+ + "\u3002<>\uFF61;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
+ + "\u300C<>\uFF62;" // to HALFWIDTH LEFT CORNER BRACKET
+ + "\u300D<>\uFF63;" // to HALFWIDTH RIGHT CORNER BRACKET
+ + "\u3001<>\uFF64;" // to HALFWIDTH IDEOGRAPHIC COMMA
+ + "\u30FB<>\uFF65;" // to HALFWIDTH KATAKANA MIDDLE DOT
+ + "\u30F2<>\uFF66;" // to HALFWIDTH KATAKANA LETTER WO
+ + "\u30A1<>\uFF67;" // to HALFWIDTH KATAKANA LETTER SMALL A
+ + "\u30A3<>\uFF68;" // to HALFWIDTH KATAKANA LETTER SMALL I
+ + "\u30A5<>\uFF69;" // to HALFWIDTH KATAKANA LETTER SMALL U
+ + "\u30A7<>\uFF6A;" // to HALFWIDTH KATAKANA LETTER SMALL E
+ + "\u30A9<>\uFF6B;" // to HALFWIDTH KATAKANA LETTER SMALL O
+ + "\u30E3<>\uFF6C;" // to HALFWIDTH KATAKANA LETTER SMALL YA
+ + "\u30E5<>\uFF6D;" // to HALFWIDTH KATAKANA LETTER SMALL YU
+ + "\u30E7<>\uFF6E;" // to HALFWIDTH KATAKANA LETTER SMALL YO
+ + "\u30C3<>\uFF6F;" // to HALFWIDTH KATAKANA LETTER SMALL TU
+ + "\u30FC<>\uFF70;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ + "\u30A2<>\uFF71;" // to HALFWIDTH KATAKANA LETTER A
+ + "\u30A4<>\uFF72;" // to HALFWIDTH KATAKANA LETTER I
+ + "\u30A6<>\uFF73;" // to HALFWIDTH KATAKANA LETTER U
+ + "\u30A8<>\uFF74;" // to HALFWIDTH KATAKANA LETTER E
+ + "\u30AA<>\uFF75;" // to HALFWIDTH KATAKANA LETTER O
+ + "\u30AB<>\uFF76;" // to HALFWIDTH KATAKANA LETTER KA
+ + "\u30AD<>\uFF77;" // to HALFWIDTH KATAKANA LETTER KI
+ + "\u30AF<>\uFF78;" // to HALFWIDTH KATAKANA LETTER KU
+ + "\u30B1<>\uFF79;" // to HALFWIDTH KATAKANA LETTER KE
+ + "\u30B3<>\uFF7A;" // to HALFWIDTH KATAKANA LETTER KO
+ + "\u30B5<>\uFF7B;" // to HALFWIDTH KATAKANA LETTER SA
+ + "\u30B7<>\uFF7C;" // to HALFWIDTH KATAKANA LETTER SI
+ + "\u30B9<>\uFF7D;" // to HALFWIDTH KATAKANA LETTER SU
+ + "\u30BB<>\uFF7E;" // to HALFWIDTH KATAKANA LETTER SE
+ + "\u30BD<>\uFF7F;" // to HALFWIDTH KATAKANA LETTER SO
+ + "\u30BF<>\uFF80;" // to HALFWIDTH KATAKANA LETTER TA
+ + "\u30C1<>\uFF81;" // to HALFWIDTH KATAKANA LETTER TI
+ + "\u30C4<>\uFF82;" // to HALFWIDTH KATAKANA LETTER TU
+ + "\u30C6<>\uFF83;" // to HALFWIDTH KATAKANA LETTER TE
+ + "\u30C8<>\uFF84;" // to HALFWIDTH KATAKANA LETTER TO
+ + "\u30CA<>\uFF85;" // to HALFWIDTH KATAKANA LETTER NA
+ + "\u30CB<>\uFF86;" // to HALFWIDTH KATAKANA LETTER NI
+ + "\u30CC<>\uFF87;" // to HALFWIDTH KATAKANA LETTER NU
+ + "\u30CD<>\uFF88;" // to HALFWIDTH KATAKANA LETTER NE
+ + "\u30CE<>\uFF89;" // to HALFWIDTH KATAKANA LETTER NO
+ + "\u30CF<>\uFF8A;" // to HALFWIDTH KATAKANA LETTER HA
+ + "\u30D2<>\uFF8B;" // to HALFWIDTH KATAKANA LETTER HI
+ + "\u30D5<>\uFF8C;" // to HALFWIDTH KATAKANA LETTER HU
+ + "\u30D8<>\uFF8D;" // to HALFWIDTH KATAKANA LETTER HE
+ + "\u30DB<>\uFF8E;" // to HALFWIDTH KATAKANA LETTER HO
+ + "\u30DE<>\uFF8F;" // to HALFWIDTH KATAKANA LETTER MA
+ + "\u30DF<>\uFF90;" // to HALFWIDTH KATAKANA LETTER MI
+ + "\u30E0<>\uFF91;" // to HALFWIDTH KATAKANA LETTER MU
+ + "\u30E1<>\uFF92;" // to HALFWIDTH KATAKANA LETTER ME
+ + "\u30E2<>\uFF93;" // to HALFWIDTH KATAKANA LETTER MO
+ + "\u30E4<>\uFF94;" // to HALFWIDTH KATAKANA LETTER YA
+ + "\u30E6<>\uFF95;" // to HALFWIDTH KATAKANA LETTER YU
+ + "\u30E8<>\uFF96;" // to HALFWIDTH KATAKANA LETTER YO
+ + "\u30E9<>\uFF97;" // to HALFWIDTH KATAKANA LETTER RA
+ + "\u30EA<>\uFF98;" // to HALFWIDTH KATAKANA LETTER RI
+ + "\u30EB<>\uFF99;" // to HALFWIDTH KATAKANA LETTER RU
+ + "\u30EC<>\uFF9A;" // to HALFWIDTH KATAKANA LETTER RE
+ + "\u30ED<>\uFF9B;" // to HALFWIDTH KATAKANA LETTER RO
+ + "\u30EF<>\uFF9C;" // to HALFWIDTH KATAKANA LETTER WA
+ + "\u30F3<>\uFF9D;" // to HALFWIDTH KATAKANA LETTER N
+ + "\u3099<>\uFF9E;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
+ + "\u309A<>\uFF9F;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+ + "\u1160<>\uFFA0;" // to HALFWIDTH HANGUL FILLER
+ + "\u1100<>\uFFA1;" // to HALFWIDTH HANGUL LETTER KIYEOK
+ + "\u1101<>\uFFA2;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
+ + "\u11AA<>\uFFA3;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
+ + "\u1102<>\uFFA4;" // to HALFWIDTH HANGUL LETTER NIEUN
+ + "\u11AC<>\uFFA5;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
+ + "\u11AD<>\uFFA6;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
+ + "\u1103<>\uFFA7;" // to HALFWIDTH HANGUL LETTER TIKEUT
+ + "\u1104<>\uFFA8;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
+ + "\u1105<>\uFFA9;" // to HALFWIDTH HANGUL LETTER RIEUL
+ + "\u11B0<>\uFFAA;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
+ + "\u11B1<>\uFFAB;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
+ + "\u11B2<>\uFFAC;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
+ + "\u11B3<>\uFFAD;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
+ + "\u11B4<>\uFFAE;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
+ + "\u11B5<>\uFFAF;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
+ + "\u111A<>\uFFB0;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
+ + "\u1106<>\uFFB1;" // to HALFWIDTH HANGUL LETTER MIEUM
+ + "\u1107<>\uFFB2;" // to HALFWIDTH HANGUL LETTER PIEUP
+ + "\u1108<>\uFFB3;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
+ + "\u1121<>\uFFB4;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
+ + "\u1109<>\uFFB5;" // to HALFWIDTH HANGUL LETTER SIOS
+ + "\u110A<>\uFFB6;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
+ + "\u110B<>\uFFB7;" // to HALFWIDTH HANGUL LETTER IEUNG
+ + "\u110C<>\uFFB8;" // to HALFWIDTH HANGUL LETTER CIEUC
+ + "\u110D<>\uFFB9;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
+ + "\u110E<>\uFFBA;" // to HALFWIDTH HANGUL LETTER CHIEUCH
+ + "\u110F<>\uFFBB;" // to HALFWIDTH HANGUL LETTER KHIEUKH
+ + "\u1110<>\uFFBC;" // to HALFWIDTH HANGUL LETTER THIEUTH
+ + "\u1111<>\uFFBD;" // to HALFWIDTH HANGUL LETTER PHIEUPH
+ + "\u1112<>\uFFBE;" // to HALFWIDTH HANGUL LETTER HIEUH
+ + "\u1161<>\uFFC2;" // to HALFWIDTH HANGUL LETTER A
+ + "\u1162<>\uFFC3;" // to HALFWIDTH HANGUL LETTER AE
+ + "\u1163<>\uFFC4;" // to HALFWIDTH HANGUL LETTER YA
+ + "\u1164<>\uFFC5;" // to HALFWIDTH HANGUL LETTER YAE
+ + "\u1165<>\uFFC6;" // to HALFWIDTH HANGUL LETTER EO
+ + "\u1166<>\uFFC7;" // to HALFWIDTH HANGUL LETTER E
+ + "\u1167<>\uFFCA;" // to HALFWIDTH HANGUL LETTER YEO
+ + "\u1168<>\uFFCB;" // to HALFWIDTH HANGUL LETTER YE
+ + "\u1169<>\uFFCC;" // to HALFWIDTH HANGUL LETTER O
+ + "\u116A<>\uFFCD;" // to HALFWIDTH HANGUL LETTER WA
+ + "\u116B<>\uFFCE;" // to HALFWIDTH HANGUL LETTER WAE
+ + "\u116C<>\uFFCF;" // to HALFWIDTH HANGUL LETTER OE
+ + "\u116D<>\uFFD2;" // to HALFWIDTH HANGUL LETTER YO
+ + "\u116E<>\uFFD3;" // to HALFWIDTH HANGUL LETTER U
+ + "\u116F<>\uFFD4;" // to HALFWIDTH HANGUL LETTER WEO
+ + "\u1170<>\uFFD5;" // to HALFWIDTH HANGUL LETTER WE
+ + "\u1171<>\uFFD6;" // to HALFWIDTH HANGUL LETTER WI
+ + "\u1172<>\uFFD7;" // to HALFWIDTH HANGUL LETTER YU
+ + "\u1173<>\uFFDA;" // to HALFWIDTH HANGUL LETTER EU
+ + "\u1174<>\uFFDB;" // to HALFWIDTH HANGUL LETTER YI
+ + "\u1175<>\uFFDC;" // to HALFWIDTH HANGUL LETTER I
+ + "\uFFE0<>'\u00a2';" // from FULLWIDTH CENT SIGN
+ + "\uFFE1<>'\u00a3';" // from FULLWIDTH POUND SIGN
+ + "\uFFE2<>'\u00ac';" // from FULLWIDTH NOT SIGN
+ + "\uFFE3<>' '\u0304;" // from FULLWIDTH MACRON
+ + "\uFFE4<>'\u00a6';" // from FULLWIDTH BROKEN BAR
+ + "\uFFE5<>'\u00a5';" // from FULLWIDTH YEN SIGN
+ + "\uFFE6<>\u20A9;" // from FULLWIDTH WON SIGN
+ + "\u2502<>\uFFE8;" // to HALFWIDTH FORMS LIGHT VERTICAL
+ + "\u2190<>\uFFE9;" // to HALFWIDTH LEFTWARDS ARROW
+ + "\u2191<>\uFFEA;" // to HALFWIDTH UPWARDS ARROW
+ + "\u2192<>\uFFEB;" // to HALFWIDTH RIGHTWARDS ARROW
+ + "\u2193<>\uFFEC;" // to HALFWIDTH DOWNWARDS ARROW
+ + "\u25A0<>\uFFED;" // to HALFWIDTH BLACK SQUARE
+ + "\u25CB<>\uFFEE;" // to HALFWIDTH WHITE CIRCLE
+
+ }
+ };
+ }
+ }
diff --git a/src/com/ibm/text/resources/TransliterationRule$Han$Pinyin.java b/src/com/ibm/text/resources/TransliterationRule$Han$Pinyin.java
new file mode 100755
index 0000000..e09f559
--- /dev/null
+++ b/src/com/ibm/text/resources/TransliterationRule$Han$Pinyin.java
@@ -0,0 +1,20351 @@
+package com.ibm.text.resources;
+import java.util.ListResourceBundle;
+public class TransliterationRule$Han$Pinyin extends ListResourceBundle {
+ public Object[][] getContents() {
+ return new Object[][] {
+ {"Rule", new String[] {
+ "\u4E01>'[ding]'",
+ "\u4E02>'['ka\u014F']'",
+ "\u4E03>'[qi]'",
+ "\u4E04>'['sh\u00e0ng']'",
+ "\u4E05>'['xi\u00e0']'",
+ "\u4E07>'['m\u00f2']'",
+ "\u4E08>'['zh\u00e0ng']'",
+ "\u4E09>'[san]'",
+ "\u4E0A>'['sh\u00e0ng']'",
+ "\u4E0B>'['xi\u00e0']'",
+ "\u4E0C>'[ji]'",
+ "\u4E0D>'['b\u00f9']'",
+ "\u4E0E>'['y\u016D']'",
+ "\u4E0F>'['mi\u0103n']'",
+ "\u4E10>'['ga\u00ec']'",
+ "\u4E11>'['cho\u016D']'",
+ "\u4E12>'['cho\u016D']'",
+ "\u4E13>'[zhuan]'",
+ "\u4E14>'['qi\u0115']'",
+ "\u4E15>'[pi]'",
+ "\u4E16>'['sh\u00ec']'",
+ "\u4E17>'['sh\u00ec']'",
+ "\u4E18>'[qiu]'",
+ "\u4E19>'['b\u012Dng']'",
+ "\u4E1A>'['y\u00e8']'",
+ "\u4E1B>'['c\u00f3ng']'",
+ "\u4E1C>'[dong]'",
+ "\u4E1D>'[si]'",
+ "\u4E1E>'['ch\u00e9ng']'",
+ "\u4E1F>'[diu]'",
+ "\u4E20>'[qiu]'",
+ "\u4E21>'['li\u0103ng']'",
+ "\u4E22>'[diu]'",
+ "\u4E23>'['yo\u016D']'",
+ "\u4E24>'['li\u0103ng']'",
+ "\u4E25>'['y\u00e1n']'",
+ "\u4E26>'['b\u00ecng']'",
+ "\u4E27>'[sang]'",
+ "\u4E28>'['g\u016Dn']'",
+ "\u4E29>'[jiu]'",
+ "\u4E2A>'['g\u00e8']'",
+ "\u4E2B>'[ya]'",
+ "\u4E2C>'['qi\u00e1ng']'",
+ "\u4E2D>'[zhong]'",
+ "\u4E2E>'['j\u012D']'",
+ "\u4E2F>'['ji\u00e8']'",
+ "\u4E30>'[feng]'",
+ "\u4E31>'['gu\u00e0n']'",
+ "\u4E32>'['chu\u00e0n']'",
+ "\u4E33>'['ch\u0103n']'",
+ "\u4E34>'['l\u00edn']'",
+ "\u4E35>'['zhu\u014F']'",
+ "\u4E36>'['zh\u016D']'",
+ "\u4E38>'['w\u00e1n']'",
+ "\u4E39>'[dan]'",
+ "\u4E3A>'['we\u00ec']'",
+ "\u4E3B>'['zh\u016D']'",
+ "\u4E3C>'['j\u012Dng']'",
+ "\u4E3D>'['l\u00ec']'",
+ "\u4E3E>'['j\u016D']'",
+ "\u4E3F>'['pi\u0115']'",
+ "\u4E40>'['f\u00fa']'",
+ "\u4E41>'['y\u00ed']'",
+ "\u4E42>'['y\u00ec']'",
+ "\u4E43>'['na\u012D']'",
+ "\u4E45>'['ji\u016D']'",
+ "\u4E46>'['ji\u016D']'",
+ "\u4E47>'['zh\u00e9']'",
+ "\u4E48>'[yao]'",
+ "\u4E49>'['y\u00ec']'",
+ "\u4E4B>'[zhi]'",
+ "\u4E4C>'[wu]'",
+ "\u4E4D>'['zh\u00e0']'",
+ "\u4E4E>'[hu]'",
+ "\u4E4F>'['f\u00e1']'",
+ "\u4E50>'['l\u00e8']'",
+ "\u4E51>'['zh\u00f2ng']'",
+ "\u4E52>'[ping]'",
+ "\u4E53>'['p\u0101ng']'",
+ "\u4E54>'['qia\u00f3']'",
+ "\u4E55>'['h\u016D']'",
+ "\u4E56>'[guai]'",
+ "\u4E57>'['ch\u00e9ng']'",
+ "\u4E58>'['ch\u00e9ng']'",
+ "\u4E59>'['y\u012D']'",
+ "\u4E5A>'['y\u012Dn']'",
+ "\u4E5C>'[mie]'",
+ "\u4E5D>'['ji\u016D']'",
+ "\u4E5E>'['q\u012D']'",
+ "\u4E5F>'['y\u0115']'",
+ "\u4E60>'['x\u00ed']'",
+ "\u4E61>'[xiang]'",
+ "\u4E62>'['ga\u00ec']'",
+ "\u4E63>'[diu]'",
+ "\u4E66>'[shu]'",
+ "\u4E68>'['sh\u012D']'",
+ "\u4E69>'[ji]'",
+ "\u4E6A>'[nang]'",
+ "\u4E6B>'[jia]'",
+ "\u4E6D>'['sh\u00ed']'",
+ "\u4E70>'['ma\u012D']'",
+ "\u4E71>'['lu\u00e0n']'",
+ "\u4E73>'['r\u016D']'",
+ "\u4E74>'['xu\u00e9']'",
+ "\u4E75>'['y\u0103n']'",
+ "\u4E76>'['f\u016D']'",
+ "\u4E77>'[sha]'",
+ "\u4E78>'['n\u0103']'",
+ "\u4E79>'[gan]'",
+ "\u4E7E>'[gan]'",
+ "\u4E7F>'['ch\u00ec']'",
+ "\u4E80>'[gui]'",
+ "\u4E81>'[gan]'",
+ "\u4E82>'['lu\u00e0n']'",
+ "\u4E83>'['l\u00edn']'",
+ "\u4E84>'['y\u00ec']'",
+ "\u4E85>'['ju\u00e9']'",
+ "\u4E86>'['lia\u014F']'",
+ "\u4E88>'['y\u00fa']'",
+ "\u4E89>'[zheng]'",
+ "\u4E8A>'['sh\u00ec']'",
+ "\u4E8B>'['sh\u00ec']'",
+ "\u4E8C>'['\u00e8r']'",
+ "\u4E8D>'['ch\u00f9']'",
+ "\u4E8E>'['y\u00fa']'",
+ "\u4E8F>'['y\u00fa']'",
+ "\u4E90>'['y\u00fa']'",
+ "\u4E91>'['y\u00fan']'",
+ "\u4E92>'['h\u00f9']'",
+ "\u4E93>'['q\u00ed']'",
+ "\u4E94>'['w\u016D']'",
+ "\u4E95>'['j\u012Dng']'",
+ "\u4E96>'['s\u00ec']'",
+ "\u4E97>'['su\u00ec']'",
+ "\u4E98>'['g\u00e8n']'",
+ "\u4E99>'['g\u00e8n']'",
+ "\u4E9A>'['y\u00e0']'",
+ "\u4E9B>'[xie]'",
+ "\u4E9C>'['y\u00e0']'",
+ "\u4E9D>'['q\u00ed']'",
+ "\u4E9E>'['y\u00e0']'",
+ "\u4E9F>'['j\u00ed']'",
+ "\u4EA0>'['to\u00fa']'",
+ "\u4EA1>'['w\u00e1ng']'",
+ "\u4EA2>'['k\u00e0ng']'",
+ "\u4EA3>'['t\u00e0']'",
+ "\u4EA4>'[jiao]'",
+ "\u4EA5>'['ha\u00ec']'",
+ "\u4EA6>'['y\u00ec']'",
+ "\u4EA7>'['ch\u0103n']'",
+ "\u4EA8>'[heng]'",
+ "\u4EA9>'['m\u016D']'",
+ "\u4EAB>'['xi\u0103ng']'",
+ "\u4EAC>'[jing]'",
+ "\u4EAD>'['t\u00edng']'",
+ "\u4EAE>'['li\u00e0ng']'",
+ "\u4EAF>'['xi\u0103ng']'",
+ "\u4EB0>'[jing]'",
+ "\u4EB1>'['y\u00e8']'",
+ "\u4EB2>'[qin]'",
+ "\u4EB3>'['b\u00f3']'",
+ "\u4EB4>'['yo\u00f9']'",
+ "\u4EB5>'['xi\u00e8']'",
+ "\u4EB6>'['d\u0103n']'",
+ "\u4EB7>'['li\u00e1n']'",
+ "\u4EB8>'['du\u014F']'",
+ "\u4EB9>'['we\u012D']'",
+ "\u4EBA>'['r\u00e9n']'",
+ "\u4EBB>'['r\u00e9n']'",
+ "\u4EBC>'['j\u00ed']'",
+ "\u4EBE>'['w\u00e1ng']'",
+ "\u4EBF>'['y\u00ec']'",
+ "\u4EC0>'['sh\u00ed']'",
+ "\u4EC1>'['r\u00e9n']'",
+ "\u4EC2>'['l\u00e8']'",
+ "\u4EC3>'[ding]'",
+ "\u4EC4>'['z\u00e8']'",
+ "\u4EC5>'['j\u012Dn']'",
+ "\u4EC6>'[pu]'",
+ "\u4EC7>'['cho\u00fa']'",
+ "\u4EC8>'[ba]'",
+ "\u4EC9>'['zh\u0103ng']'",
+ "\u4ECA>'[jin]'",
+ "\u4ECB>'['ji\u00e8']'",
+ "\u4ECC>'[bing]'",
+ "\u4ECD>'['r\u00e9ng']'",
+ "\u4ECE>'['c\u00f3ng']'",
+ "\u4ECF>'['f\u00f3']'",
+ "\u4ED0>'['s\u0103n']'",
+ "\u4ED1>'['l\u00fan']'",
+ "\u4ED3>'[cang]'",
+ "\u4ED4>'['z\u012D']'",
+ "\u4ED5>'['sh\u00ec']'",
+ "\u4ED6>'[ta]'",
+ "\u4ED7>'['zh\u00e0ng']'",
+ "\u4ED8>'['f\u00f9']'",
+ "\u4ED9>'[xian]'",
+ "\u4EDA>'[xian]'",
+ "\u4EDB>'[tuo]'",
+ "\u4EDC>'['h\u00f3ng']'",
+ "\u4EDD>'['t\u00f3ng']'",
+ "\u4EDE>'['r\u00e8n']'",
+ "\u4EDF>'[qian]'",
+ "\u4EE0>'['g\u00e1n']'",
+ "\u4EE1>'['y\u00ec']'",
+ "\u4EE2>'['d\u00ed']'",
+ "\u4EE3>'['da\u00ec']'",
+ "\u4EE4>'['l\u00ecng']'",
+ "\u4EE5>'['y\u012D']'",
+ "\u4EE6>'['cha\u00f2']'",
+ "\u4EE7>'['ch\u00e1ng']'",
+ "\u4EE8>'[sa]'",
+ "\u4EEA>'['y\u00ed']'",
+ "\u4EEB>'['m\u00f9']'",
+ "\u4EEC>'['m\u0113n']'",
+ "\u4EED>'['r\u00e8n']'",
+ "\u4EEE>'['ji\u0103']'",
+ "\u4EEF>'['cha\u00f2']'",
+ "\u4EF0>'['y\u0103ng']'",
+ "\u4EF1>'['qi\u00e1n']'",
+ "\u4EF2>'['zh\u00f2ng']'",
+ "\u4EF3>'['p\u012D']'",
+ "\u4EF4>'['w\u00e0n']'",
+ "\u4EF5>'['w\u016D']'",
+ "\u4EF6>'['ji\u00e0n']'",
+ "\u4EF7>'['ji\u00e8']'",
+ "\u4EF8>'['ya\u014F']'",
+ "\u4EF9>'[feng]'",
+ "\u4EFA>'[cang]'",
+ "\u4EFB>'['r\u00e8n']'",
+ "\u4EFC>'['w\u00e1ng']'",
+ "\u4EFD>'['f\u00e8n']'",
+ "\u4EFE>'[di]'",
+ "\u4EFF>'['f\u0103ng']'",
+ "\u4F00>'[zhong]'",
+ "\u4F01>'['q\u012D']'",
+ "\u4F02>'['pe\u00ec']'",
+ "\u4F03>'['y\u00fa']'",
+ "\u4F04>'['dia\u00f2']'",
+ "\u4F05>'['d\u00f9n']'",
+ "\u4F06>'['w\u00e8n']'",
+ "\u4F07>'['y\u00ec']'",
+ "\u4F08>'['x\u012Dn']'",
+ "\u4F09>'['k\u00e0ng']'",
+ "\u4F0A>'[yi]'",
+ "\u4F0B>'['j\u00ed']'",
+ "\u4F0C>'['a\u00ec']'",
+ "\u4F0D>'['w\u016D']'",
+ "\u4F0E>'['j\u00ec']'",
+ "\u4F0F>'['f\u00fa']'",
+ "\u4F10>'['f\u00e1']'",
+ "\u4F11>'[xiu]'",
+ "\u4F12>'['j\u00ecn']'",
+ "\u4F13>'[bei]'",
+ "\u4F14>'['d\u0103n']'",
+ "\u4F15>'[fu]'",
+ "\u4F16>'['t\u0103ng']'",
+ "\u4F17>'['zh\u00f2ng']'",
+ "\u4F18>'[you]'",
+ "\u4F19>'['hu\u014F']'",
+ "\u4F1A>'['hu\u00ec']'",
+ "\u4F1B>'['y\u016D']'",
+ "\u4F1C>'['cu\u00ec']'",
+ "\u4F1D>'['chu\u00e1n']'",
+ "\u4F1E>'['s\u0103n']'",
+ "\u4F1F>'['we\u012D']'",
+ "\u4F20>'['chu\u00e1n']'",
+ "\u4F21>'[che]'",
+ "\u4F22>'['y\u00e1']'",
+ "\u4F23>'['xi\u00e0n']'",
+ "\u4F24>'[shang]'",
+ "\u4F25>'[chang]'",
+ "\u4F26>'['l\u00fan']'",
+ "\u4F27>'[cang]'",
+ "\u4F28>'['x\u00f9n']'",
+ "\u4F29>'['x\u00ecn']'",
+ "\u4F2A>'['we\u012D']'",
+ "\u4F2B>'['zh\u00f9']'",
+ "\u4F2D>'['xu\u00e1n']'",
+ "\u4F2E>'['n\u00fa']'",
+ "\u4F2F>'['b\u00f3']'",
+ "\u4F30>'[gu]'",
+ "\u4F31>'['n\u012D']'",
+ "\u4F32>'['n\u012D']'",
+ "\u4F33>'['xi\u00e8']'",
+ "\u4F34>'['b\u00e0n']'",
+ "\u4F35>'['x\u00f9']'",
+ "\u4F36>'['l\u00edng']'",
+ "\u4F37>'['zho\u00f9']'",
+ "\u4F38>'[shen]'",
+ "\u4F39>'[qu]'",
+ "\u4F3A>'['s\u00ec']'",
+ "\u4F3B>'[beng]'",
+ "\u4F3C>'['s\u00ec']'",
+ "\u4F3D>'[jia]'",
+ "\u4F3E>'[pi]'",
+ "\u4F3F>'['y\u00ec']'",
+ "\u4F40>'['s\u00ec']'",
+ "\u4F41>'['a\u012D']'",
+ "\u4F42>'[zheng]'",
+ "\u4F43>'['di\u00e0n']'",
+ "\u4F44>'['h\u00e1n']'",
+ "\u4F45>'['ma\u00ec']'",
+ "\u4F46>'['d\u00e0n']'",
+ "\u4F47>'['zh\u00f9']'",
+ "\u4F48>'['b\u00f9']'",
+ "\u4F49>'[qu]'",
+ "\u4F4A>'['b\u012D']'",
+ "\u4F4B>'['sha\u00f2']'",
+ "\u4F4C>'['c\u012D']'",
+ "\u4F4D>'['we\u00ec']'",
+ "\u4F4E>'[di]'",
+ "\u4F4F>'['zh\u00f9']'",
+ "\u4F50>'['zu\u014F']'",
+ "\u4F51>'['yo\u00f9']'",
+ "\u4F52>'[yang]'",
+ "\u4F53>'['t\u012D']'",
+ "\u4F54>'['zh\u00e0n']'",
+ "\u4F55>'['h\u00e9']'",
+ "\u4F56>'['b\u00ec']'",
+ "\u4F57>'[tuo]'",
+ "\u4F58>'['sh\u00e9']'",
+ "\u4F59>'['y\u00fa']'",
+ "\u4F5A>'['y\u00ec']'",
+ "\u4F5B>'['f\u00f3']'",
+ "\u4F5C>'['zu\u00f2']'",
+ "\u4F5D>'['ko\u00f9']'",
+ "\u4F5E>'['n\u00ecng']'",
+ "\u4F5F>'['t\u00f3ng']'",
+ "\u4F60>'['n\u012D']'",
+ "\u4F61>'[xuan]'",
+ "\u4F62>'['q\u00fa']'",
+ "\u4F63>'['y\u00f2ng']'",
+ "\u4F64>'['w\u0103']'",
+ "\u4F65>'[qian]'",
+ "\u4F67>'['k\u0103']'",
+ "\u4F69>'['pe\u00ec']'",
+ "\u4F6A>'['hua\u00ed']'",
+ "\u4F6B>'['h\u00e8']'",
+ "\u4F6C>'['la\u014F']'",
+ "\u4F6D>'['xi\u00e1ng']'",
+ "\u4F6E>'['g\u00e9']'",
+ "\u4F6F>'['y\u00e1ng']'",
+ "\u4F70>'['ba\u012D']'",
+ "\u4F71>'['f\u0103']'",
+ "\u4F72>'['m\u00edng']'",
+ "\u4F73>'['ji\u0101']'",
+ "\u4F74>'['\u00e8r']'",
+ "\u4F75>'['b\u00ecng']'",
+ "\u4F76>'['j\u00ed']'",
+ "\u4F77>'['h\u0115n']'",
+ "\u4F78>'['hu\u00f3']'",
+ "\u4F79>'['gu\u012D']'",
+ "\u4F7A>'['qu\u00e1n']'",
+ "\u4F7B>'[tiao]'",
+ "\u4F7C>'['jia\u014F']'",
+ "\u4F7D>'['c\u00ec']'",
+ "\u4F7E>'['y\u00ec']'",
+ "\u4F7F>'['sh\u012D']'",
+ "\u4F80>'['x\u00edng']'",
+ "\u4F81>'[shen]'",
+ "\u4F82>'[tuo]'",
+ "\u4F83>'['k\u0103n']'",
+ "\u4F84>'['zh\u00ed']'",
+ "\u4F85>'[gai]'",
+ "\u4F86>'['la\u00ed']'",
+ "\u4F87>'['y\u00ed']'",
+ "\u4F88>'['ch\u012D']'",
+ "\u4F89>'[kua]'",
+ "\u4F8A>'[guang]'",
+ "\u4F8B>'['l\u00ec']'",
+ "\u4F8C>'[yin]'",
+ "\u4F8D>'['sh\u00ec']'",
+ "\u4F8E>'['m\u012D']'",
+ "\u4F8F>'[zhu]'",
+ "\u4F90>'['x\u00f9']'",
+ "\u4F91>'['yo\u00f9']'",
+ "\u4F92>'[an]'",
+ "\u4F93>'['l\u00f9']'",
+ "\u4F94>'['mo\u00fa']'",
+ "\u4F95>'['\u00e9r']'",
+ "\u4F96>'['l\u00fan']'",
+ "\u4F97>'['t\u00f3ng']'",
+ "\u4F98>'['ch\u00e0']'",
+ "\u4F99>'['ch\u00ec']'",
+ "\u4F9A>'['x\u00f9n']'",
+ "\u4F9B>'[gong]'",
+ "\u4F9C>'[zhou]'",
+ "\u4F9D>'[yi]'",
+ "\u4F9E>'['r\u016D']'",
+ "\u4F9F>'['ji\u00e0n']'",
+ "\u4FA0>'['xi\u00e1']'",
+ "\u4FA1>'['ji\u00e0']'",
+ "\u4FA2>'['za\u00ec']'",
+ "\u4FA3>'['l\u01DA']'",
+ "\u4FA5>'['jia\u014F']'",
+ "\u4FA6>'[zhen]'",
+ "\u4FA7>'['c\u00e8']'",
+ "\u4FA8>'['qia\u00f3']'",
+ "\u4FA9>'['kua\u00ec']'",
+ "\u4FAA>'['cha\u00ed']'",
+ "\u4FAB>'['n\u00ecng']'",
+ "\u4FAC>'['n\u00f3ng']'",
+ "\u4FAD>'['j\u012Dn']'",
+ "\u4FAE>'['w\u016D']'",
+ "\u4FAF>'['ho\u00fa']'",
+ "\u4FB0>'['ji\u014Fng']'",
+ "\u4FB1>'['ch\u0115ng']'",
+ "\u4FB2>'['zh\u00e8n']'",
+ "\u4FB3>'['zu\u00f2']'",
+ "\u4FB4>'['cho\u016D']'",
+ "\u4FB5>'[qin]'",
+ "\u4FB6>'['l\u01DA']'",
+ "\u4FB7>'['j\u00fa']'",
+ "\u4FB8>'['sh\u00f9']'",
+ "\u4FB9>'['t\u012Dng']'",
+ "\u4FBA>'['sh\u00e8n']'",
+ "\u4FBB>'[tuo]'",
+ "\u4FBC>'['b\u00f3']'",
+ "\u4FBD>'['n\u00e1n']'",
+ "\u4FBE>'[hao]'",
+ "\u4FBF>'['bi\u00e0n']'",
+ "\u4FC0>'['tu\u012D']'",
+ "\u4FC1>'['y\u016D']'",
+ "\u4FC2>'['x\u00ec']'",
+ "\u4FC3>'['c\u00f9']'",
+ "\u4FC4>'['\u00e9']'",
+ "\u4FC5>'['qi\u00fa']'",
+ "\u4FC6>'['x\u00fa']'",
+ "\u4FC7>'['ku\u0103ng']'",
+ "\u4FC8>'['k\u00f9']'",
+ "\u4FC9>'['w\u00f9']'",
+ "\u4FCA>'['j\u00f9n']'",
+ "\u4FCB>'['y\u00ec']'",
+ "\u4FCC>'['f\u016D']'",
+ "\u4FCD>'['l\u00e1ng']'",
+ "\u4FCE>'['z\u016D']'",
+ "\u4FCF>'['qia\u00f2']'",
+ "\u4FD0>'['l\u00ec']'",
+ "\u4FD1>'['y\u014Fng']'",
+ "\u4FD2>'['h\u00f9n']'",
+ "\u4FD3>'['j\u00ecng']'",
+ "\u4FD4>'['xi\u00e0n']'",
+ "\u4FD5>'['s\u00e0n']'",
+ "\u4FD6>'['pa\u012D']'",
+ "\u4FD7>'['s\u00fa']'",
+ "\u4FD8>'['f\u00fa']'",
+ "\u4FD9>'[xi]'",
+ "\u4FDA>'['l\u012D']'",
+ "\u4FDB>'['f\u016D']'",
+ "\u4FDC>'[ping]'",
+ "\u4FDD>'['ba\u014F']'",
+ "\u4FDE>'['y\u00fa']'",
+ "\u4FDF>'['s\u00ec']'",
+ "\u4FE0>'['xi\u00e1']'",
+ "\u4FE1>'['x\u00ecn']'",
+ "\u4FE2>'[xiu]'",
+ "\u4FE3>'['y\u016D']'",
+ "\u4FE4>'['t\u00ec']'",
+ "\u4FE5>'[che]'",
+ "\u4FE6>'['cho\u00fa']'",
+ "\u4FE8>'['y\u0103n']'",
+ "\u4FE9>'['li\u0103']'",
+ "\u4FEA>'['l\u00ec']'",
+ "\u4FEB>'['la\u00ed']'",
+ "\u4FED>'['ji\u0103n']'",
+ "\u4FEE>'[xiu]'",
+ "\u4FEF>'['f\u016D']'",
+ "\u4FF0>'['h\u00e8']'",
+ "\u4FF1>'['j\u00f9']'",
+ "\u4FF2>'['xia\u00f2']'",
+ "\u4FF3>'['pa\u00ed']'",
+ "\u4FF4>'['ji\u00e0n']'",
+ "\u4FF5>'['bia\u00f2']'",
+ "\u4FF6>'['ch\u00f9']'",
+ "\u4FF7>'['fe\u00ec']'",
+ "\u4FF8>'['f\u00e8ng']'",
+ "\u4FF9>'['y\u00e0']'",
+ "\u4FFA>'['\u0103n']'",
+ "\u4FFB>'['be\u00ec']'",
+ "\u4FFC>'['y\u00f9']'",
+ "\u4FFD>'[xin]'",
+ "\u4FFE>'['b\u012D']'",
+ "\u4FFF>'['ji\u00e0n']'",
+ "\u5000>'[chang]'",
+ "\u5001>'['ch\u00ed']'",
+ "\u5002>'['b\u00ecng']'",
+ "\u5003>'['z\u00e1n']'",
+ "\u5004>'['ya\u00f3']'",
+ "\u5005>'['cu\u00ec']'",
+ "\u5006>'['li\u0103']'",
+ "\u5007>'['w\u0103n']'",
+ "\u5008>'['la\u00ed']'",
+ "\u5009>'[cang]'",
+ "\u500A>'['z\u00f2ng']'",
+ "\u500B>'['g\u00e8']'",
+ "\u500C>'[guan]'",
+ "\u500D>'['be\u00ec']'",
+ "\u500E>'[tian]'",
+ "\u500F>'[shu]'",
+ "\u5010>'[shu]'",
+ "\u5011>'['m\u0113n']'",
+ "\u5012>'['da\u014F']'",
+ "\u5013>'['t\u00e1n']'",
+ "\u5014>'['ju\u00e9']'",
+ "\u5015>'['chu\u00ed']'",
+ "\u5016>'['x\u00ecng']'",
+ "\u5017>'['p\u00e9ng']'",
+ "\u5018>'['t\u0103ng']'",
+ "\u5019>'['ho\u00f9']'",
+ "\u501A>'['y\u012D']'",
+ "\u501B>'[qi]'",
+ "\u501C>'['t\u00ec']'",
+ "\u501D>'['g\u00e0n']'",
+ "\u501E>'['j\u00ecng']'",
+ "\u501F>'['ji\u00e8']'",
+ "\u5020>'[sui]'",
+ "\u5021>'['ch\u00e0ng']'",
+ "\u5022>'['ji\u00e9']'",
+ "\u5023>'['f\u0103ng']'",
+ "\u5024>'['zh\u00ed']'",
+ "\u5025>'[kong]'",
+ "\u5026>'['ju\u00e0n']'",
+ "\u5027>'[zong]'",
+ "\u5028>'['j\u00f9']'",
+ "\u5029>'['qi\u00e0n']'",
+ "\u502A>'['n\u00ed']'",
+ "\u502B>'['l\u00fan']'",
+ "\u502C>'[zhuo]'",
+ "\u502D>'[wei]'",
+ "\u502E>'['lu\u014F']'",
+ "\u502F>'[song]'",
+ "\u5030>'['l\u00e9ng']'",
+ "\u5031>'['h\u00f9n']'",
+ "\u5032>'[dong]'",
+ "\u5033>'['z\u00ec']'",
+ "\u5034>'['b\u00e8n']'",
+ "\u5035>'['w\u016D']'",
+ "\u5036>'['j\u00f9']'",
+ "\u5037>'['na\u00ec']'",
+ "\u5038>'['ca\u012D']'",
+ "\u5039>'['ji\u0103n']'",
+ "\u503A>'['zha\u00ec']'",
+ "\u503B>'[ye]'",
+ "\u503C>'['zh\u00ed']'",
+ "\u503D>'['sh\u00e0']'",
+ "\u503E>'[qing]'",
+ "\u5040>'[ying]'",
+ "\u5041>'[cheng]'",
+ "\u5042>'[jian]'",
+ "\u5043>'['y\u0103n']'",
+ "\u5044>'['nu\u00e0n']'",
+ "\u5045>'['zh\u00f2ng']'",
+ "\u5046>'['ch\u016Dn']'",
+ "\u5047>'['ji\u0103']'",
+ "\u5048>'['ji\u00e9']'",
+ "\u5049>'['we\u012D']'",
+ "\u504A>'['y\u016D']'",
+ "\u504B>'['b\u012Dng']'",
+ "\u504C>'['ru\u00f2']'",
+ "\u504D>'['t\u00ed']'",
+ "\u504E>'[wei]'",
+ "\u504F>'[pian]'",
+ "\u5050>'['y\u00e0n']'",
+ "\u5051>'[feng]'",
+ "\u5052>'['t\u0103ng']'",
+ "\u5053>'['w\u00f2']'",
+ "\u5054>'['\u00e8']'",
+ "\u5055>'['xi\u00e9']'",
+ "\u5056>'['ch\u0115']'",
+ "\u5057>'['sh\u0115ng']'",
+ "\u5058>'['k\u0103n']'",
+ "\u5059>'['d\u00ec']'",
+ "\u505A>'['zu\u00f2']'",
+ "\u505B>'[cha]'",
+ "\u505C>'['t\u00edng']'",
+ "\u505D>'['be\u00ec']'",
+ "\u505E>'['y\u00e8']'",
+ "\u505F>'['hu\u00e1ng']'",
+ "\u5060>'['ya\u014F']'",
+ "\u5061>'['zh\u00e0n']'",
+ "\u5062>'['cho\u016D']'",
+ "\u5063>'[yan]'",
+ "\u5064>'['yo\u016D']'",
+ "\u5065>'['ji\u00e0n']'",
+ "\u5066>'[xu]'",
+ "\u5067>'[zha]'",
+ "\u5068>'[ci]'",
+ "\u5069>'['f\u00f9']'",
+ "\u506A>'[bi]'",
+ "\u506B>'['zh\u00ec']'",
+ "\u506C>'['z\u014Fng']'",
+ "\u506D>'['mi\u0103n']'",
+ "\u506E>'['j\u00ed']'",
+ "\u506F>'['y\u012D']'",
+ "\u5070>'['xi\u00e8']'",
+ "\u5071>'['x\u00fan']'",
+ "\u5072>'[si]'",
+ "\u5073>'[duan]'",
+ "\u5074>'['c\u00e8']'",
+ "\u5075>'[zhen]'",
+ "\u5076>'['o\u016D']'",
+ "\u5077>'[tou]'",
+ "\u5078>'[tou]'",
+ "\u5079>'['be\u00ec']'",
+ "\u507A>'['z\u00e1']'",
+ "\u507B>'['l\u01DA']'",
+ "\u507C>'['ji\u00e9']'",
+ "\u507D>'['we\u012D']'",
+ "\u507E>'['f\u00e8n']'",
+ "\u507F>'['ch\u00e1ng']'",
+ "\u5080>'[gui]'",
+ "\u5081>'['so\u016D']'",
+ "\u5082>'['zh\u00ec']'",
+ "\u5083>'['s\u00f9']'",
+ "\u5084>'[xia]'",
+ "\u5085>'['f\u00f9']'",
+ "\u5086>'['yu\u00e0n']'",
+ "\u5087>'['r\u014Fng']'",
+ "\u5088>'['l\u00ec']'",
+ "\u5089>'['r\u00f9']'",
+ "\u508A>'['y\u016Dn']'",
+ "\u508B>'['go\u00f9']'",
+ "\u508C>'['m\u00e0']'",
+ "\u508D>'['b\u00e0ng']'",
+ "\u508E>'[dian]'",
+ "\u508F>'['t\u00e1ng']'",
+ "\u5090>'['ha\u00f2']'",
+ "\u5091>'['ji\u00e9']'",
+ "\u5092>'[xi]'",
+ "\u5093>'['sh\u00e0n']'",
+ "\u5094>'['qi\u00e0n']'",
+ "\u5095>'['ju\u00e9']'",
+ "\u5096>'[cang]'",
+ "\u5097>'['ch\u00f9']'",
+ "\u5098>'['s\u0103n']'",
+ "\u5099>'['be\u00ec']'",
+ "\u509A>'['xia\u00f2']'",
+ "\u509B>'['y\u014Fng']'",
+ "\u509C>'['ya\u00f3']'",
+ "\u509D>'['t\u00e0n']'",
+ "\u509E>'[suo]'",
+ "\u509F>'['y\u0103ng']'",
+ "\u50A0>'[fa]'",
+ "\u50A1>'['b\u00ecng']'",
+ "\u50A2>'[jia]'",
+ "\u50A3>'['da\u012D']'",
+ "\u50A4>'['za\u00ec']'",
+ "\u50A5>'['t\u0103ng']'",
+ "\u50A7>'['b\u00ecn']'",
+ "\u50A8>'['ch\u016D']'",
+ "\u50A9>'['nu\u00f3']'",
+ "\u50AA>'[can]'",
+ "\u50AB>'['le\u012D']'",
+ "\u50AC>'[cui]'",
+ "\u50AD>'[yong]'",
+ "\u50AE>'[zao]'",
+ "\u50AF>'['z\u014Fng']'",
+ "\u50B0>'['p\u00e9ng']'",
+ "\u50B1>'['s\u014Fng']'",
+ "\u50B2>'['a\u00f2']'",
+ "\u50B3>'['chu\u00e1n']'",
+ "\u50B4>'['y\u016D']'",
+ "\u50B5>'['zha\u00ec']'",
+ "\u50B6>'['co\u00f9']'",
+ "\u50B7>'[shang]'",
+ "\u50B8>'['qi\u0103ng']'",
+ "\u50B9>'['j\u00ecng']'",
+ "\u50BA>'['ch\u00ec']'",
+ "\u50BB>'['sh\u0103']'",
+ "\u50BC>'['h\u00e0n']'",
+ "\u50BD>'[zhang]'",
+ "\u50BE>'[qing]'",
+ "\u50BF>'['y\u00e0n']'",
+ "\u50C0>'['d\u00ec']'",
+ "\u50C1>'[xi]'",
+ "\u50C2>'['l\u01DA']'",
+ "\u50C3>'['be\u00ec']'",
+ "\u50C4>'['pia\u00f2']'",
+ "\u50C5>'['j\u012Dn']'",
+ "\u50C6>'['li\u00e1n']'",
+ "\u50C7>'['l\u00f9']'",
+ "\u50C8>'['m\u00e0n']'",
+ "\u50C9>'[qian]'",
+ "\u50CA>'[xian]'",
+ "\u50CB>'['t\u00e0n']'",
+ "\u50CC>'['y\u00edng']'",
+ "\u50CD>'['d\u00f2ng']'",
+ "\u50CE>'['zhu\u00e0n']'",
+ "\u50CF>'['xi\u00e0ng']'",
+ "\u50D0>'['sh\u00e0n']'",
+ "\u50D1>'['qia\u00f3']'",
+ "\u50D2>'['ji\u014Fng']'",
+ "\u50D3>'['tu\u012D']'",
+ "\u50D4>'['z\u016Dn']'",
+ "\u50D5>'['p\u00fa']'",
+ "\u50D6>'[xi]'",
+ "\u50D7>'['la\u00f3']'",
+ "\u50D8>'['ch\u0103ng']'",
+ "\u50D9>'[guang]'",
+ "\u50DA>'['lia\u00f3']'",
+ "\u50DB>'[qi]'",
+ "\u50DC>'['d\u00e8ng']'",
+ "\u50DD>'['ch\u00e1n']'",
+ "\u50DE>'['we\u012D']'",
+ "\u50DF>'[ji]'",
+ "\u50E0>'[fan]'",
+ "\u50E1>'['hu\u00ec']'",
+ "\u50E2>'['chu\u0103n']'",
+ "\u50E3>'['ji\u00e0n']'",
+ "\u50E4>'['d\u00e0n']'",
+ "\u50E5>'['jia\u014F']'",
+ "\u50E6>'['ji\u00f9']'",
+ "\u50E7>'[seng]'",
+ "\u50E8>'['f\u00e8n']'",
+ "\u50E9>'['xi\u00e0n']'",
+ "\u50EA>'['ju\u00e9']'",
+ "\u50EB>'['\u00e8']'",
+ "\u50EC>'[jiao]'",
+ "\u50ED>'['ji\u00e0n']'",
+ "\u50EE>'['t\u00f3ng']'",
+ "\u50EF>'['l\u012Dn']'",
+ "\u50F0>'['b\u00f3']'",
+ "\u50F1>'['g\u00f9']'",
+ "\u50F3>'['s\u00f9']'",
+ "\u50F4>'['xi\u00e0n']'",
+ "\u50F5>'[jiang]'",
+ "\u50F6>'['m\u012Dn']'",
+ "\u50F7>'['y\u00e8']'",
+ "\u50F8>'['j\u00ecn']'",
+ "\u50F9>'['ji\u00e0']'",
+ "\u50FA>'['qia\u00f2']'",
+ "\u50FB>'['p\u00ec']'",
+ "\u50FC>'[feng]'",
+ "\u50FD>'['zho\u00f9']'",
+ "\u50FE>'['a\u00ec']'",
+ "\u50FF>'['sa\u00ec']'",
+ "\u5100>'['y\u00ed']'",
+ "\u5101>'['j\u00f9n']'",
+ "\u5102>'['n\u00f3ng']'",
+ "\u5103>'['ch\u00e1n']'",
+ "\u5104>'['y\u00ec']'",
+ "\u5105>'[dang]'",
+ "\u5106>'['j\u012Dng']'",
+ "\u5107>'[xuan]'",
+ "\u5108>'['kua\u00ec']'",
+ "\u5109>'['ji\u0103n']'",
+ "\u510A>'['ch\u00f9']'",
+ "\u510B>'[dan]'",
+ "\u510C>'['jia\u014F']'",
+ "\u510D>'['sh\u0103']'",
+ "\u510E>'['za\u00ec']'",
+ "\u5110>'['b\u00ecn']'",
+ "\u5111>'['\u00e0n']'",
+ "\u5112>'['r\u00fa']'",
+ "\u5113>'['ta\u00ed']'",
+ "\u5114>'['cho\u00fa']'",
+ "\u5115>'['cha\u00ed']'",
+ "\u5116>'['l\u00e1n']'",
+ "\u5117>'['n\u012D']'",
+ "\u5118>'['j\u012Dn']'",
+ "\u5119>'['qi\u00e0n']'",
+ "\u511A>'['m\u00e9ng']'",
+ "\u511B>'['w\u016D']'",
+ "\u511C>'['n\u00edng']'",
+ "\u511D>'['qi\u00f3ng']'",
+ "\u511E>'['n\u012D']'",
+ "\u511F>'['ch\u00e1ng']'",
+ "\u5120>'['li\u00e8']'",
+ "\u5121>'['le\u012D']'",
+ "\u5122>'['l\u01DA']'",
+ "\u5123>'['ku\u00e0ng']'",
+ "\u5124>'['ba\u00f2']'",
+ "\u5125>'['d\u00fa']'",
+ "\u5126>'[biao]'",
+ "\u5127>'['z\u0103n']'",
+ "\u5128>'['zh\u00ed']'",
+ "\u5129>'['s\u00ec']'",
+ "\u512A>'[you]'",
+ "\u512B>'['ha\u00f3']'",
+ "\u512C>'['ch\u00e8n']'",
+ "\u512D>'['ch\u00e8n']'",
+ "\u512E>'['l\u00ec']'",
+ "\u512F>'['t\u00e9ng']'",
+ "\u5130>'['we\u012D']'",
+ "\u5131>'['l\u014Fng']'",
+ "\u5132>'['ch\u016D']'",
+ "\u5133>'['ch\u00e0n']'",
+ "\u5134>'['r\u00e1ng']'",
+ "\u5135>'[shu]'",
+ "\u5136>'['hu\u00ec']'",
+ "\u5137>'['l\u00ec']'",
+ "\u5138>'['lu\u00f3']'",
+ "\u5139>'['z\u0103n']'",
+ "\u513A>'['nu\u00f3']'",
+ "\u513B>'['t\u0103ng']'",
+ "\u513C>'['y\u0103n']'",
+ "\u513D>'['le\u012D']'",
+ "\u513E>'['n\u00e0ng']'",
+ "\u513F>'['\u00e9r']'",
+ "\u5140>'['w\u00f9']'",
+ "\u5141>'['y\u016Dn']'",
+ "\u5142>'[zan]'",
+ "\u5143>'['yu\u00e1n']'",
+ "\u5144>'[xiong]'",
+ "\u5145>'[chong]'",
+ "\u5146>'['zha\u00f2']'",
+ "\u5147>'[xiong]'",
+ "\u5148>'[xian]'",
+ "\u5149>'[guang]'",
+ "\u514A>'['du\u00ec']'",
+ "\u514B>'['k\u00e8']'",
+ "\u514C>'['du\u00ec']'",
+ "\u514D>'['mi\u0103n']'",
+ "\u514E>'['t\u00f9']'",
+ "\u514F>'['ch\u00e1ng']'",
+ "\u5150>'['\u00e9r']'",
+ "\u5151>'['du\u00ec']'",
+ "\u5152>'['\u00e9r']'",
+ "\u5153>'[xin]'",
+ "\u5154>'['t\u00f9']'",
+ "\u5155>'['s\u00ec']'",
+ "\u5156>'['y\u0103n']'",
+ "\u5157>'['y\u0103n']'",
+ "\u5158>'['sh\u012D']'",
+ "\u5159>'['shi2k\u00e8']'",
+ "\u515A>'['d\u0103ng']'",
+ "\u515B>'[qian]'",
+ "\u515C>'[dou]'",
+ "\u515D>'[fen]'",
+ "\u515E>'['ma\u00f3']'",
+ "\u515F>'[shen]'",
+ "\u5160>'[dou]'",
+ "\u5161>'['bai3k\u00e8']'",
+ "\u5162>'[jing]'",
+ "\u5163>'['l\u012D']'",
+ "\u5164>'['hu\u00e1ng']'",
+ "\u5165>'['r\u00f9']'",
+ "\u5166>'['w\u00e1ng']'",
+ "\u5167>'['ne\u00ec']'",
+ "\u5168>'['qu\u00e1n']'",
+ "\u5169>'['li\u0103ng']'",
+ "\u516A>'['y\u00fa']'",
+ "\u516B>'[ba]'",
+ "\u516C>'[gong]'",
+ "\u516D>'['li\u00f9']'",
+ "\u516E>'[xi]'",
+ "\u5170>'['l\u00e1n']'",
+ "\u5171>'['g\u00f2ng']'",
+ "\u5172>'[tian]'",
+ "\u5173>'[guan]'",
+ "\u5174>'[xing]'",
+ "\u5175>'[bing]'",
+ "\u5176>'['q\u00ed']'",
+ "\u5177>'['j\u00f9']'",
+ "\u5178>'['di\u0103n']'",
+ "\u5179>'[zi]'",
+ "\u517B>'['y\u0103ng']'",
+ "\u517C>'[jian]'",
+ "\u517D>'['sho\u00f9']'",
+ "\u517E>'['j\u00ec']'",
+ "\u517F>'['y\u00ec']'",
+ "\u5180>'['j\u00ec']'",
+ "\u5181>'['ch\u0103n']'",
+ "\u5182>'[jiong]'",
+ "\u5183>'[MAO]'",
+ "\u5184>'['r\u0103n']'",
+ "\u5185>'['ne\u00ec']'",
+ "\u5186>'[YUAN]'",
+ "\u5187>'['ma\u014F']'",
+ "\u5188>'[gang]'",
+ "\u5189>'['r\u0103n']'",
+ "\u518A>'['c\u00e8']'",
+ "\u518B>'[jiong]'",
+ "\u518C>'['c\u00e8']'",
+ "\u518D>'['za\u00ec']'",
+ "\u518E>'['gu\u0103']'",
+ "\u518F>'['ji\u014Fng']'",
+ "\u5190>'['ma\u00f2']'",
+ "\u5191>'['zho\u00f9']'",
+ "\u5192>'['mo\u00f9']'",
+ "\u5193>'['go\u00f9']'",
+ "\u5194>'['x\u016D']'",
+ "\u5195>'['mi\u0103n']'",
+ "\u5196>'['m\u00ec']'",
+ "\u5197>'['r\u014Fng']'",
+ "\u5198>'['y\u00edn']'",
+ "\u5199>'['xi\u0115']'",
+ "\u519A>'['k\u0103n']'",
+ "\u519B>'[jun]'",
+ "\u519C>'['n\u00f3ng']'",
+ "\u519D>'['y\u00ed']'",
+ "\u519E>'['m\u00ed']'",
+ "\u519F>'['sh\u00ec']'",
+ "\u51A0>'[guan]'",
+ "\u51A1>'['m\u00e9ng']'",
+ "\u51A2>'['zh\u014Fng']'",
+ "\u51A3>'['j\u00f9']'",
+ "\u51A4>'[yuan]'",
+ "\u51A5>'['m\u00edng']'",
+ "\u51A6>'['ko\u00f9']'",
+ "\u51A8>'['f\u00f9']'",
+ "\u51A9>'['xi\u0115']'",
+ "\u51AA>'['m\u00ec']'",
+ "\u51AB>'[bing]'",
+ "\u51AC>'[dong]'",
+ "\u51AD>'['ta\u00ed']'",
+ "\u51AE>'[gang]'",
+ "\u51AF>'['f\u00e9ng']'",
+ "\u51B0>'[bing]'",
+ "\u51B1>'['h\u00f9']'",
+ "\u51B2>'[chong]'",
+ "\u51B3>'['ju\u00e9']'",
+ "\u51B4>'['h\u00f9']'",
+ "\u51B5>'['ku\u00e0ng']'",
+ "\u51B6>'['y\u0115']'",
+ "\u51B7>'['l\u0115ng']'",
+ "\u51B8>'['p\u00e0n']'",
+ "\u51B9>'['f\u00fa']'",
+ "\u51BA>'['m\u012Dn']'",
+ "\u51BB>'['d\u00f2ng']'",
+ "\u51BC>'['xi\u0103n']'",
+ "\u51BD>'['li\u00e8']'",
+ "\u51BE>'['xi\u00e1']'",
+ "\u51BF>'[jian]'",
+ "\u51C0>'['j\u00ecng']'",
+ "\u51C1>'['sh\u00f9']'",
+ "\u51C2>'['me\u012D']'",
+ "\u51C3>'['t\u00fa']'",
+ "\u51C4>'[qi]'",
+ "\u51C5>'['g\u00f9']'",
+ "\u51C6>'['zh\u016Dn']'",
+ "\u51C7>'['s\u00f2ng']'",
+ "\u51C8>'['j\u00ecng']'",
+ "\u51C9>'['li\u00e1ng']'",
+ "\u51CA>'['q\u00ecng']'",
+ "\u51CB>'[diao]'",
+ "\u51CC>'['l\u00edng']'",
+ "\u51CD>'['d\u00f2ng']'",
+ "\u51CE>'['g\u00e0n']'",
+ "\u51CF>'['ji\u0103n']'",
+ "\u51D0>'[yin]'",
+ "\u51D1>'['co\u00f9']'",
+ "\u51D2>'['y\u00ed']'",
+ "\u51D3>'['l\u00ec']'",
+ "\u51D4>'[cang]'",
+ "\u51D5>'['m\u012Dng']'",
+ "\u51D6>'['zhu\u0115n']'",
+ "\u51D7>'['cu\u00ed']'",
+ "\u51D8>'[si]'",
+ "\u51D9>'['du\u00f3']'",
+ "\u51DA>'['j\u00ecn']'",
+ "\u51DB>'['l\u012Dn']'",
+ "\u51DC>'['l\u012Dn']'",
+ "\u51DD>'['n\u00edng']'",
+ "\u51DE>'[xi]'",
+ "\u51DF>'['d\u00fa']'",
+ "\u51E0>'[ji]'",
+ "\u51E1>'['f\u00e1n']'",
+ "\u51E2>'['f\u00e1n']'",
+ "\u51E3>'['f\u00e1n']'",
+ "\u51E4>'['f\u00e8ng']'",
+ "\u51E5>'[ju]'",
+ "\u51E6>'['ch\u016D']'",
+ "\u51E8>'[feng]'",
+ "\u51EB>'['f\u00fa']'",
+ "\u51EC>'[feng]'",
+ "\u51ED>'['p\u00edng']'",
+ "\u51EE>'[feng]'",
+ "\u51EF>'['ka\u012D']'",
+ "\u51F0>'['hu\u00e1ng']'",
+ "\u51F1>'['ka\u012D']'",
+ "\u51F2>'[gan]'",
+ "\u51F3>'['d\u00e8ng']'",
+ "\u51F4>'['p\u00edng']'",
+ "\u51F5>'[qu]'",
+ "\u51F6>'[xiong]'",
+ "\u51F7>'['kua\u00ec']'",
+ "\u51F8>'[tu]'",
+ "\u51F9>'[ao]'",
+ "\u51FA>'[chu]'",
+ "\u51FB>'['j\u00ed']'",
+ "\u51FC>'['d\u00e0ng']'",
+ "\u51FD>'['h\u00e1n']'",
+ "\u51FE>'['h\u00e1n']'",
+ "\u51FF>'['za\u00f3']'",
+ "\u5200>'[dao]'",
+ "\u5201>'[diao]'",
+ "\u5202>'[dao]'",
+ "\u5203>'['r\u00e8n']'",
+ "\u5204>'['r\u00e8n']'",
+ "\u5205>'[chuang]'",
+ "\u5206>'[fen]'",
+ "\u5207>'[qie]'",
+ "\u5208>'['y\u00ec']'",
+ "\u5209>'[ji]'",
+ "\u520A>'[kan]'",
+ "\u520B>'['qi\u00e0n']'",
+ "\u520C>'['c\u016Dn']'",
+ "\u520D>'['ch\u00fa']'",
+ "\u520E>'['w\u0115n']'",
+ "\u520F>'[ji]'",
+ "\u5210>'['d\u0103n']'",
+ "\u5211>'['x\u00edng']'",
+ "\u5212>'['hu\u00e1']'",
+ "\u5213>'['w\u00e1n']'",
+ "\u5214>'['ju\u00e9']'",
+ "\u5215>'['l\u00ed']'",
+ "\u5216>'['yu\u00e8']'",
+ "\u5217>'['li\u00e8']'",
+ "\u5218>'['li\u00fa']'",
+ "\u5219>'['z\u00e9']'",
+ "\u521A>'[gang]'",
+ "\u521B>'['chu\u00e0ng']'",
+ "\u521C>'['f\u00fa']'",
+ "\u521D>'[chu]'",
+ "\u521E>'['q\u00f9']'",
+ "\u521F>'[ju]'",
+ "\u5220>'[shan]'",
+ "\u5221>'['m\u012Dn']'",
+ "\u5222>'['l\u00edng']'",
+ "\u5223>'[zhong]'",
+ "\u5224>'['p\u00e0n']'",
+ "\u5225>'['bi\u00e9']'",
+ "\u5226>'['ji\u00e9']'",
+ "\u5227>'['ji\u00e9']'",
+ "\u5228>'['ba\u00f2']'",
+ "\u5229>'['l\u00ec']'",
+ "\u522A>'[shan]'",
+ "\u522B>'['bi\u00e9']'",
+ "\u522C>'['ch\u0103n']'",
+ "\u522D>'['j\u012Dng']'",
+ "\u522E>'[gua]'",
+ "\u522F>'[gen]'",
+ "\u5230>'['da\u00f2']'",
+ "\u5231>'['chu\u00e0ng']'",
+ "\u5232>'[kui]'",
+ "\u5233>'[ku]'",
+ "\u5234>'['du\u00f2']'",
+ "\u5235>'['\u00e8r']'",
+ "\u5236>'['zh\u00ec']'",
+ "\u5237>'[shua]'",
+ "\u5238>'['qu\u00e0n']'",
+ "\u5239>'['ch\u00e0']'",
+ "\u523A>'['c\u00ec']'",
+ "\u523B>'['k\u00e8']'",
+ "\u523C>'['ji\u00e9']'",
+ "\u523D>'['gu\u00ec']'",
+ "\u523E>'['c\u00ec']'",
+ "\u523F>'['gu\u00ec']'",
+ "\u5240>'['ka\u012D']'",
+ "\u5241>'['du\u00f2']'",
+ "\u5242>'['j\u00ec']'",
+ "\u5243>'['t\u00ec']'",
+ "\u5244>'['j\u012Dng']'",
+ "\u5245>'['lo\u00fa']'",
+ "\u5246>'[gen]'",
+ "\u5247>'['z\u00e9']'",
+ "\u5248>'[yuan]'",
+ "\u5249>'['cu\u00f2']'",
+ "\u524A>'[xue]'",
+ "\u524B>'['k\u00e8']'",
+ "\u524C>'['l\u00e0']'",
+ "\u524D>'['qi\u00e1n']'",
+ "\u524E>'['ch\u00e0']'",
+ "\u524F>'['chu\u00e0ng']'",
+ "\u5250>'['gu\u0103']'",
+ "\u5251>'['ji\u00e0n']'",
+ "\u5252>'['cu\u00f2']'",
+ "\u5253>'['l\u00ed']'",
+ "\u5254>'[ti]'",
+ "\u5255>'['fe\u00ec']'",
+ "\u5256>'[pou]'",
+ "\u5257>'['ch\u0103n']'",
+ "\u5258>'['q\u00ed']'",
+ "\u5259>'['chu\u00e0ng']'",
+ "\u525A>'['z\u00ec']'",
+ "\u525B>'[gang]'",
+ "\u525C>'[wan]'",
+ "\u525D>'[bo]'",
+ "\u525E>'[ji]'",
+ "\u525F>'[duo]'",
+ "\u5260>'['q\u00edng']'",
+ "\u5261>'['y\u0103n']'",
+ "\u5262>'['zhu\u00f3']'",
+ "\u5263>'['ji\u00e0n']'",
+ "\u5264>'['j\u00ec']'",
+ "\u5265>'[bo]'",
+ "\u5266>'[yan]'",
+ "\u5267>'['j\u00f9']'",
+ "\u5268>'['hu\u00f2']'",
+ "\u5269>'['sh\u00e8ng']'",
+ "\u526A>'['ji\u0103n']'",
+ "\u526B>'['du\u00f3']'",
+ "\u526C>'[duan]'",
+ "\u526D>'[wu]'",
+ "\u526E>'['gu\u0103']'",
+ "\u526F>'['f\u00f9']'",
+ "\u5270>'['sh\u00e8ng']'",
+ "\u5271>'['ji\u00e0n']'",
+ "\u5272>'[ge]'",
+ "\u5273>'[zha]'",
+ "\u5274>'['ka\u012D']'",
+ "\u5275>'['chu\u00e0ng']'",
+ "\u5276>'[juan]'",
+ "\u5277>'['ch\u0103n']'",
+ "\u5278>'['tu\u00e1n']'",
+ "\u5279>'['l\u00f9']'",
+ "\u527A>'['l\u00ed']'",
+ "\u527B>'['fo\u00fa']'",
+ "\u527C>'[shan]'",
+ "\u527D>'['pia\u00f2']'",
+ "\u527E>'[kou]'",
+ "\u527F>'['jia\u014F']'",
+ "\u5280>'[gua]'",
+ "\u5281>'[qiao]'",
+ "\u5282>'['ju\u00e9']'",
+ "\u5283>'['hu\u00e0']'",
+ "\u5284>'['zh\u00e1']'",
+ "\u5285>'['zhu\u00f2']'",
+ "\u5286>'['li\u00e1n']'",
+ "\u5287>'['j\u00f9']'",
+ "\u5288>'[pi]'",
+ "\u5289>'['li\u00fa']'",
+ "\u528A>'['gu\u00ec']'",
+ "\u528B>'['jia\u014F']'",
+ "\u528C>'['gu\u00ec']'",
+ "\u528D>'['ji\u00e0n']'",
+ "\u528E>'['ji\u00e0n']'",
+ "\u528F>'[tang]'",
+ "\u5290>'[huo]'",
+ "\u5291>'['j\u00ec']'",
+ "\u5292>'['ji\u00e0n']'",
+ "\u5293>'['y\u00ec']'",
+ "\u5294>'['ji\u00e0n']'",
+ "\u5295>'['zh\u00ed']'",
+ "\u5296>'['ch\u00e1n']'",
+ "\u5297>'['cu\u00e1n']'",
+ "\u5298>'['m\u00f3']'",
+ "\u5299>'['l\u00ed']'",
+ "\u529A>'['zh\u00fa']'",
+ "\u529B>'['l\u00ec']'",
+ "\u529C>'[ya]'",
+ "\u529D>'['qu\u00e0n']'",
+ "\u529E>'['b\u00e0n']'",
+ "\u529F>'[gong]'",
+ "\u52A0>'[jia]'",
+ "\u52A1>'['w\u00f9']'",
+ "\u52A2>'['ma\u00ec']'",
+ "\u52A3>'['li\u00e8']'",
+ "\u52A4>'['j\u00ecn']'",
+ "\u52A5>'[keng]'",
+ "\u52A6>'['xi\u00e9']'",
+ "\u52A7>'['zh\u012D']'",
+ "\u52A8>'['d\u00f2ng']'",
+ "\u52A9>'['zh\u00f9']'",
+ "\u52AA>'['n\u016D']'",
+ "\u52AB>'['ji\u00e9']'",
+ "\u52AC>'['q\u00fa']'",
+ "\u52AD>'['sha\u00f2']'",
+ "\u52AE>'['y\u00ec']'",
+ "\u52AF>'[zhu]'",
+ "\u52B0>'['mia\u014F']'",
+ "\u52B1>'['l\u00ec']'",
+ "\u52B2>'['j\u00ecng']'",
+ "\u52B3>'['la\u00f3']'",
+ "\u52B4>'['la\u00f3']'",
+ "\u52B5>'['ju\u00e0n']'",
+ "\u52B6>'['ko\u016D']'",
+ "\u52B7>'['y\u00e1ng']'",
+ "\u52B8>'[wa]'",
+ "\u52B9>'['xia\u00f2']'",
+ "\u52BA>'['mo\u00fa']'",
+ "\u52BB>'[kuang]'",
+ "\u52BC>'['ji\u00e9']'",
+ "\u52BD>'['li\u00e8']'",
+ "\u52BE>'['h\u00e9']'",
+ "\u52BF>'['sh\u00ec']'",
+ "\u52C0>'['k\u00e8']'",
+ "\u52C1>'['j\u00ecng']'",
+ "\u52C2>'['ha\u00f3']'",
+ "\u52C3>'['b\u00f3']'",
+ "\u52C4>'['m\u012Dn']'",
+ "\u52C5>'['ch\u00ec']'",
+ "\u52C6>'['l\u00e1ng']'",
+ "\u52C7>'['y\u014Fng']'",
+ "\u52C8>'['y\u014Fng']'",
+ "\u52C9>'['mi\u0103n']'",
+ "\u52CA>'['k\u00e8']'",
+ "\u52CB>'[xun]'",
+ "\u52CC>'['ju\u00e0n']'",
+ "\u52CD>'['q\u00edng']'",
+ "\u52CE>'['l\u00f9']'",
+ "\u52CF>'['po\u016D']'",
+ "\u52D0>'['m\u0115ng']'",
+ "\u52D1>'['la\u00ec']'",
+ "\u52D2>'['l\u00e8']'",
+ "\u52D3>'['ka\u00ec']'",
+ "\u52D4>'['mi\u0103n']'",
+ "\u52D5>'['d\u00f2ng']'",
+ "\u52D6>'['x\u00f9']'",
+ "\u52D7>'['x\u00f9']'",
+ "\u52D8>'[kan]'",
+ "\u52D9>'['w\u00f9']'",
+ "\u52DA>'['y\u00ec']'",
+ "\u52DB>'[xun]'",
+ "\u52DC>'['w\u0115ng']'",
+ "\u52DD>'['sh\u00e8ng']'",
+ "\u52DE>'['la\u00f3']'",
+ "\u52DF>'['m\u00f9']'",
+ "\u52E0>'['l\u00f9']'",
+ "\u52E1>'['pia\u00f2']'",
+ "\u52E2>'['sh\u00ec']'",
+ "\u52E3>'[ji]'",
+ "\u52E4>'['q\u00edn']'",
+ "\u52E5>'['qi\u0103ng']'",
+ "\u52E6>'['jia\u014F']'",
+ "\u52E7>'['qu\u00e0n']'",
+ "\u52E8>'['y\u0103ng']'",
+ "\u52E9>'['y\u00ec']'",
+ "\u52EA>'['ju\u00e9']'",
+ "\u52EB>'['f\u00e1n']'",
+ "\u52EC>'['ju\u00e0n']'",
+ "\u52ED>'['t\u00f3ng']'",
+ "\u52EE>'['j\u00f9']'",
+ "\u52EF>'[dan]'",
+ "\u52F0>'['xi\u00e9']'",
+ "\u52F1>'['ma\u00ec']'",
+ "\u52F2>'[xun]'",
+ "\u52F3>'[xun]'",
+ "\u52F4>'['l\u01DC']'",
+ "\u52F5>'['l\u00ec']'",
+ "\u52F6>'['ch\u00e8']'",
+ "\u52F7>'['r\u00e1ng']'",
+ "\u52F8>'['qu\u00e0n']'",
+ "\u52F9>'[bao]'",
+ "\u52FA>'['sha\u00f3']'",
+ "\u52FB>'['y\u00fan']'",
+ "\u52FC>'[jiu]'",
+ "\u52FD>'['ba\u00f2']'",
+ "\u52FE>'[gou]'",
+ "\u52FF>'['w\u00f9']'",
+ "\u5300>'['y\u00fan']'",
+ "\u5303>'['ga\u00ec']'",
+ "\u5304>'['ga\u00ec']'",
+ "\u5305>'[bao]'",
+ "\u5306>'[cong]'",
+ "\u5308>'[xiong]'",
+ "\u5309>'[peng]'",
+ "\u530A>'['j\u00fa']'",
+ "\u530B>'['ta\u00f3']'",
+ "\u530C>'['g\u00e9']'",
+ "\u530D>'['p\u00fa']'",
+ "\u530E>'['\u00e0n']'",
+ "\u530F>'['pa\u00f3']'",
+ "\u5310>'['f\u00fa']'",
+ "\u5311>'[gong]'",
+ "\u5312>'['d\u00e1']'",
+ "\u5313>'['ji\u00f9']'",
+ "\u5314>'[qiong]'",
+ "\u5315>'['b\u012D']'",
+ "\u5316>'['hu\u00e0']'",
+ "\u5317>'['be\u012D']'",
+ "\u5318>'['na\u014F']'",
+ "\u5319>'['ch\u00ed']'",
+ "\u531A>'[fang]'",
+ "\u531B>'['ji\u00f9']'",
+ "\u531C>'['y\u00ed']'",
+ "\u531D>'[za]'",
+ "\u531E>'['ji\u00e0ng']'",
+ "\u531F>'['k\u00e0ng']'",
+ "\u5320>'['ji\u00e0ng']'",
+ "\u5321>'[kuang]'",
+ "\u5322>'[hu]'",
+ "\u5323>'['xi\u00e1']'",
+ "\u5324>'[qu]'",
+ "\u5325>'['bi\u00e0n']'",
+ "\u5326>'['gu\u012D']'",
+ "\u5327>'['qi\u00e8']'",
+ "\u5328>'[zang]'",
+ "\u5329>'[kuang]'",
+ "\u532A>'['fe\u012D']'",
+ "\u532B>'[hu]'",
+ "\u532C>'['to\u00fa']'",
+ "\u532D>'['gu\u012D']'",
+ "\u532E>'['gu\u00ec']'",
+ "\u532F>'['hu\u00ec']'",
+ "\u5330>'[dan]'",
+ "\u5331>'['gu\u00ec']'",
+ "\u5332>'['li\u00e1n']'",
+ "\u5333>'['li\u00e1n']'",
+ "\u5334>'['su\u0103n']'",
+ "\u5335>'['d\u00fa']'",
+ "\u5336>'['ji\u00f9']'",
+ "\u5337>'['q\u00fa']'",
+ "\u5338>'['x\u012D']'",
+ "\u5339>'['p\u012D']'",
+ "\u533A>'[qu]'",
+ "\u533B>'['y\u00ec']'",
+ "\u533C>'['qi\u00e0']'",
+ "\u533D>'['y\u0103n']'",
+ "\u533E>'['bi\u0103n']'",
+ "\u533F>'['n\u00ec']'",
+ "\u5340>'[qu]'",
+ "\u5341>'['sh\u00ed']'",
+ "\u5342>'['x\u00ecn']'",
+ "\u5343>'[qian]'",
+ "\u5344>'['ni\u00e0n']'",
+ "\u5345>'['s\u00e0']'",
+ "\u5346>'['z\u00fa']'",
+ "\u5347>'[sheng]'",
+ "\u5348>'['w\u016D']'",
+ "\u5349>'['hu\u00ec']'",
+ "\u534A>'['b\u00e0n']'",
+ "\u534B>'['sh\u00ec']'",
+ "\u534C>'['x\u00ec']'",
+ "\u534D>'['w\u00e0n']'",
+ "\u534E>'['hu\u00e1']'",
+ "\u534F>'['xi\u00e9']'",
+ "\u5350>'['w\u00e0n']'",
+ "\u5351>'[bei]'",
+ "\u5352>'['z\u00fa']'",
+ "\u5353>'[zhuo]'",
+ "\u5354>'['xi\u00e9']'",
+ "\u5355>'[dan]'",
+ "\u5356>'['ma\u00ec']'",
+ "\u5357>'['n\u00e1n']'",
+ "\u5358>'[dan]'",
+ "\u5359>'['j\u00ed']'",
+ "\u535A>'['b\u00f3']'",
+ "\u535B>'['shua\u00ec']'",
+ "\u535C>'['b\u016D']'",
+ "\u535D>'['ku\u00e0ng']'",
+ "\u535E>'['bi\u00e0n']'",
+ "\u535F>'['b\u016D']'",
+ "\u5360>'[zhan]'",
+ "\u5361>'['qi\u0103']'",
+ "\u5362>'['l\u00fa']'",
+ "\u5363>'['yo\u016D']'",
+ "\u5364>'['l\u016D']'",
+ "\u5365>'[xi]'",
+ "\u5366>'['gu\u00e0']'",
+ "\u5367>'['w\u00f2']'",
+ "\u5368>'['xi\u00e8']'",
+ "\u5369>'['ji\u00e9']'",
+ "\u536A>'['ji\u00e9']'",
+ "\u536B>'['we\u00ec']'",
+ "\u536C>'['\u00e1ng']'",
+ "\u536D>'['qi\u00f3ng']'",
+ "\u536E>'[zhi]'",
+ "\u536F>'['ma\u014F']'",
+ "\u5370>'['y\u00ecn']'",
+ "\u5371>'[wei]'",
+ "\u5372>'['sha\u00f2']'",
+ "\u5373>'['j\u00ed']'",
+ "\u5374>'['qu\u00e8']'",
+ "\u5375>'['lu\u0103n']'",
+ "\u5376>'['sh\u00ec']'",
+ "\u5377>'['ju\u00e0n']'",
+ "\u5378>'['xi\u00e8']'",
+ "\u5379>'['x\u00f9']'",
+ "\u537A>'['j\u012Dn']'",
+ "\u537B>'['qu\u00e8']'",
+ "\u537C>'['w\u00f9']'",
+ "\u537D>'['j\u00ed']'",
+ "\u537E>'['\u00e8']'",
+ "\u537F>'[qing]'",
+ "\u5380>'[xi]'",
+ "\u5382>'['h\u00e0n']'",
+ "\u5383>'[zhan]'",
+ "\u5384>'['\u00e8']'",
+ "\u5385>'[ting]'",
+ "\u5386>'['l\u00ec']'",
+ "\u5387>'['zh\u00e9']'",
+ "\u5388>'['h\u0103n']'",
+ "\u5389>'['l\u00ec']'",
+ "\u538A>'['y\u0103']'",
+ "\u538B>'[ya]'",
+ "\u538C>'['y\u00e0n']'",
+ "\u538D>'['sh\u00e8']'",
+ "\u538E>'['zh\u012D']'",
+ "\u538F>'['zh\u0103']'",
+ "\u5390>'['p\u00e1ng']'",
+ "\u5392>'['h\u00e9']'",
+ "\u5393>'['y\u00e1']'",
+ "\u5394>'['zh\u00ec']'",
+ "\u5395>'['c\u00e8']'",
+ "\u5396>'['p\u00e1ng']'",
+ "\u5397>'['t\u00ed']'",
+ "\u5398>'['l\u00ed']'",
+ "\u5399>'['sh\u00e8']'",
+ "\u539A>'['ho\u00f9']'",
+ "\u539B>'[ting]'",
+ "\u539C>'[zui]'",
+ "\u539D>'['cu\u00f2']'",
+ "\u539E>'['fe\u00ec']'",
+ "\u539F>'['yu\u00e1n']'",
+ "\u53A0>'['c\u00e8']'",
+ "\u53A1>'['yu\u00e1n']'",
+ "\u53A2>'[xiang]'",
+ "\u53A3>'['y\u0103n']'",
+ "\u53A4>'['l\u00ec']'",
+ "\u53A5>'['ju\u00e9']'",
+ "\u53A6>'['sh\u00e0']'",
+ "\u53A7>'[dian]'",
+ "\u53A8>'['ch\u00fa']'",
+ "\u53A9>'['ji\u00f9']'",
+ "\u53AA>'['q\u00edn']'",
+ "\u53AB>'['a\u00f3']'",
+ "\u53AC>'['gu\u012D']'",
+ "\u53AD>'['y\u00e0n']'",
+ "\u53AE>'[si]'",
+ "\u53AF>'['l\u00ec']'",
+ "\u53B0>'['ch\u0103ng']'",
+ "\u53B1>'['l\u00e1n']'",
+ "\u53B2>'['l\u00ec']'",
+ "\u53B3>'['y\u00e1n']'",
+ "\u53B4>'['y\u0103n']'",
+ "\u53B5>'['yu\u00e1n']'",
+ "\u53B6>'[si]'",
+ "\u53B7>'[gong]'",
+ "\u53B8>'['l\u00edn']'",
+ "\u53B9>'['qi\u00fa']'",
+ "\u53BA>'['q\u00f9']'",
+ "\u53BB>'['q\u00f9']'",
+ "\u53BD>'['le\u012D']'",
+ "\u53BE>'[du]'",
+ "\u53BF>'['xi\u00e0n']'",
+ "\u53C0>'[zhuan]'",
+ "\u53C1>'[san]'",
+ "\u53C2>'[can]'",
+ "\u53C3>'[can]'",
+ "\u53C4>'[can]'",
+ "\u53C5>'[can]'",
+ "\u53C6>'['a\u00ec']'",
+ "\u53C7>'['da\u00ec']'",
+ "\u53C8>'['yo\u00f9']'",
+ "\u53C9>'['ch\u0101']'",
+ "\u53CA>'['j\u00ed']'",
+ "\u53CB>'['yo\u016D']'",
+ "\u53CC>'[shuang]'",
+ "\u53CD>'['f\u0103n']'",
+ "\u53CE>'[shou]'",
+ "\u53CF>'['gua\u00ec']'",
+ "\u53D0>'['b\u00e1']'",
+ "\u53D1>'[fa]'",
+ "\u53D2>'['ru\u00f2']'",
+ "\u53D3>'['sh\u00ec']'",
+ "\u53D4>'[shu]'",
+ "\u53D5>'['zhu\u00f3']'",
+ "\u53D6>'[qu]'",
+ "\u53D7>'['sho\u00f9']'",
+ "\u53D8>'['bi\u00e0n']'",
+ "\u53D9>'['x\u00f9']'",
+ "\u53DA>'['ji\u0103']'",
+ "\u53DB>'['p\u00e0n']'",
+ "\u53DC>'['so\u016D']'",
+ "\u53DD>'['ga\u00f2']'",
+ "\u53DE>'['we\u00ec']'",
+ "\u53DF>'['so\u016D']'",
+ "\u53E0>'['di\u00e9']'",
+ "\u53E1>'['ru\u00ec']'",
+ "\u53E2>'['c\u00f3ng']'",
+ "\u53E3>'['ko\u016D']'",
+ "\u53E4>'['g\u016D']'",
+ "\u53E5>'['j\u00f9']'",
+ "\u53E6>'['l\u00ecng']'",
+ "\u53E7>'['gu\u0103']'",
+ "\u53E8>'[tao]'",
+ "\u53E9>'['ko\u00f9']'",
+ "\u53EA>'['zh\u012D']'",
+ "\u53EB>'['jia\u00f2']'",
+ "\u53EC>'['zha\u00f2']'",
+ "\u53ED>'[ba]'",
+ "\u53EE>'[ding]'",
+ "\u53EF>'['k\u0115']'",
+ "\u53F0>'['ta\u00ed']'",
+ "\u53F1>'['ch\u00ec']'",
+ "\u53F2>'['sh\u012D']'",
+ "\u53F3>'['yo\u00f9']'",
+ "\u53F4>'['qi\u00fa']'",
+ "\u53F5>'['p\u014F']'",
+ "\u53F6>'['xi\u00e9']'",
+ "\u53F7>'['ha\u00f2']'",
+ "\u53F8>'[si]'",
+ "\u53F9>'['t\u00e0n']'",
+ "\u53FA>'['ch\u012D']'",
+ "\u53FB>'['l\u00e8']'",
+ "\u53FC>'[diao]'",
+ "\u53FD>'[ji]'",
+ "\u53FF>'[hong]'",
+ "\u5400>'[mie]'",
+ "\u5401>'[xu]'",
+ "\u5402>'['m\u00e1ng']'",
+ "\u5403>'[chi]'",
+ "\u5404>'['g\u00e8']'",
+ "\u5405>'[xuan]'",
+ "\u5406>'[yao]'",
+ "\u5407>'['z\u012D']'",
+ "\u5408>'['h\u00e9']'",
+ "\u5409>'['j\u00ed']'",
+ "\u540A>'['dia\u00f2']'",
+ "\u540B>'['c\u00f9n']'",
+ "\u540C>'['t\u00f3ng']'",
+ "\u540D>'['m\u00edng']'",
+ "\u540E>'['ho\u00f9']'",
+ "\u540F>'['l\u00ec']'",
+ "\u5410>'['t\u016D']'",
+ "\u5411>'['xi\u00e0ng']'",
+ "\u5412>'['zh\u00e0']'",
+ "\u5413>'['xi\u00e0']'",
+ "\u5414>'['y\u0115']'",
+ "\u5415>'['l\u01DA']'",
+ "\u5416>'[a]'",
+ "\u5417>'['m\u0101']'",
+ "\u5418>'['o\u016D']'",
+ "\u5419>'[xue]'",
+ "\u541A>'[yi]'",
+ "\u541B>'[jun]'",
+ "\u541C>'['cho\u016D']'",
+ "\u541D>'['l\u00ecn']'",
+ "\u541E>'[tun]'",
+ "\u541F>'['y\u00edn']'",
+ "\u5420>'['fe\u00ec']'",
+ "\u5421>'['b\u012D']'",
+ "\u5422>'['q\u00ecn']'",
+ "\u5423>'['q\u00ecn']'",
+ "\u5424>'['ji\u00e8']'",
+ "\u5425>'['b\u00f9']'",
+ "\u5426>'['fo\u016D']'",
+ "\u5427>'['b\u0101']'",
+ "\u5428>'[dun]'",
+ "\u5429>'[fen]'",
+ "\u542A>'['\u00e9']'",
+ "\u542B>'['h\u00e1n']'",
+ "\u542C>'[ting]'",
+ "\u542D>'['h\u00e1ng']'",
+ "\u542E>'['sh\u016Dn']'",
+ "\u542F>'['q\u012D']'",
+ "\u5430>'['h\u00f3ng']'",
+ "\u5431>'[zhi]'",
+ "\u5432>'['sh\u0115n']'",
+ "\u5433>'['w\u00fa']'",
+ "\u5434>'['w\u00fa']'",
+ "\u5435>'['cha\u014F']'",
+ "\u5436>'['n\u00e8']'",
+ "\u5437>'['xu\u00e8']'",
+ "\u5438>'[xi]'",
+ "\u5439>'[chui]'",
+ "\u543A>'[dou]'",
+ "\u543B>'['w\u0115n']'",
+ "\u543C>'['ho\u016D']'",
+ "\u543D>'['o\u00fa']'",
+ "\u543E>'['w\u00fa']'",
+ "\u543F>'['ga\u00f2']'",
+ "\u5440>'[ya]'",
+ "\u5441>'['j\u00f9n']'",
+ "\u5442>'['l\u01DA']'",
+ "\u5443>'['\u00e8']'",
+ "\u5444>'['g\u00e9']'",
+ "\u5445>'['me\u00ed']'",
+ "\u5446>'['a\u00ed']'",
+ "\u5447>'['q\u012D']'",
+ "\u5448>'['ch\u00e9ng']'",
+ "\u5449>'['w\u00fa']'",
+ "\u544A>'['ga\u00f2']'",
+ "\u544B>'[fu]'",
+ "\u544C>'['jia\u00f2']'",
+ "\u544D>'[hong]'",
+ "\u544E>'['ch\u012D']'",
+ "\u544F>'[sheng]'",
+ "\u5450>'['n\u00e8']'",
+ "\u5451>'[tun]'",
+ "\u5452>'['f\u016D']'",
+ "\u5453>'['y\u00ec']'",
+ "\u5454>'[dai]'",
+ "\u5455>'[ou]'",
+ "\u5456>'['l\u00ec']'",
+ "\u5457>'['ba\u00ec']'",
+ "\u5458>'['yu\u00e1n']'",
+ "\u5459>'[kuai]'",
+ "\u545B>'[qiang]'",
+ "\u545C>'[wu]'",
+ "\u545D>'['\u00e8']'",
+ "\u545E>'[shi]'",
+ "\u545F>'['qu\u0103n']'",
+ "\u5460>'[pen]'",
+ "\u5461>'['w\u0115n']'",
+ "\u5462>'['n\u00ed']'",
+ "\u5463>'['m\u0301']'",
+ "\u5464>'['l\u012Dng']'",
+ "\u5465>'['r\u0103n']'",
+ "\u5466>'[you]'",
+ "\u5467>'['d\u012D']'",
+ "\u5468>'[zhou]'",
+ "\u5469>'['sh\u00ec']'",
+ "\u546A>'['zho\u00f9']'",
+ "\u546B>'[tie]'",
+ "\u546C>'['x\u00ec']'",
+ "\u546D>'['y\u00ec']'",
+ "\u546E>'['q\u00ec']'",
+ "\u546F>'['p\u00edng']'",
+ "\u5470>'['z\u012D']'",
+ "\u5471>'[gu]'",
+ "\u5472>'[zi]'",
+ "\u5473>'['we\u00ec']'",
+ "\u5474>'[xu]'",
+ "\u5475>'[he]'",
+ "\u5476>'['na\u00f3']'",
+ "\u5477>'[xia]'",
+ "\u5478>'[pei]'",
+ "\u5479>'['y\u00ec']'",
+ "\u547A>'[xiao]'",
+ "\u547B>'[shen]'",
+ "\u547C>'[hu]'",
+ "\u547D>'['m\u00ecng']'",
+ "\u547E>'['d\u00e1']'",
+ "\u547F>'[qu]'",
+ "\u5480>'['j\u016D']'",
+ "\u5481>'['g\u00e8m']'",
+ "\u5482>'[za]'",
+ "\u5483>'[tuo]'",
+ "\u5484>'[duo]'",
+ "\u5485>'['po\u00f9']'",
+ "\u5486>'['pa\u00f3']'",
+ "\u5487>'['b\u00ec']'",
+ "\u5488>'['f\u00fa']'",
+ "\u5489>'[yang]'",
+ "\u548A>'['h\u00e9']'",
+ "\u548B>'['zh\u00e0']'",
+ "\u548C>'['h\u00e9']'",
+ "\u548D>'[hai]'",
+ "\u548E>'['ji\u00f9']'",
+ "\u548F>'['y\u014Fng']'",
+ "\u5490>'['f\u00f9']'",
+ "\u5491>'['qu\u00e8']'",
+ "\u5492>'['zho\u00f9']'",
+ "\u5493>'['w\u0103']'",
+ "\u5494>'['k\u0103']'",
+ "\u5495>'[gu]'",
+ "\u5496>'[ka]'",
+ "\u5497>'['zu\u014F']'",
+ "\u5498>'['b\u00f9']'",
+ "\u5499>'['l\u00f3ng']'",
+ "\u549A>'[dong]'",
+ "\u549B>'['n\u00edng']'",
+ "\u549D>'[si]'",
+ "\u549E>'['xi\u00e0n']'",
+ "\u549F>'['hu\u00f2']'",
+ "\u54A0>'['q\u00ec']'",
+ "\u54A1>'['\u00e8r']'",
+ "\u54A2>'['\u00e8']'",
+ "\u54A3>'[guang]'",
+ "\u54A4>'['zh\u00e0']'",
+ "\u54A5>'['x\u00ec']'",
+ "\u54A6>'['y\u00ed']'",
+ "\u54A7>'['li\u0115']'",
+ "\u54A8>'[zi]'",
+ "\u54A9>'[mie]'",
+ "\u54AA>'[mi]'",
+ "\u54AB>'['zh\u012D']'",
+ "\u54AC>'['ya\u014F']'",
+ "\u54AD>'[ji]'",
+ "\u54AE>'['zho\u00f9']'",
+ "\u54AF>'[ge]'",
+ "\u54B0>'['shua\u00ec']'",
+ "\u54B1>'['z\u00e1n']'",
+ "\u54B2>'['xia\u00f2']'",
+ "\u54B3>'['k\u00e9']'",
+ "\u54B4>'[hui]'",
+ "\u54B5>'[kua]'",
+ "\u54B6>'['hua\u00ec']'",
+ "\u54B7>'['ta\u00f3']'",
+ "\u54B8>'['xi\u00e1n']'",
+ "\u54B9>'['\u00e8']'",
+ "\u54BA>'[xuan]'",
+ "\u54BB>'[xiu]'",
+ "\u54BC>'[wai]'",
+ "\u54BD>'[yan]'",
+ "\u54BE>'['la\u014F']'",
+ "\u54BF>'[yi]'",
+ "\u54C0>'[ai]'",
+ "\u54C1>'['p\u012Dn']'",
+ "\u54C2>'['sh\u0115n']'",
+ "\u54C3>'['t\u00f3ng']'",
+ "\u54C4>'[hong]'",
+ "\u54C5>'[xiong]'",
+ "\u54C6>'['ch\u012D']'",
+ "\u54C7>'[wa]'",
+ "\u54C8>'[ha]'",
+ "\u54C9>'[zai]'",
+ "\u54CA>'['y\u00f9']'",
+ "\u54CB>'['d\u00ec']'",
+ "\u54CC>'['pa\u00ec']'",
+ "\u54CD>'['xi\u0103ng']'",
+ "\u54CE>'[ai]'",
+ "\u54CF>'['h\u0115n']'",
+ "\u54D0>'[kuang]'",
+ "\u54D1>'['y\u0103']'",
+ "\u54D2>'[da]'",
+ "\u54D3>'[xiao]'",
+ "\u54D4>'['b\u00ec']'",
+ "\u54D5>'['yu\u0115']'",
+ "\u54D7>'[hua]'",
+ "\u54D9>'['kua\u00ec']'",
+ "\u54DA>'['du\u014F']'",
+ "\u54DC>'['j\u00ec']'",
+ "\u54DD>'['n\u00f3ng']'",
+ "\u54DE>'[mou]'",
+ "\u54DF>'['y\u014D']'",
+ "\u54E0>'['ha\u00f2']'",
+ "\u54E1>'['yu\u00e1n']'",
+ "\u54E2>'['l\u00f2ng']'",
+ "\u54E3>'['po\u016D']'",
+ "\u54E4>'['m\u00e1ng']'",
+ "\u54E5>'[ge]'",
+ "\u54E6>'['\u00e9']'",
+ "\u54E7>'[chi]'",
+ "\u54E8>'['sha\u00f2']'",
+ "\u54E9>'[li]'",
+ "\u54EA>'['n\u0103']'",
+ "\u54EB>'['z\u00fa']'",
+ "\u54EC>'['h\u00e9']'",
+ "\u54ED>'[ku]'",
+ "\u54EE>'[xiao]'",
+ "\u54EF>'['xi\u00e0n']'",
+ "\u54F0>'['la\u00f3']'",
+ "\u54F1>'[bo]'",
+ "\u54F2>'['zh\u00e9']'",
+ "\u54F3>'[zha]'",
+ "\u54F4>'['li\u00e0ng']'",
+ "\u54F5>'[ba]'",
+ "\u54F6>'[mie]'",
+ "\u54F7>'['l\u00e8']'",
+ "\u54F8>'[sui]'",
+ "\u54F9>'['fo\u00fa']'",
+ "\u54FA>'['b\u016D']'",
+ "\u54FB>'['h\u00e0n']'",
+ "\u54FC>'[heng]'",
+ "\u54FD>'['g\u0115ng']'",
+ "\u54FE>'[shuo]'",
+ "\u54FF>'['g\u0115']'",
+ "\u5500>'['yo\u016D']'",
+ "\u5501>'['y\u00e0n']'",
+ "\u5502>'['g\u016D']'",
+ "\u5503>'['g\u016D']'",
+ "\u5504>'['ba\u00ec']'",
+ "\u5505>'[han]'",
+ "\u5506>'[suo]'",
+ "\u5507>'['ch\u00fan']'",
+ "\u5508>'['y\u00ec']'",
+ "\u5509>'[ai]'",
+ "\u550A>'['ji\u00e1']'",
+ "\u550B>'['t\u016D']'",
+ "\u550C>'['xi\u00e1n']'",
+ "\u550D>'['hu\u0103n']'",
+ "\u550E>'[li]'",
+ "\u550F>'[xi]'",
+ "\u5510>'['t\u00e1ng']'",
+ "\u5511>'['zu\u00f2']'",
+ "\u5512>'['qi\u00fa']'",
+ "\u5513>'[che]'",
+ "\u5514>'['w\u00fa']'",
+ "\u5515>'['za\u00f2']'",
+ "\u5516>'['y\u0103']'",
+ "\u5517>'[dou]'",
+ "\u5518>'['q\u012D']'",
+ "\u5519>'['d\u00ed']'",
+ "\u551A>'['q\u00ecn']'",
+ "\u551B>'['m\u00e0']'",
+ "\u551D>'['h\u014Fng']'",
+ "\u551E>'['do\u016D']'",
+ "\u5520>'['la\u00f3']'",
+ "\u5521>'['li\u0103ng']'",
+ "\u5522>'['su\u014F']'",
+ "\u5523>'['za\u00f2']'",
+ "\u5524>'['hu\u00e0n']'",
+ "\u5526>'[sha]'",
+ "\u5527>'[ji]'",
+ "\u5528>'['zu\u014F']'",
+ "\u5529>'[wo]'",
+ "\u552A>'['f\u0115ng']'",
+ "\u552B>'['y\u00edn']'",
+ "\u552C>'['h\u016D']'",
+ "\u552D>'[qi]'",
+ "\u552E>'['sho\u00f9']'",
+ "\u552F>'['we\u00ed']'",
+ "\u5530>'[shua]'",
+ "\u5531>'['ch\u00e0ng']'",
+ "\u5532>'['\u00e9r']'",
+ "\u5533>'['l\u00ec']'",
+ "\u5534>'['qi\u00e0ng']'",
+ "\u5535>'['\u0103n']'",
+ "\u5536>'['ji\u00e8']'",
+ "\u5537>'[yo]'",
+ "\u5538>'['ni\u00e0n']'",
+ "\u5539>'[yu]'",
+ "\u553A>'['ti\u0103n']'",
+ "\u553B>'['la\u012D']'",
+ "\u553C>'['sh\u00e0']'",
+ "\u553D>'[xi]'",
+ "\u553E>'['tu\u00f2']'",
+ "\u553F>'[hu]'",
+ "\u5540>'['a\u00ed']'",
+ "\u5541>'[zhou]'",
+ "\u5542>'['no\u00f9']'",
+ "\u5543>'['k\u0115n']'",
+ "\u5544>'['zhu\u00f3']'",
+ "\u5545>'['zhu\u00f3']'",
+ "\u5546>'[shang]'",
+ "\u5547>'['d\u00ed']'",
+ "\u5548>'['h\u00e8ng']'",
+ "\u5549>'['l\u00e1n']'",
+ "\u554A>'['\u0101']'",
+ "\u554B>'[xiao]'",
+ "\u554C>'[xiang]'",
+ "\u554D>'[tun]'",
+ "\u554E>'['w\u016D']'",
+ "\u554F>'['w\u00e8n']'",
+ "\u5550>'['cu\u00ec']'",
+ "\u5551>'['sh\u00e0']'",
+ "\u5552>'[hu]'",
+ "\u5553>'['q\u012D']'",
+ "\u5554>'['q\u012D']'",
+ "\u5555>'['ta\u00f3']'",
+ "\u5556>'['d\u00e0n']'",
+ "\u5557>'['d\u00e0n']'",
+ "\u5558>'['y\u00e8']'",
+ "\u5559>'['z\u012D']'",
+ "\u555A>'['b\u012D']'",
+ "\u555B>'['cu\u00ec']'",
+ "\u555C>'['chu\u00f2']'",
+ "\u555D>'['h\u00e9']'",
+ "\u555E>'['y\u0103']'",
+ "\u555F>'['q\u012D']'",
+ "\u5560>'['zh\u00e9']'",
+ "\u5561>'[pei]'",
+ "\u5562>'['li\u0103ng']'",
+ "\u5563>'['xi\u00e1n']'",
+ "\u5564>'['p\u00ed']'",
+ "\u5565>'['sh\u00e0']'",
+ "\u5566>'['l\u0101']'",
+ "\u5567>'['z\u00e9']'",
+ "\u5568>'[qing]'",
+ "\u5569>'['gu\u00e0']'",
+ "\u556A>'[pa]'",
+ "\u556B>'['zh\u0115']'",
+ "\u556C>'['s\u00e8']'",
+ "\u556D>'['zhu\u00e0n']'",
+ "\u556E>'['ni\u00e8']'",
+ "\u556F>'['gu\u014D']'",
+ "\u5570>'[luo]'",
+ "\u5571>'[yan]'",
+ "\u5572>'['d\u00ec']'",
+ "\u5573>'['qu\u00e1n']'",
+ "\u5574>'[tan]'",
+ "\u5575>'['b\u014D']'",
+ "\u5576>'['d\u00ecng']'",
+ "\u5577>'[lang]'",
+ "\u5578>'['xia\u00f2']'",
+ "\u557A>'['t\u00e1ng']'",
+ "\u557B>'['ch\u00ec']'",
+ "\u557C>'['t\u00ed']'",
+ "\u557D>'['\u00e1n']'",
+ "\u557E>'[jiu]'",
+ "\u557F>'['d\u00e0n']'",
+ "\u5580>'['k\u00e8']'",
+ "\u5581>'['y\u00f3ng']'",
+ "\u5582>'['we\u00ec']'",
+ "\u5583>'['n\u00e1n']'",
+ "\u5584>'['sh\u00e0n']'",
+ "\u5585>'['y\u00f9']'",
+ "\u5586>'['zh\u00e9']'",
+ "\u5587>'['l\u0103']'",
+ "\u5588>'[jie]'",
+ "\u5589>'['ho\u00fa']'",
+ "\u558A>'['h\u0103n']'",
+ "\u558B>'['di\u00e9']'",
+ "\u558C>'[zhou]'",
+ "\u558D>'['cha\u00ed']'",
+ "\u558E>'[wai]'",
+ "\u558F>'['r\u0115']'",
+ "\u5590>'['y\u00f9']'",
+ "\u5591>'[yin]'",
+ "\u5592>'['z\u00e1n']'",
+ "\u5593>'[yao]'",
+ "\u5594>'[wo]'",
+ "\u5595>'['mi\u0103n']'",
+ "\u5596>'['h\u00fa']'",
+ "\u5597>'['y\u016Dn']'",
+ "\u5598>'['chu\u0103n']'",
+ "\u5599>'['hu\u00ec']'",
+ "\u559A>'['hu\u00e0n']'",
+ "\u559B>'['hu\u00e0n']'",
+ "\u559C>'['x\u012D']'",
+ "\u559D>'[he]'",
+ "\u559E>'[ji]'",
+ "\u559F>'['ku\u00ec']'",
+ "\u55A0>'['zh\u014Fng']'",
+ "\u55A1>'['we\u012D']'",
+ "\u55A2>'['sh\u00e0']'",
+ "\u55A3>'['x\u016D']'",
+ "\u55A4>'['hu\u00e1ng']'",
+ "\u55A5>'['d\u00f9']'",
+ "\u55A6>'['ni\u00e8']'",
+ "\u55A7>'[xuan]'",
+ "\u55A8>'['li\u00e0ng']'",
+ "\u55A9>'['y\u00f9']'",
+ "\u55AA>'[sang]'",
+ "\u55AB>'[chi]'",
+ "\u55AC>'['qia\u00f3']'",
+ "\u55AD>'['y\u00e0n']'",
+ "\u55AE>'[dan]'",
+ "\u55AF>'[pen]'",
+ "\u55B0>'[can]'",
+ "\u55B1>'['l\u00ed']'",
+ "\u55B2>'['y\u014D']'",
+ "\u55B3>'[zha]'",
+ "\u55B4>'[wei]'",
+ "\u55B5>'[miao]'",
+ "\u55B6>'['y\u00edng']'",
+ "\u55B7>'[pen]'",
+ "\u55B9>'['ku\u00ed']'",
+ "\u55BA>'['x\u00ec']'",
+ "\u55BB>'['y\u00f9']'",
+ "\u55BC>'['ji\u00e9']'",
+ "\u55BD>'['lo\u016B']'",
+ "\u55BE>'['k\u00f9']'",
+ "\u55BF>'['sa\u00f2']'",
+ "\u55C0>'['hu\u00f2']'",
+ "\u55C1>'['t\u00ed']'",
+ "\u55C2>'['ya\u00f3']'",
+ "\u55C3>'['h\u00e8']'",
+ "\u55C4>'['\u00e1']'",
+ "\u55C5>'['xi\u00f9']'",
+ "\u55C6>'[qiang]'",
+ "\u55C7>'['s\u00e8']'",
+ "\u55C8>'[yong]'",
+ "\u55C9>'['s\u00f9']'",
+ "\u55CA>'['h\u014Fng']'",
+ "\u55CB>'['xi\u00e9']'",
+ "\u55CC>'['y\u00ec']'",
+ "\u55CD>'[suo]'",
+ "\u55CE>'['m\u0101']'",
+ "\u55CF>'[cha]'",
+ "\u55D0>'['ha\u00ec']'",
+ "\u55D1>'['k\u00e8']'",
+ "\u55D2>'['t\u00e0']'",
+ "\u55D3>'['s\u0103ng']'",
+ "\u55D4>'['ti\u00e1n']'",
+ "\u55D5>'['r\u00f9']'",
+ "\u55D6>'[sou]'",
+ "\u55D7>'[wa]'",
+ "\u55D8>'[ji]'",
+ "\u55D9>'['p\u0103ng']'",
+ "\u55DA>'[wu]'",
+ "\u55DB>'['xi\u00e1n']'",
+ "\u55DC>'['sh\u00ec']'",
+ "\u55DD>'['g\u00e9']'",
+ "\u55DE>'[zi]'",
+ "\u55DF>'[jie]'",
+ "\u55E0>'['lu\u00f2']'",
+ "\u55E1>'[weng]'",
+ "\u55E2>'['w\u00e0']'",
+ "\u55E3>'['s\u00ec']'",
+ "\u55E4>'[chi]'",
+ "\u55E5>'['ha\u00f3']'",
+ "\u55E6>'[suo]'",
+ "\u55E7>'['jia1l\u00fan']'",
+ "\u55E8>'['ha\u012D']'",
+ "\u55E9>'['su\u014F']'",
+ "\u55EA>'['q\u00edn']'",
+ "\u55EB>'['ni\u00e8']'",
+ "\u55EC>'[he]'",
+ "\u55EE>'['sa\u00ec']'",
+ "\u55EF>'['ng\u0300']'",
+ "\u55F0>'['g\u00e8']'",
+ "\u55F1>'['n\u00e1']'",
+ "\u55F2>'['di\u0103']'",
+ "\u55F3>'['a\u00ec']'",
+ "\u55F5>'[tong]'",
+ "\u55F6>'['b\u00ec']'",
+ "\u55F7>'['a\u00f3']'",
+ "\u55F8>'['a\u00f3']'",
+ "\u55F9>'['li\u00e1n']'",
+ "\u55FA>'[cui]'",
+ "\u55FB>'[zhe]'",
+ "\u55FC>'['m\u00f2']'",
+ "\u55FD>'['so\u00f9']'",
+ "\u55FE>'['so\u016D']'",
+ "\u55FF>'['t\u0103n']'",
+ "\u5600>'['d\u00ed']'",
+ "\u5601>'[qi]'",
+ "\u5602>'['jia\u00f2']'",
+ "\u5603>'[chong]'",
+ "\u5604>'[jiao]'",
+ "\u5605>'['ka\u012D']'",
+ "\u5606>'['t\u00e0n']'",
+ "\u5607>'[san]'",
+ "\u5608>'['ca\u00f3']'",
+ "\u5609>'[jia]'",
+ "\u560A>'['a\u00ed']'",
+ "\u560B>'[xiao]'",
+ "\u560C>'[piao]'",
+ "\u560D>'['lo\u016B']'",
+ "\u560E>'[ga]'",
+ "\u560F>'['g\u016D']'",
+ "\u5610>'[xiao]'",
+ "\u5611>'[hu]'",
+ "\u5612>'['hu\u00ec']'",
+ "\u5613>'['gu\u014D']'",
+ "\u5614>'[ou]'",
+ "\u5615>'[xian]'",
+ "\u5616>'['z\u00e9']'",
+ "\u5617>'['ch\u00e1ng']'",
+ "\u5618>'[xu]'",
+ "\u5619>'['p\u00f3']'",
+ "\u561A>'['d\u00e9']'",
+ "\u561B>'['m\u0101']'",
+ "\u561C>'['m\u00e0']'",
+ "\u561D>'['h\u00fa']'",
+ "\u561E>'['le\u012B']'",
+ "\u561F>'[du]'",
+ "\u5620>'[ga]'",
+ "\u5621>'[tang]'",
+ "\u5622>'['y\u0115']'",
+ "\u5623>'[beng]'",
+ "\u5624>'[ying]'",
+ "\u5626>'['jia\u00f2']'",
+ "\u5627>'[mi]'",
+ "\u5628>'['xia\u00f2']'",
+ "\u5629>'[hua]'",
+ "\u562A>'['ma\u012D']'",
+ "\u562B>'['r\u00e1n']'",
+ "\u562C>'[zuo]'",
+ "\u562D>'[peng]'",
+ "\u562E>'['la\u00f3']'",
+ "\u562F>'['xia\u00f2']'",
+ "\u5630>'[ji]'",
+ "\u5631>'['zh\u016D']'",
+ "\u5632>'['cha\u00f3']'",
+ "\u5633>'['ku\u00ec']'",
+ "\u5634>'['zu\u012D']'",
+ "\u5635>'[xiao]'",
+ "\u5636>'[si]'",
+ "\u5637>'['ha\u00f3']'",
+ "\u5638>'['f\u016D']'",
+ "\u5639>'['lia\u00f3']'",
+ "\u563A>'['qia\u00f3']'",
+ "\u563B>'[xi]'",
+ "\u563C>'['xi\u00f9']'",
+ "\u563D>'[tan]'",
+ "\u563E>'['t\u00e1n']'",
+ "\u563F>'['m\u00f2']'",
+ "\u5640>'['x\u00f9n']'",
+ "\u5641>'['\u0115']'",
+ "\u5642>'['z\u016Dn']'",
+ "\u5643>'[fan]'",
+ "\u5644>'[chi]'",
+ "\u5645>'[hui]'",
+ "\u5646>'['z\u0103n']'",
+ "\u5647>'['chu\u00e1ng']'",
+ "\u5648>'['c\u00f9']'",
+ "\u5649>'['d\u00e0n']'",
+ "\u564A>'['y\u00f9']'",
+ "\u564B>'[tun]'",
+ "\u564C>'[cheng]'",
+ "\u564D>'['jia\u00f2']'",
+ "\u564E>'[ye]'",
+ "\u564F>'[xi]'",
+ "\u5650>'['q\u00ec']'",
+ "\u5651>'['ha\u00f3']'",
+ "\u5652>'['li\u00e1n']'",
+ "\u5653>'[xu]'",
+ "\u5654>'[deng]'",
+ "\u5655>'[hui]'",
+ "\u5656>'['y\u00edn']'",
+ "\u5657>'[pu]'",
+ "\u5658>'[jue]'",
+ "\u5659>'['q\u00edn']'",
+ "\u565A>'['x\u00fan']'",
+ "\u565B>'['ni\u00e8']'",
+ "\u565C>'[lu]'",
+ "\u565D>'[si]'",
+ "\u565E>'['y\u0103n']'",
+ "\u565F>'['y\u00ecng']'",
+ "\u5660>'[da]'",
+ "\u5661>'[dan]'",
+ "\u5662>'['y\u016D']'",
+ "\u5663>'['zho\u00f9']'",
+ "\u5664>'['j\u00ecn']'",
+ "\u5665>'['n\u00f3ng']'",
+ "\u5666>'['yu\u0115']'",
+ "\u5667>'['hu\u00ec']'",
+ "\u5668>'['q\u00ec']'",
+ "\u5669>'['\u00e8']'",
+ "\u566A>'['za\u00f2']'",
+ "\u566B>'[yi]'",
+ "\u566C>'['sh\u00ec']'",
+ "\u566D>'['jia\u00f2']'",
+ "\u566E>'[yuan]'",
+ "\u566F>'['a\u00ec']'",
+ "\u5670>'[yong]'",
+ "\u5671>'['ju\u00e9']'",
+ "\u5672>'['kua\u00ec']'",
+ "\u5673>'['y\u016D']'",
+ "\u5674>'[pen]'",
+ "\u5675>'['da\u00f2']'",
+ "\u5676>'['g\u00e9']'",
+ "\u5677>'[xin]'",
+ "\u5678>'[dun]'",
+ "\u5679>'[dang]'",
+ "\u567B>'['sa\u012B']'",
+ "\u567C>'[pi]'",
+ "\u567D>'['p\u012D']'",
+ "\u567E>'[yin]'",
+ "\u567F>'['zu\u012D']'",
+ "\u5680>'['n\u00edng']'",
+ "\u5681>'['d\u00ed']'",
+ "\u5682>'['l\u00e0n']'",
+ "\u5683>'[ta]'",
+ "\u5684>'['hu\u00f2']'",
+ "\u5685>'['r\u00fa']'",
+ "\u5686>'[hao]'",
+ "\u5687>'['xi\u00e0']'",
+ "\u5688>'['y\u00e0']'",
+ "\u5689>'[duo]'",
+ "\u568A>'['x\u00ec']'",
+ "\u568B>'['cho\u00fa']'",
+ "\u568C>'['j\u00ec']'",
+ "\u568D>'['j\u00ecn']'",
+ "\u568E>'['ha\u00f3']'",
+ "\u568F>'['t\u00ec']'",
+ "\u5690>'['ch\u00e1ng']'",
+ "\u5693>'[ca]'",
+ "\u5694>'['t\u00ec']'",
+ "\u5695>'[lu]'",
+ "\u5696>'['hu\u00ec']'",
+ "\u5697>'['b\u00f3']'",
+ "\u5698>'[you]'",
+ "\u5699>'['ni\u00e8']'",
+ "\u569A>'['y\u00edn']'",
+ "\u569B>'['h\u00f9']'",
+ "\u569C>'['m\u00f2']'",
+ "\u569D>'[huang]'",
+ "\u569E>'['zh\u00e9']'",
+ "\u569F>'['l\u00ed']'",
+ "\u56A0>'['li\u00fa']'",
+ "\u56A2>'['n\u00e1ng']'",
+ "\u56A3>'[xiao]'",
+ "\u56A4>'['m\u00f3']'",
+ "\u56A5>'['y\u00e0n']'",
+ "\u56A6>'['l\u00ec']'",
+ "\u56A7>'['l\u00fa']'",
+ "\u56A8>'['l\u00f3ng']'",
+ "\u56A9>'['f\u00fa']'",
+ "\u56AA>'['d\u00e0n']'",
+ "\u56AB>'['ch\u00e8n']'",
+ "\u56AC>'['p\u00edn']'",
+ "\u56AD>'['p\u012D']'",
+ "\u56AE>'['xi\u00e0ng']'",
+ "\u56AF>'['hu\u00f2']'",
+ "\u56B0>'['m\u00f3']'",
+ "\u56B1>'['x\u00ec']'",
+ "\u56B2>'['du\u014F']'",
+ "\u56B3>'['k\u00f9']'",
+ "\u56B4>'['y\u00e1n']'",
+ "\u56B5>'['ch\u00e1n']'",
+ "\u56B6>'[ying]'",
+ "\u56B7>'['r\u0103ng']'",
+ "\u56B8>'['di\u0103n']'",
+ "\u56B9>'[la]'",
+ "\u56BA>'['t\u00e0']'",
+ "\u56BB>'[xiao]'",
+ "\u56BC>'['jia\u00f3']'",
+ "\u56BD>'['chu\u00f2']'",
+ "\u56BE>'[huan]'",
+ "\u56BF>'['hu\u00f2']'",
+ "\u56C0>'['zhu\u00e0n']'",
+ "\u56C1>'['ni\u00e8']'",
+ "\u56C2>'[xiao]'",
+ "\u56C3>'['c\u00e0']'",
+ "\u56C4>'['l\u00ed']'",
+ "\u56C5>'['ch\u0103n']'",
+ "\u56C6>'['cha\u00ec']'",
+ "\u56C7>'['l\u00ec']'",
+ "\u56C8>'['y\u00ec']'",
+ "\u56C9>'[luo]'",
+ "\u56CA>'['n\u00e1ng']'",
+ "\u56CB>'['z\u00e0n']'",
+ "\u56CC>'[su]'",
+ "\u56CD>'['x\u012D']'",
+ "\u56CF>'[jian]'",
+ "\u56D0>'['z\u00e1']'",
+ "\u56D1>'['zh\u016D']'",
+ "\u56D2>'['l\u00e1n']'",
+ "\u56D3>'['ni\u00e8']'",
+ "\u56D4>'[nang]'",
+ "\u56D7>'['we\u00ed']'",
+ "\u56D8>'['hu\u00ed']'",
+ "\u56D9>'[yin]'",
+ "\u56DA>'['qi\u00fa']'",
+ "\u56DB>'['s\u00ec']'",
+ "\u56DC>'['n\u00edn']'",
+ "\u56DD>'['ji\u0103n']'",
+ "\u56DE>'['hu\u00ed']'",
+ "\u56DF>'['x\u00ecn']'",
+ "\u56E0>'[yin]'",
+ "\u56E1>'[nan]'",
+ "\u56E2>'['tu\u00e1n']'",
+ "\u56E3>'['tu\u00e1n']'",
+ "\u56E4>'['d\u00f9n']'",
+ "\u56E5>'['k\u00e0ng']'",
+ "\u56E6>'[yuan]'",
+ "\u56E7>'['ji\u014Fng']'",
+ "\u56E8>'[pian]'",
+ "\u56E9>'['y\u00f9n']'",
+ "\u56EA>'[cong]'",
+ "\u56EB>'['h\u00fa']'",
+ "\u56EC>'['hu\u00ed']'",
+ "\u56ED>'['yu\u00e1n']'",
+ "\u56EE>'['yo\u00fa']'",
+ "\u56EF>'['gu\u00f3']'",
+ "\u56F0>'['k\u00f9n']'",
+ "\u56F1>'[cong]'",
+ "\u56F2>'['we\u00ed']'",
+ "\u56F3>'['t\u00fa']'",
+ "\u56F4>'['we\u00ed']'",
+ "\u56F5>'['l\u00fan']'",
+ "\u56F6>'['gu\u00f3']'",
+ "\u56F7>'[qun]'",
+ "\u56F8>'['r\u00ec']'",
+ "\u56F9>'['l\u00edng']'",
+ "\u56FA>'['g\u00f9']'",
+ "\u56FB>'['gu\u00f3']'",
+ "\u56FC>'[tai]'",
+ "\u56FD>'['gu\u00f3']'",
+ "\u56FE>'['t\u00fa']'",
+ "\u56FF>'['yo\u00f9']'",
+ "\u5700>'['gu\u00f3']'",
+ "\u5701>'['y\u00edn']'",
+ "\u5702>'['h\u00f9n']'",
+ "\u5703>'['p\u016D']'",
+ "\u5704>'['y\u016D']'",
+ "\u5705>'['h\u00e1n']'",
+ "\u5706>'['yu\u00e1n']'",
+ "\u5707>'['l\u00fan']'",
+ "\u5708>'[quan]'",
+ "\u5709>'['y\u016D']'",
+ "\u570A>'[qing]'",
+ "\u570B>'['gu\u00f3']'",
+ "\u570C>'['chu\u00e1n']'",
+ "\u570D>'['we\u00ed']'",
+ "\u570E>'['yu\u00e1n']'",
+ "\u570F>'[quan]'",
+ "\u5710>'[ku]'",
+ "\u5711>'['f\u00f9']'",
+ "\u5712>'['yu\u00e1n']'",
+ "\u5713>'['yu\u00e1n']'",
+ "\u5714>'['\u00e8']'",
+ "\u5715>'['tu2shu1gu\u0103n']'",
+ "\u5716>'['t\u00fa']'",
+ "\u5717>'['t\u00fa']'",
+ "\u5718>'['tu\u00e1n']'",
+ "\u5719>'['l\u00fc\u00e8']'",
+ "\u571A>'['hu\u00ec']'",
+ "\u571B>'['y\u00ec']'",
+ "\u571C>'['yu\u00e1n']'",
+ "\u571D>'['lu\u00e1n']'",
+ "\u571E>'['lu\u00e1n']'",
+ "\u571F>'['t\u016D']'",
+ "\u5720>'['y\u00e0']'",
+ "\u5721>'['t\u016D']'",
+ "\u5722>'[ting]'",
+ "\u5723>'['sh\u00e8ng']'",
+ "\u5724>'['p\u016D']'",
+ "\u5725>'['l\u00f9']'",
+ "\u5727>'[ya]'",
+ "\u5728>'['za\u00ec']'",
+ "\u5729>'['we\u00ed']'",
+ "\u572A>'[ge]'",
+ "\u572B>'['y\u00f9']'",
+ "\u572C>'[wu]'",
+ "\u572D>'[gui]'",
+ "\u572E>'['p\u012D']'",
+ "\u572F>'['y\u00ed']'",
+ "\u5730>'['d\u00ec']'",
+ "\u5731>'[qian]'",
+ "\u5732>'[qian]'",
+ "\u5733>'['zh\u00e8n']'",
+ "\u5734>'['zhu\u00f3']'",
+ "\u5735>'['d\u00e0ng']'",
+ "\u5736>'['qi\u00e0']'",
+ "\u5739>'['ku\u00e0ng']'",
+ "\u573A>'['ch\u00e1ng']'",
+ "\u573B>'['q\u00ed']'",
+ "\u573C>'['ni\u00e8']'",
+ "\u573D>'['m\u00f2']'",
+ "\u573E>'['j\u00ed']'",
+ "\u573F>'['ji\u00e1']'",
+ "\u5740>'['zh\u012D']'",
+ "\u5741>'['zh\u012D']'",
+ "\u5742>'['b\u0103n']'",
+ "\u5743>'[xun]'",
+ "\u5744>'['to\u00fa']'",
+ "\u5745>'['q\u012Dn']'",
+ "\u5746>'['f\u00e9n']'",
+ "\u5747>'[jun]'",
+ "\u5748>'[keng]'",
+ "\u5749>'['t\u00fan']'",
+ "\u574A>'[fang]'",
+ "\u574B>'['f\u00e8n']'",
+ "\u574C>'['b\u00e8n']'",
+ "\u574D>'[tan]'",
+ "\u574E>'['k\u0103n']'",
+ "\u574F>'[pi]'",
+ "\u5750>'['zu\u00f2']'",
+ "\u5751>'[keng]'",
+ "\u5752>'['b\u00ec']'",
+ "\u5753>'['x\u00edng']'",
+ "\u5754>'['d\u00ec']'",
+ "\u5755>'[jing]'",
+ "\u5756>'['j\u00ec']'",
+ "\u5757>'['kua\u00ec']'",
+ "\u5758>'['d\u012D']'",
+ "\u5759>'[jing]'",
+ "\u575A>'[jian]'",
+ "\u575B>'['t\u00e1n']'",
+ "\u575C>'['l\u00ec']'",
+ "\u575D>'['b\u00e0']'",
+ "\u575E>'['w\u00f9']'",
+ "\u575F>'['f\u00e9n']'",
+ "\u5760>'['zhu\u00ec']'",
+ "\u5761>'[po]'",
+ "\u5762>'['p\u0103n']'",
+ "\u5763>'[tang]'",
+ "\u5764>'[kun]'",
+ "\u5765>'[qu]'",
+ "\u5766>'['t\u0103n']'",
+ "\u5767>'[zhi]'",
+ "\u5768>'['tu\u00f3']'",
+ "\u5769>'[gan]'",
+ "\u576A>'['p\u00edng']'",
+ "\u576B>'['di\u00e0n']'",
+ "\u576C>'['gu\u00e0']'",
+ "\u576D>'['n\u00ed']'",
+ "\u576E>'['ta\u00ed']'",
+ "\u576F>'[pi]'",
+ "\u5770>'[jiong]'",
+ "\u5771>'['y\u0103ng']'",
+ "\u5772>'['f\u00f3']'",
+ "\u5773>'['a\u00f2']'",
+ "\u5774>'['li\u00f9']'",
+ "\u5775>'[qiu]'",
+ "\u5776>'['m\u00f9']'",
+ "\u5777>'['k\u0115']'",
+ "\u5778>'['go\u00f9']'",
+ "\u5779>'['xu\u00e8']'",
+ "\u577A>'['b\u00e1']'",
+ "\u577B>'['ch\u00ed']'",
+ "\u577C>'['ch\u00e8']'",
+ "\u577D>'['l\u00edng']'",
+ "\u577E>'['zh\u00f9']'",
+ "\u577F>'['f\u00f9']'",
+ "\u5780>'[hu]'",
+ "\u5781>'['zh\u00ec']'",
+ "\u5782>'['chu\u00ed']'",
+ "\u5783>'[la]'",
+ "\u5784>'['l\u014Fng']'",
+ "\u5785>'['l\u014Fng']'",
+ "\u5786>'['l\u00fa']'",
+ "\u5787>'['a\u00f2']'",
+ "\u5789>'['pa\u00f3']'",
+ "\u578B>'['x\u00edng']'",
+ "\u578C>'['d\u00f2ng']'",
+ "\u578D>'['j\u00ec']'",
+ "\u578E>'['k\u00e8']'",
+ "\u578F>'['l\u00f9']'",
+ "\u5790>'['c\u00ed']'",
+ "\u5791>'['ch\u012D']'",
+ "\u5792>'['le\u012D']'",
+ "\u5793>'[gai]'",
+ "\u5794>'[yin]'",
+ "\u5795>'['ho\u00f9']'",
+ "\u5796>'[dui]'",
+ "\u5797>'['zha\u00f2']'",
+ "\u5798>'['f\u00fa']'",
+ "\u5799>'[guang]'",
+ "\u579A>'['ya\u00f3']'",
+ "\u579B>'['du\u014F']'",
+ "\u579C>'['du\u014F']'",
+ "\u579D>'['gu\u012D']'",
+ "\u579E>'['ch\u00e1']'",
+ "\u579F>'['y\u00e1ng']'",
+ "\u57A0>'['y\u00edn']'",
+ "\u57A1>'['f\u00e1']'",
+ "\u57A2>'['go\u00f9']'",
+ "\u57A3>'['yu\u00e1n']'",
+ "\u57A4>'['di\u00e9']'",
+ "\u57A5>'['xi\u00e9']'",
+ "\u57A6>'['k\u0115n']'",
+ "\u57A7>'[jiong]'",
+ "\u57A8>'['sho\u016D']'",
+ "\u57A9>'['\u00e8']'",
+ "\u57AB>'['di\u00e0n']'",
+ "\u57AC>'['h\u00f3ng']'",
+ "\u57AD>'['w\u00f9']'",
+ "\u57AE>'['ku\u0103']'",
+ "\u57B1>'['d\u00e0ng']'",
+ "\u57B2>'['ka\u012D']'",
+ "\u57B4>'['na\u014F']'",
+ "\u57B5>'['\u0103n']'",
+ "\u57B6>'[xing]'",
+ "\u57B7>'['xi\u00e0n']'",
+ "\u57B8>'['hu\u00e0n']'",
+ "\u57B9>'[bang]'",
+ "\u57BA>'[pei]'",
+ "\u57BB>'['b\u00e0']'",
+ "\u57BC>'['y\u00ec']'",
+ "\u57BD>'['y\u00ecn']'",
+ "\u57BE>'['h\u00e0n']'",
+ "\u57BF>'['x\u00f9']'",
+ "\u57C0>'['chu\u00ed']'",
+ "\u57C1>'['c\u00e9n']'",
+ "\u57C2>'['g\u0115ng']'",
+ "\u57C3>'[ai]'",
+ "\u57C4>'['p\u00e9ng']'",
+ "\u57C5>'['f\u00e1ng']'",
+ "\u57C6>'['qu\u00e8']'",
+ "\u57C7>'['y\u014Fng']'",
+ "\u57C8>'['x\u00f9n']'",
+ "\u57C9>'['ji\u00e1']'",
+ "\u57CA>'['d\u00ec']'",
+ "\u57CB>'['ma\u00ed']'",
+ "\u57CC>'['l\u00e0ng']'",
+ "\u57CD>'['xu\u00e0n']'",
+ "\u57CE>'['ch\u00e9ng']'",
+ "\u57CF>'['y\u00e1n']'",
+ "\u57D0>'[jin]'",
+ "\u57D1>'['zh\u00e9']'",
+ "\u57D2>'['le\u00ec']'",
+ "\u57D3>'['li\u00e8']'",
+ "\u57D4>'['b\u00f9']'",
+ "\u57D5>'['ch\u00e9ng']'",
+ "\u57D7>'['b\u00f9']'",
+ "\u57D8>'['sh\u00ed']'",
+ "\u57D9>'[xun]'",
+ "\u57DA>'[guo]'",
+ "\u57DB>'[jiong]'",
+ "\u57DC>'['y\u0115']'",
+ "\u57DD>'['ni\u00e0n']'",
+ "\u57DE>'['d\u012D']'",
+ "\u57DF>'['y\u00f9']'",
+ "\u57E0>'['b\u00f9']'",
+ "\u57E1>'['y\u00e0']'",
+ "\u57E2>'['ju\u0103n']'",
+ "\u57E3>'['su\u00ec']'",
+ "\u57E4>'['p\u00ed']'",
+ "\u57E5>'[cheng]'",
+ "\u57E6>'['w\u0103n']'",
+ "\u57E7>'['j\u00f9']'",
+ "\u57E8>'['l\u016Dn']'",
+ "\u57E9>'[zheng]'",
+ "\u57EA>'[kong]'",
+ "\u57EB>'['ch\u014Fng']'",
+ "\u57EC>'[dong]'",
+ "\u57ED>'['da\u00ec']'",
+ "\u57EE>'['t\u00e0n']'",
+ "\u57EF>'['\u0103n']'",
+ "\u57F0>'['ca\u00ec']'",
+ "\u57F1>'['sh\u00fa']'",
+ "\u57F2>'['b\u0115ng']'",
+ "\u57F3>'['k\u0103n']'",
+ "\u57F4>'['zh\u00ed']'",
+ "\u57F5>'['du\u014F']'",
+ "\u57F6>'['y\u00ec']'",
+ "\u57F7>'['zh\u00ed']'",
+ "\u57F8>'['y\u00ec']'",
+ "\u57F9>'['pe\u00ed']'",
+ "\u57FA>'[ji]'",
+ "\u57FB>'['zh\u016Dn']'",
+ "\u57FC>'['q\u00ed']'",
+ "\u57FD>'['sa\u00f2']'",
+ "\u57FE>'['j\u00f9']'",
+ "\u57FF>'['n\u00ed']'",
+ "\u5800>'[ku]'",
+ "\u5801>'['k\u00e8']'",
+ "\u5802>'['t\u00e1ng']'",
+ "\u5803>'[kun]'",
+ "\u5804>'['n\u00ec']'",
+ "\u5805>'[jian]'",
+ "\u5806>'[dui]'",
+ "\u5807>'['j\u012Dn']'",
+ "\u5808>'[gang]'",
+ "\u5809>'['y\u00f9']'",
+ "\u580A>'['\u00e8']'",
+ "\u580B>'['p\u00e9ng']'",
+ "\u580C>'['g\u00f9']'",
+ "\u580D>'['t\u00f9']'",
+ "\u580E>'['l\u00e8ng']'",
+ "\u5810>'['y\u00e1']'",
+ "\u5811>'['qi\u00e0n']'",
+ "\u5813>'['\u00e0n']'",
+ "\u5815>'['du\u00f2']'",
+ "\u5816>'['na\u014F']'",
+ "\u5817>'[tu]'",
+ "\u5818>'['ch\u00e9ng']'",
+ "\u5819>'[yin]'",
+ "\u581A>'['h\u00fan']'",
+ "\u581B>'['b\u00ec']'",
+ "\u581C>'['li\u00e0n']'",
+ "\u581D>'[guo]'",
+ "\u581E>'['di\u00e9']'",
+ "\u581F>'['zhu\u00e0n']'",
+ "\u5820>'['ho\u00f9']'",
+ "\u5821>'['ba\u014F']'",
+ "\u5822>'['ba\u014F']'",
+ "\u5823>'['y\u00fa']'",
+ "\u5824>'[di]'",
+ "\u5825>'['ma\u00f3']'",
+ "\u5826>'[jie]'",
+ "\u5827>'['ru\u00e1n']'",
+ "\u5828>'['\u00e8']'",
+ "\u5829>'['g\u00e8ng']'",
+ "\u582A>'[kan]'",
+ "\u582B>'[zong]'",
+ "\u582C>'['y\u00fa']'",
+ "\u582D>'['hu\u00e1ng']'",
+ "\u582E>'['\u00e8']'",
+ "\u582F>'['ya\u00f3']'",
+ "\u5830>'['y\u00e0n']'",
+ "\u5831>'['ba\u00f2']'",
+ "\u5832>'['j\u00ed']'",
+ "\u5833>'['me\u00ed']'",
+ "\u5834>'['ch\u00e1ng']'",
+ "\u5835>'['d\u016D']'",
+ "\u5836>'['tu\u00f3']'",
+ "\u5837>'['y\u00ecn']'",
+ "\u5838>'['f\u00e9ng']'",
+ "\u5839>'['zh\u00f2ng']'",
+ "\u583A>'['ji\u00e8']'",
+ "\u583B>'[zhen]'",
+ "\u583C>'[feng]'",
+ "\u583D>'[gang]'",
+ "\u583E>'['chu\u0103n']'",
+ "\u583F>'['ji\u0103n']'",
+ "\u5842>'['xi\u00e0ng']'",
+ "\u5843>'[huang]'",
+ "\u5844>'['l\u00e9ng']'",
+ "\u5845>'['du\u00e0n']'",
+ "\u5847>'[xuan]'",
+ "\u5848>'['j\u00ec']'",
+ "\u5849>'['j\u00ed']'",
+ "\u584A>'['kua\u00ec']'",
+ "\u584B>'['y\u00edng']'",
+ "\u584C>'[ta]'",
+ "\u584D>'['ch\u00e9ng']'",
+ "\u584E>'['y\u014Fng']'",
+ "\u584F>'['ka\u012D']'",
+ "\u5850>'['s\u00f9']'",
+ "\u5851>'['s\u00f9']'",
+ "\u5852>'['sh\u00ed']'",
+ "\u5853>'['m\u00ec']'",
+ "\u5854>'['t\u0103']'",
+ "\u5855>'['w\u0115ng']'",
+ "\u5856>'['ch\u00e9ng']'",
+ "\u5857>'['t\u00fa']'",
+ "\u5858>'['t\u00e1ng']'",
+ "\u5859>'['qu\u00e8']'",
+ "\u585A>'['zh\u014Fng']'",
+ "\u585B>'['l\u00ec']'",
+ "\u585C>'['p\u00e9ng']'",
+ "\u585D>'['b\u00e0ng']'",
+ "\u585E>'[sai]'",
+ "\u585F>'['z\u00e0ng']'",
+ "\u5860>'[dui]'",
+ "\u5861>'['ti\u00e1n']'",
+ "\u5862>'['w\u00f9']'",
+ "\u5863>'['ch\u0115ng']'",
+ "\u5864>'[xun]'",
+ "\u5865>'['g\u00e9']'",
+ "\u5866>'['zh\u00e8n']'",
+ "\u5867>'['a\u00ec']'",
+ "\u5868>'[gong]'",
+ "\u5869>'['y\u00e1n']'",
+ "\u586A>'['k\u0103n']'",
+ "\u586B>'['ti\u00e1n']'",
+ "\u586C>'['yu\u00e1n']'",
+ "\u586D>'[wen]'",
+ "\u586E>'['xi\u00e8']'",
+ "\u586F>'['li\u00f9']'",
+ "\u5871>'['l\u0103ng']'",
+ "\u5872>'['ch\u00e1ng']'",
+ "\u5873>'['p\u00e9ng']'",
+ "\u5874>'['b\u00e8ng']'",
+ "\u5875>'['ch\u00e9n']'",
+ "\u5876>'['c\u00f9']'",
+ "\u5877>'['l\u016D']'",
+ "\u5878>'['o\u016D']'",
+ "\u5879>'['qi\u00e0n']'",
+ "\u587A>'['me\u00ed']'",
+ "\u587B>'['m\u00f2']'",
+ "\u587C>'[zhuan]'",
+ "\u587D>'['shu\u0103ng']'",
+ "\u587E>'['sh\u00fa']'",
+ "\u587F>'['lo\u016D']'",
+ "\u5880>'['ch\u00ed']'",
+ "\u5881>'['m\u00e0n']'",
+ "\u5882>'[biao]'",
+ "\u5883>'['j\u00ecng']'",
+ "\u5884>'[qi]'",
+ "\u5885>'['sh\u00f9']'",
+ "\u5886>'['d\u00ec']'",
+ "\u5887>'[zhang]'",
+ "\u5888>'['k\u00e0n']'",
+ "\u5889>'[yong]'",
+ "\u588A>'['di\u00e0n']'",
+ "\u588B>'['ch\u0115n']'",
+ "\u588C>'[zhi]'",
+ "\u588D>'['x\u00ec']'",
+ "\u588E>'[guo]'",
+ "\u588F>'['qi\u0103ng']'",
+ "\u5890>'['j\u00ecn']'",
+ "\u5891>'[di]'",
+ "\u5892>'[shang]'",
+ "\u5893>'['m\u00f9']'",
+ "\u5894>'[cui]'",
+ "\u5895>'['y\u00e0n']'",
+ "\u5896>'['t\u0103']'",
+ "\u5897>'[zeng]'",
+ "\u5898>'['q\u00ed']'",
+ "\u5899>'['qi\u00e1ng']'",
+ "\u589A>'['li\u00e1ng']'",
+ "\u589C>'['zhu\u00ec']'",
+ "\u589D>'[qiao]'",
+ "\u589E>'[zeng]'",
+ "\u589F>'[xu]'",
+ "\u58A0>'['sh\u00e0n']'",
+ "\u58A1>'['sh\u00e0n']'",
+ "\u58A2>'['b\u00e1']'",
+ "\u58A3>'[pu]'",
+ "\u58A4>'['kua\u00ec']'",
+ "\u58A5>'['d\u014Fng']'",
+ "\u58A6>'['f\u00e1n']'",
+ "\u58A7>'['qu\u00e8']'",
+ "\u58A8>'['m\u00f2']'",
+ "\u58A9>'[dun]'",
+ "\u58AA>'[dun]'",
+ "\u58AB>'[dun]'",
+ "\u58AC>'['d\u00ec']'",
+ "\u58AD>'['sh\u00e8ng']'",
+ "\u58AE>'['du\u00f2']'",
+ "\u58AF>'['du\u00f2']'",
+ "\u58B0>'['t\u00e1n']'",
+ "\u58B1>'['d\u00e8ng']'",
+ "\u58B2>'['w\u016D']'",
+ "\u58B3>'['f\u00e9n']'",
+ "\u58B4>'['hu\u00e1ng']'",
+ "\u58B5>'['t\u00e1n']'",
+ "\u58B6>'[da]'",
+ "\u58B7>'['y\u00e8']'",
+ "\u58BA>'['y\u00f9']'",
+ "\u58BB>'['qi\u00e1ng']'",
+ "\u58BC>'[ji]'",
+ "\u58BD>'[qiao]'",
+ "\u58BE>'['k\u0115n']'",
+ "\u58BF>'['y\u00ec']'",
+ "\u58C0>'['p\u00ed']'",
+ "\u58C1>'['b\u00ec']'",
+ "\u58C2>'['di\u00e0n']'",
+ "\u58C3>'[jiang]'",
+ "\u58C4>'['y\u0115']'",
+ "\u58C5>'[yong]'",
+ "\u58C6>'['b\u00f3']'",
+ "\u58C7>'['t\u00e1n']'",
+ "\u58C8>'['l\u0103n']'",
+ "\u58C9>'['j\u00f9']'",
+ "\u58CA>'['hua\u00ec']'",
+ "\u58CB>'['d\u00e0ng']'",
+ "\u58CC>'['r\u0103ng']'",
+ "\u58CD>'['qi\u00e0n']'",
+ "\u58CE>'[xun]'",
+ "\u58CF>'['l\u00e0n']'",
+ "\u58D0>'['x\u012D']'",
+ "\u58D1>'['h\u00e8']'",
+ "\u58D2>'['a\u00ec']'",
+ "\u58D3>'[ya]'",
+ "\u58D4>'['da\u014F']'",
+ "\u58D5>'['ha\u00f3']'",
+ "\u58D6>'['ru\u00e1n']'",
+ "\u58D8>'['le\u012D']'",
+ "\u58D9>'['ku\u00e0ng']'",
+ "\u58DA>'['l\u00fa']'",
+ "\u58DB>'['y\u00e1n']'",
+ "\u58DC>'['t\u00e1n']'",
+ "\u58DD>'['we\u00ed']'",
+ "\u58DE>'['hua\u00ec']'",
+ "\u58DF>'['l\u014Fng']'",
+ "\u58E0>'['l\u014Fng']'",
+ "\u58E1>'['ru\u00ec']'",
+ "\u58E2>'['l\u00ec']'",
+ "\u58E3>'['l\u00edn']'",
+ "\u58E4>'['r\u0103ng']'",
+ "\u58E6>'[xun]'",
+ "\u58E7>'['y\u00e1n']'",
+ "\u58E8>'['le\u00ed']'",
+ "\u58E9>'['b\u00e0']'",
+ "\u58EB>'['sh\u00ec']'",
+ "\u58EC>'['r\u00e9n']'",
+ "\u58EE>'['zhu\u00e0ng']'",
+ "\u58EF>'['zhu\u00e0ng']'",
+ "\u58F0>'[sheng]'",
+ "\u58F1>'[yi]'",
+ "\u58F2>'['ma\u00ec']'",
+ "\u58F3>'['k\u00e9']'",
+ "\u58F4>'['zh\u016D']'",
+ "\u58F5>'['zhu\u00e0ng']'",
+ "\u58F6>'['h\u00fa']'",
+ "\u58F7>'['h\u00fa']'",
+ "\u58F8>'['k\u016Dn']'",
+ "\u58F9>'[yi]'",
+ "\u58FA>'['h\u00fa']'",
+ "\u58FB>'['x\u00f9']'",
+ "\u58FC>'['k\u016Dn']'",
+ "\u58FD>'['sho\u00f9']'",
+ "\u58FE>'['m\u0103ng']'",
+ "\u58FF>'['z\u016Dn']'",
+ "\u5900>'['sho\u00f9']'",
+ "\u5901>'[yi]'",
+ "\u5902>'['zh\u012D']'",
+ "\u5903>'[gu]'",
+ "\u5904>'['ch\u00f9']'",
+ "\u5905>'['ji\u00e0ng']'",
+ "\u5906>'[feng]'",
+ "\u5907>'['be\u00ec']'",
+ "\u5909>'['bi\u00e0n']'",
+ "\u590A>'[sui]'",
+ "\u590B>'[qun]'",
+ "\u590C>'['l\u00edng']'",
+ "\u590D>'['f\u00f9']'",
+ "\u590E>'['zu\u00f2']'",
+ "\u590F>'['xi\u00e0']'",
+ "\u5910>'['xi\u00f2ng']'",
+ "\u5912>'['na\u00f3']'",
+ "\u5913>'['xi\u00e0']'",
+ "\u5914>'['ku\u00ed']'",
+ "\u5915>'[xi]'",
+ "\u5916>'['wa\u00ec']'",
+ "\u5917>'['yu\u00e0n']'",
+ "\u5918>'['ma\u014F']'",
+ "\u5919>'['s\u00f9']'",
+ "\u591A>'[duo]'",
+ "\u591B>'[duo]'",
+ "\u591C>'['y\u00e8']'",
+ "\u591D>'['q\u00edng']'",
+ "\u591F>'['go\u00f9']'",
+ "\u5920>'['go\u00f9']'",
+ "\u5921>'['q\u00ec']'",
+ "\u5922>'['m\u00e8ng']'",
+ "\u5923>'['m\u00e8ng']'",
+ "\u5924>'['y\u00edn']'",
+ "\u5925>'['hu\u014F']'",
+ "\u5926>'['ch\u00e8n']'",
+ "\u5927>'['d\u00e0']'",
+ "\u5928>'['z\u00e8']'",
+ "\u5929>'[tian]'",
+ "\u592A>'['ta\u00ec']'",
+ "\u592B>'[fu]'",
+ "\u592C>'['gua\u00ec']'",
+ "\u592D>'['ya\u014F']'",
+ "\u592E>'[yang]'",
+ "\u592F>'[hang]'",
+ "\u5930>'['ga\u014F']'",
+ "\u5931>'[shi]'",
+ "\u5932>'['b\u0115n']'",
+ "\u5933>'['ta\u00ec']'",
+ "\u5934>'['to\u00fa']'",
+ "\u5935>'['y\u0103n']'",
+ "\u5936>'['b\u012D']'",
+ "\u5937>'['y\u00ed']'",
+ "\u5938>'[kua]'",
+ "\u5939>'[jia]'",
+ "\u593A>'['du\u00f3']'",
+ "\u593C>'['ku\u0103ng']'",
+ "\u593D>'['y\u00f9n']'",
+ "\u593E>'[jia]'",
+ "\u593F>'[pa]'",
+ "\u5940>'[en]'",
+ "\u5941>'['li\u00e1n']'",
+ "\u5942>'['hu\u00e0n']'",
+ "\u5943>'['d\u00ec']'",
+ "\u5944>'['y\u0103n']'",
+ "\u5945>'['pa\u00f2']'",
+ "\u5946>'['qu\u0103n']'",
+ "\u5947>'['q\u00ed']'",
+ "\u5948>'['na\u00ec']'",
+ "\u5949>'['f\u00e8ng']'",
+ "\u594A>'['xi\u00e9']'",
+ "\u594B>'['f\u00e8n']'",
+ "\u594C>'['di\u0103n']'",
+ "\u594E>'['ku\u00ed']'",
+ "\u594F>'['zo\u00f9']'",
+ "\u5950>'['hu\u00e0n']'",
+ "\u5951>'['q\u00ec']'",
+ "\u5952>'[kai]'",
+ "\u5953>'['zh\u00e0']'",
+ "\u5954>'[ben]'",
+ "\u5955>'['y\u00ec']'",
+ "\u5956>'['ji\u0103ng']'",
+ "\u5957>'['ta\u00f2']'",
+ "\u5958>'['z\u00e0ng']'",
+ "\u5959>'['b\u0115n']'",
+ "\u595A>'[xi]'",
+ "\u595B>'['xi\u0103ng']'",
+ "\u595C>'['fe\u012D']'",
+ "\u595D>'[diao]'",
+ "\u595E>'['x\u00f9n']'",
+ "\u595F>'[keng]'",
+ "\u5960>'['di\u00e0n']'",
+ "\u5961>'['a\u00f2']'",
+ "\u5962>'[she]'",
+ "\u5963>'['w\u0115ng']'",
+ "\u5964>'['p\u0103n']'",
+ "\u5965>'['a\u00f2']'",
+ "\u5966>'['w\u00f9']'",
+ "\u5967>'['a\u00f2']'",
+ "\u5968>'['ji\u0103ng']'",
+ "\u5969>'['li\u00e1n']'",
+ "\u596A>'['du\u00f3']'",
+ "\u596B>'[yun]'",
+ "\u596C>'['ji\u0103ng']'",
+ "\u596D>'['sh\u00ec']'",
+ "\u596E>'['f\u00e8n']'",
+ "\u596F>'['hu\u00f2']'",
+ "\u5970>'['b\u00ec']'",
+ "\u5971>'['li\u00e1n']'",
+ "\u5972>'['du\u014F']'",
+ "\u5973>'['n\u01DA']'",
+ "\u5974>'['n\u00fa']'",
+ "\u5975>'[ding]'",
+ "\u5976>'['na\u012D']'",
+ "\u5977>'[qian]'",
+ "\u5978>'[jian]'",
+ "\u5979>'[ta]'",
+ "\u597A>'['ji\u016D']'",
+ "\u597B>'['n\u00e1n']'",
+ "\u597C>'['ch\u00e0']'",
+ "\u597D>'['ha\u014F']'",
+ "\u597E>'[xian]'",
+ "\u597F>'['f\u00e0n']'",
+ "\u5980>'['j\u012D']'",
+ "\u5981>'['shu\u00f2']'",
+ "\u5982>'['r\u00fa']'",
+ "\u5983>'[fei]'",
+ "\u5984>'['w\u00e0ng']'",
+ "\u5985>'['h\u00f3ng']'",
+ "\u5986>'[zhuang]'",
+ "\u5987>'['f\u00f9']'",
+ "\u5988>'[ma]'",
+ "\u5989>'[dan]'",
+ "\u598A>'['r\u00e8n']'",
+ "\u598B>'[fu]'",
+ "\u598C>'['j\u00ecng']'",
+ "\u598D>'['y\u00e1n']'",
+ "\u598E>'['xi\u00e8']'",
+ "\u598F>'['w\u00e8n']'",
+ "\u5990>'[zhong]'",
+ "\u5991>'[pa]'",
+ "\u5992>'['d\u00f9']'",
+ "\u5993>'['j\u00ec']'",
+ "\u5994>'[keng]'",
+ "\u5995>'['zh\u00f2ng']'",
+ "\u5996>'[yao]'",
+ "\u5997>'['j\u00ecn']'",
+ "\u5998>'['y\u00fan']'",
+ "\u5999>'['mia\u00f2']'",
+ "\u599A>'[pei]'",
+ "\u599C>'['yu\u00e8']'",
+ "\u599D>'[zhuang]'",
+ "\u599E>'[niu]'",
+ "\u599F>'['y\u00e0n']'",
+ "\u59A0>'['n\u00e0']'",
+ "\u59A1>'[xin]'",
+ "\u59A2>'['f\u00e9n']'",
+ "\u59A3>'['b\u012D']'",
+ "\u59A4>'['y\u00fa']'",
+ "\u59A5>'['tu\u014F']'",
+ "\u59A6>'[feng]'",
+ "\u59A7>'['yu\u00e1n']'",
+ "\u59A8>'['f\u00e1ng']'",
+ "\u59A9>'['w\u016D']'",
+ "\u59AA>'['y\u00f9']'",
+ "\u59AB>'[gui]'",
+ "\u59AC>'['d\u00f9']'",
+ "\u59AD>'['b\u00e1']'",
+ "\u59AE>'[ni]'",
+ "\u59AF>'['zho\u00fa']'",
+ "\u59B0>'['zhu\u00f3']'",
+ "\u59B1>'[zhao]'",
+ "\u59B2>'['d\u00e1']'",
+ "\u59B3>'['na\u012D']'",
+ "\u59B4>'['yu\u0103n']'",
+ "\u59B5>'['to\u016D']'",
+ "\u59B6>'['xu\u00e1n']'",
+ "\u59B7>'['zh\u00ed']'",
+ "\u59B8>'[e]'",
+ "\u59B9>'['me\u00ec']'",
+ "\u59BA>'['m\u00f2']'",
+ "\u59BB>'[qi]'",
+ "\u59BC>'['b\u00ec']'",
+ "\u59BD>'[shen]'",
+ "\u59BE>'['qi\u00e8']'",
+ "\u59BF>'[e]'",
+ "\u59C0>'['h\u00e9']'",
+ "\u59C1>'['x\u016D']'",
+ "\u59C2>'['f\u00e1']'",
+ "\u59C3>'[zheng]'",
+ "\u59C4>'['m\u00edn']'",
+ "\u59C5>'['b\u00e0n']'",
+ "\u59C6>'['m\u016D']'",
+ "\u59C7>'[fu]'",
+ "\u59C8>'['l\u00edng']'",
+ "\u59C9>'['z\u012D']'",
+ "\u59CA>'['z\u012D']'",
+ "\u59CB>'['sh\u012D']'",
+ "\u59CC>'['r\u0103n']'",
+ "\u59CD>'[shan]'",
+ "\u59CE>'[yang]'",
+ "\u59CF>'['m\u00e1n']'",
+ "\u59D0>'['ji\u0115']'",
+ "\u59D1>'[gu]'",
+ "\u59D2>'['s\u00ec']'",
+ "\u59D3>'['x\u00ecng']'",
+ "\u59D4>'['we\u012D']'",
+ "\u59D5>'[zi]'",
+ "\u59D6>'['j\u00f9']'",
+ "\u59D7>'[shan]'",
+ "\u59D8>'[pin]'",
+ "\u59D9>'['r\u00e8n']'",
+ "\u59DA>'['ya\u00f3']'",
+ "\u59DB>'['t\u014Fng']'",
+ "\u59DC>'[jiang]'",
+ "\u59DD>'[shu]'",
+ "\u59DE>'['j\u00ed']'",
+ "\u59DF>'[gai]'",
+ "\u59E0>'['sh\u00e0ng']'",
+ "\u59E1>'['ku\u00f2']'",
+ "\u59E2>'[juan]'",
+ "\u59E3>'[jiao]'",
+ "\u59E4>'['go\u00f9']'",
+ "\u59E5>'['m\u016D']'",
+ "\u59E6>'[jian]'",
+ "\u59E7>'[jian]'",
+ "\u59E8>'['y\u00ed']'",
+ "\u59E9>'['ni\u00e0n']'",
+ "\u59EA>'['zh\u00ed']'",
+ "\u59EB>'[ji]'",
+ "\u59EC>'[ji]'",
+ "\u59ED>'['xi\u00e0n']'",
+ "\u59EE>'['h\u00e9ng']'",
+ "\u59EF>'[guang]'",
+ "\u59F0>'[jun]'",
+ "\u59F1>'[kua]'",
+ "\u59F2>'['y\u00e0n']'",
+ "\u59F3>'['m\u012Dng']'",
+ "\u59F4>'['li\u00e8']'",
+ "\u59F5>'['pe\u00ec']'",
+ "\u59F6>'['y\u0103n']'",
+ "\u59F7>'['yo\u00f9']'",
+ "\u59F8>'['y\u00e1n']'",
+ "\u59F9>'['ch\u00e0']'",
+ "\u59FA>'[shen]'",
+ "\u59FB>'[yin]'",
+ "\u59FC>'['ch\u012D']'",
+ "\u59FD>'['gu\u012D']'",
+ "\u59FE>'[quan]'",
+ "\u59FF>'[zi]'",
+ "\u5A00>'[song]'",
+ "\u5A01>'[wei]'",
+ "\u5A02>'['h\u00f3ng']'",
+ "\u5A03>'['w\u00e1']'",
+ "\u5A04>'['lo\u00fa']'",
+ "\u5A05>'['y\u00e0']'",
+ "\u5A06>'['ra\u014F']'",
+ "\u5A07>'[jiao]'",
+ "\u5A08>'['lu\u00e1n']'",
+ "\u5A09>'[ping]'",
+ "\u5A0A>'['xi\u00e0n']'",
+ "\u5A0B>'['sha\u00f2']'",
+ "\u5A0C>'['l\u012D']'",
+ "\u5A0D>'['ch\u00e9ng']'",
+ "\u5A0E>'['xia\u00f2']'",
+ "\u5A0F>'['m\u00e1ng']'",
+ "\u5A10>'[FU]'",
+ "\u5A11>'[suo]'",
+ "\u5A12>'['w\u016D']'",
+ "\u5A13>'['we\u012D']'",
+ "\u5A14>'['k\u00e8']'",
+ "\u5A15>'['la\u00ec']'",
+ "\u5A16>'['chu\u00f2']'",
+ "\u5A17>'['d\u00ecng']'",
+ "\u5A18>'['ni\u00e1ng']'",
+ "\u5A19>'['x\u00edng']'",
+ "\u5A1A>'['n\u00e1n']'",
+ "\u5A1B>'['y\u00fa']'",
+ "\u5A1C>'['nu\u00f3']'",
+ "\u5A1D>'[pei]'",
+ "\u5A1E>'['ne\u012D']'",
+ "\u5A1F>'[juan]'",
+ "\u5A20>'[shen]'",
+ "\u5A21>'['zh\u00ec']'",
+ "\u5A22>'['h\u00e1n']'",
+ "\u5A23>'['d\u00ec']'",
+ "\u5A24>'[zhuang]'",
+ "\u5A25>'['\u00e9']'",
+ "\u5A26>'['p\u00edn']'",
+ "\u5A27>'['tu\u00ec']'",
+ "\u5A28>'['h\u00e0n']'",
+ "\u5A29>'['mi\u0103n']'",
+ "\u5A2A>'['w\u00fa']'",
+ "\u5A2B>'['y\u00e1n']'",
+ "\u5A2C>'['w\u016D']'",
+ "\u5A2D>'[xi]'",
+ "\u5A2E>'['y\u00e1n']'",
+ "\u5A2F>'['y\u00fa']'",
+ "\u5A30>'['s\u00ec']'",
+ "\u5A31>'['y\u00fa']'",
+ "\u5A32>'[wa]'",
+ "\u5A34>'['xi\u00e1n']'",
+ "\u5A35>'[ju]'",
+ "\u5A36>'['q\u016D']'",
+ "\u5A37>'['shu\u00ec']'",
+ "\u5A38>'[qi]'",
+ "\u5A39>'['xi\u00e1n']'",
+ "\u5A3A>'[zhui]'",
+ "\u5A3B>'[dong]'",
+ "\u5A3C>'[chang]'",
+ "\u5A3D>'['l\u00f9']'",
+ "\u5A3E>'['a\u012D']'",
+ "\u5A3F>'[e]'",
+ "\u5A40>'[e]'",
+ "\u5A41>'['lo\u00fa']'",
+ "\u5A42>'['mi\u00e1n']'",
+ "\u5A43>'['c\u00f3ng']'",
+ "\u5A44>'['po\u016D']'",
+ "\u5A45>'['j\u00fa']'",
+ "\u5A46>'['p\u00f3']'",
+ "\u5A47>'['ca\u012D']'",
+ "\u5A48>'['d\u00edng']'",
+ "\u5A49>'['w\u0103n']'",
+ "\u5A4A>'['bia\u014F']'",
+ "\u5A4B>'[xiao]'",
+ "\u5A4C>'['sh\u016D']'",
+ "\u5A4D>'['q\u012D']'",
+ "\u5A4E>'[hui]'",
+ "\u5A4F>'['f\u00f9']'",
+ "\u5A50>'[e]'",
+ "\u5A51>'['w\u014F']'",
+ "\u5A52>'['t\u00e1n']'",
+ "\u5A53>'[fei]'",
+ "\u5A54>'[WEI]'",
+ "\u5A55>'['ji\u00e9']'",
+ "\u5A56>'[tian]'",
+ "\u5A57>'['n\u00ed']'",
+ "\u5A58>'['qu\u00e1n']'",
+ "\u5A59>'['j\u00ecng']'",
+ "\u5A5A>'[hun]'",
+ "\u5A5B>'[jing]'",
+ "\u5A5C>'[qian]'",
+ "\u5A5D>'['di\u00e0n']'",
+ "\u5A5E>'['x\u00ecng']'",
+ "\u5A5F>'['h\u00f9']'",
+ "\u5A60>'['w\u00e0']'",
+ "\u5A61>'['la\u00ed']'",
+ "\u5A62>'['b\u00ec']'",
+ "\u5A63>'[yin]'",
+ "\u5A64>'[chou]'",
+ "\u5A65>'['chu\u00f2']'",
+ "\u5A66>'['f\u00f9']'",
+ "\u5A67>'['j\u00ecng']'",
+ "\u5A68>'['l\u00fan']'",
+ "\u5A69>'['y\u00e0n']'",
+ "\u5A6A>'['l\u00e1n']'",
+ "\u5A6B>'[kun]'",
+ "\u5A6C>'['y\u00edn']'",
+ "\u5A6D>'['y\u00e0']'",
+ "\u5A6E>'[JU]'",
+ "\u5A6F>'['l\u00ec']'",
+ "\u5A70>'['di\u0103n']'",
+ "\u5A71>'['xi\u00e1n']'",
+ "\u5A73>'['hu\u00e0']'",
+ "\u5A74>'[ying]'",
+ "\u5A75>'['ch\u00e1n']'",
+ "\u5A76>'['sh\u0115n']'",
+ "\u5A77>'['t\u00edng']'",
+ "\u5A78>'['d\u00e0ng']'",
+ "\u5A79>'['ya\u014F']'",
+ "\u5A7A>'['w\u00f9']'",
+ "\u5A7B>'['n\u00e0n']'",
+ "\u5A7C>'['ru\u00f2']'",
+ "\u5A7D>'['ji\u0103']'",
+ "\u5A7E>'[tou]'",
+ "\u5A7F>'['x\u00f9']'",
+ "\u5A80>'['y\u00fa']'",
+ "\u5A81>'[wei]'",
+ "\u5A82>'['t\u00ed']'",
+ "\u5A83>'['ro\u00fa']'",
+ "\u5A84>'['me\u012D']'",
+ "\u5A85>'[dan]'",
+ "\u5A86>'['ru\u0103n']'",
+ "\u5A87>'[qin]'",
+ "\u5A88>'[HUI]'",
+ "\u5A89>'[wu]'",
+ "\u5A8A>'['qi\u00e1n']'",
+ "\u5A8B>'[chun]'",
+ "\u5A8C>'['ma\u00f3']'",
+ "\u5A8D>'['f\u00f9']'",
+ "\u5A8E>'['ji\u0115']'",
+ "\u5A8F>'[duan]'",
+ "\u5A90>'[xi]'",
+ "\u5A91>'['zh\u00f2ng']'",
+ "\u5A92>'['me\u00ed']'",
+ "\u5A93>'['hu\u00e1ng']'",
+ "\u5A94>'['mi\u00e1n']'",
+ "\u5A95>'[an]'",
+ "\u5A96>'[ying]'",
+ "\u5A97>'[xuan]'",
+ "\u5A98>'[JIE]'",
+ "\u5A99>'[wei]'",
+ "\u5A9A>'['me\u00ec']'",
+ "\u5A9B>'['yu\u00e0n']'",
+ "\u5A9C>'[zhen]'",
+ "\u5A9D>'[qiu]'",
+ "\u5A9E>'['t\u00ed']'",
+ "\u5A9F>'['xi\u00e8']'",
+ "\u5AA0>'['tu\u014F']'",
+ "\u5AA1>'['li\u00e0n']'",
+ "\u5AA2>'['ma\u00f2']'",
+ "\u5AA3>'['r\u0103n']'",
+ "\u5AA4>'[si]'",
+ "\u5AA5>'[pian]'",
+ "\u5AA6>'['we\u00ec']'",
+ "\u5AA7>'[wa]'",
+ "\u5AA8>'['ji\u00f9']'",
+ "\u5AA9>'['h\u00fa']'",
+ "\u5AAA>'['a\u014F']'",
+ "\u5AAD>'[xu]'",
+ "\u5AAE>'[tou]'",
+ "\u5AAF>'[gui]'",
+ "\u5AB0>'[zou]'",
+ "\u5AB1>'['ya\u00f3']'",
+ "\u5AB2>'['p\u00ec']'",
+ "\u5AB3>'['x\u00ed']'",
+ "\u5AB4>'['yu\u00e1n']'",
+ "\u5AB5>'['y\u00ecng']'",
+ "\u5AB6>'['r\u00f3ng']'",
+ "\u5AB7>'['r\u00f9']'",
+ "\u5AB8>'[chi]'",
+ "\u5AB9>'['li\u00fa']'",
+ "\u5ABA>'['me\u012D']'",
+ "\u5ABB>'['p\u00e1n']'",
+ "\u5ABC>'['a\u014F']'",
+ "\u5ABD>'[ma]'",
+ "\u5ABE>'['go\u00f9']'",
+ "\u5ABF>'['ku\u00ec']'",
+ "\u5AC0>'['q\u00edn']'",
+ "\u5AC1>'['ji\u00e0']'",
+ "\u5AC2>'['sa\u014F']'",
+ "\u5AC3>'[zhen]'",
+ "\u5AC4>'['yu\u00e1n']'",
+ "\u5AC5>'[cha]'",
+ "\u5AC6>'['y\u00f3ng']'",
+ "\u5AC7>'['m\u00edng']'",
+ "\u5AC8>'[ying]'",
+ "\u5AC9>'['j\u00ed']'",
+ "\u5ACA>'['s\u00f9']'",
+ "\u5ACB>'['nia\u014F']'",
+ "\u5ACC>'['xi\u00e1n']'",
+ "\u5ACD>'[tao]'",
+ "\u5ACE>'['p\u00e1ng']'",
+ "\u5ACF>'['l\u00e1ng']'",
+ "\u5AD0>'['na\u014F']'",
+ "\u5AD1>'['ba\u00f3']'",
+ "\u5AD2>'['a\u00ec']'",
+ "\u5AD3>'['p\u00ec']'",
+ "\u5AD4>'['p\u00edn']'",
+ "\u5AD5>'['y\u00ec']'",
+ "\u5AD6>'['pia\u00f2']'",
+ "\u5AD7>'['y\u00f9']'",
+ "\u5AD8>'['le\u00ed']'",
+ "\u5AD9>'['xu\u00e1n']'",
+ "\u5ADA>'['m\u00e0n']'",
+ "\u5ADB>'[yi]'",
+ "\u5ADC>'[zhang]'",
+ "\u5ADD>'[kang]'",
+ "\u5ADE>'['y\u00f3ng']'",
+ "\u5ADF>'['n\u00ec']'",
+ "\u5AE0>'['l\u00ed']'",
+ "\u5AE1>'['d\u00ed']'",
+ "\u5AE2>'[gui]'",
+ "\u5AE3>'[yan]'",
+ "\u5AE4>'['j\u00ecn']'",
+ "\u5AE5>'[zhuan]'",
+ "\u5AE6>'['ch\u00e1ng']'",
+ "\u5AE7>'['c\u00e8']'",
+ "\u5AE8>'[han]'",
+ "\u5AE9>'['n\u00e8n']'",
+ "\u5AEA>'['la\u00f2']'",
+ "\u5AEB>'['m\u00f3']'",
+ "\u5AEC>'[zhe]'",
+ "\u5AED>'['h\u00f9']'",
+ "\u5AEE>'['h\u00f9']'",
+ "\u5AEF>'['a\u00f2']'",
+ "\u5AF0>'['n\u00e8n']'",
+ "\u5AF1>'['qi\u00e1ng']'",
+ "\u5AF3>'['pi\u00e8']'",
+ "\u5AF4>'[gu]'",
+ "\u5AF5>'['w\u016D']'",
+ "\u5AF6>'['jia\u00f3']'",
+ "\u5AF7>'['tu\u014F']'",
+ "\u5AF8>'['zh\u0103n']'",
+ "\u5AF9>'['ma\u00f3']'",
+ "\u5AFA>'['xi\u00e1n']'",
+ "\u5AFB>'['xi\u00e1n']'",
+ "\u5AFC>'['m\u00f2']'",
+ "\u5AFD>'['lia\u00f3']'",
+ "\u5AFE>'['li\u00e1n']'",
+ "\u5AFF>'['hu\u00e0']'",
+ "\u5B00>'[gui]'",
+ "\u5B01>'[deng]'",
+ "\u5B02>'[zhi]'",
+ "\u5B03>'[xu]'",
+ "\u5B04>'[YI]'",
+ "\u5B05>'['hu\u00e1']'",
+ "\u5B06>'[xi]'",
+ "\u5B07>'['hu\u00ec']'",
+ "\u5B08>'['ra\u014F']'",
+ "\u5B09>'[xi]'",
+ "\u5B0A>'['y\u00e0n']'",
+ "\u5B0B>'['ch\u00e1n']'",
+ "\u5B0C>'[jiao]'",
+ "\u5B0D>'['me\u012D']'",
+ "\u5B0E>'['f\u00e0n']'",
+ "\u5B0F>'[fan]'",
+ "\u5B10>'[xian]'",
+ "\u5B11>'['y\u00ec']'",
+ "\u5B12>'['we\u00ec']'",
+ "\u5B13>'['jia\u00f2']'",
+ "\u5B14>'['f\u00f9']'",
+ "\u5B15>'['sh\u00ec']'",
+ "\u5B16>'['b\u00ec']'",
+ "\u5B17>'['sh\u00e0n']'",
+ "\u5B18>'['su\u00ec']'",
+ "\u5B19>'['qi\u00e1ng']'",
+ "\u5B1A>'['li\u0103n']'",
+ "\u5B1B>'['hu\u00e1n']'",
+ "\u5B1C>'[XIN]'",
+ "\u5B1D>'['nia\u014F']'",
+ "\u5B1E>'['d\u014Fng']'",
+ "\u5B1F>'['y\u00ec']'",
+ "\u5B20>'['c\u00e1n']'",
+ "\u5B21>'['a\u00ec']'",
+ "\u5B22>'['ni\u00e1ng']'",
+ "\u5B23>'['n\u00e9ng']'",
+ "\u5B24>'[ma]'",
+ "\u5B25>'['tia\u014F']'",
+ "\u5B26>'['cho\u00fa']'",
+ "\u5B27>'['j\u00ecn']'",
+ "\u5B28>'['c\u00ed']'",
+ "\u5B29>'['y\u00fa']'",
+ "\u5B2A>'['p\u00edn']'",
+ "\u5B2B>'[YONG]'",
+ "\u5B2C>'[xu]'",
+ "\u5B2D>'['na\u012D']'",
+ "\u5B2E>'[yan]'",
+ "\u5B2F>'['ta\u00ed']'",
+ "\u5B30>'[ying]'",
+ "\u5B31>'['c\u00e1n']'",
+ "\u5B32>'['nia\u014F']'",
+ "\u5B33>'[WO]'",
+ "\u5B34>'['y\u00edng']'",
+ "\u5B35>'['mi\u00e1n']'",
+ "\u5B37>'[ma]'",
+ "\u5B38>'['sh\u0115n']'",
+ "\u5B39>'['x\u00ecng']'",
+ "\u5B3A>'['n\u00ec']'",
+ "\u5B3B>'['d\u00fa']'",
+ "\u5B3C>'['li\u016D']'",
+ "\u5B3D>'[yuan]'",
+ "\u5B3E>'['l\u0103n']'",
+ "\u5B3F>'['y\u00e0n']'",
+ "\u5B40>'[shuang]'",
+ "\u5B41>'['l\u00edng']'",
+ "\u5B42>'['jia\u014F']'",
+ "\u5B43>'['ni\u00e1ng']'",
+ "\u5B44>'['l\u0103n']'",
+ "\u5B45>'[xian]'",
+ "\u5B46>'[ying]'",
+ "\u5B47>'[shuang]'",
+ "\u5B48>'[shuai]'",
+ "\u5B49>'['qu\u00e1n']'",
+ "\u5B4A>'['m\u012D']'",
+ "\u5B4B>'['l\u00ed']'",
+ "\u5B4C>'['lu\u00e1n']'",
+ "\u5B4D>'['y\u00e1n']'",
+ "\u5B4E>'['zh\u016D']'",
+ "\u5B4F>'['l\u0103n']'",
+ "\u5B50>'['z\u012D']'",
+ "\u5B51>'['ji\u00e9']'",
+ "\u5B52>'['ju\u00e9']'",
+ "\u5B53>'['ju\u00e9']'",
+ "\u5B54>'['k\u014Fng']'",
+ "\u5B55>'['y\u00f9n']'",
+ "\u5B56>'[zi]'",
+ "\u5B57>'['z\u00ec']'",
+ "\u5B58>'['c\u00fan']'",
+ "\u5B59>'[sun]'",
+ "\u5B5A>'['f\u00fa']'",
+ "\u5B5B>'['be\u00ec']'",
+ "\u5B5C>'[zi]'",
+ "\u5B5D>'['xia\u00f2']'",
+ "\u5B5E>'['x\u00ecn']'",
+ "\u5B5F>'['m\u00e8ng']'",
+ "\u5B60>'['s\u00ec']'",
+ "\u5B61>'[tai]'",
+ "\u5B62>'[bao]'",
+ "\u5B63>'['j\u00ec']'",
+ "\u5B64>'[gu]'",
+ "\u5B65>'['n\u00fa']'",
+ "\u5B66>'['xu\u00e9']'",
+ "\u5B68>'['zhu\u0103n']'",
+ "\u5B69>'['ha\u00ed']'",
+ "\u5B6A>'['lu\u00e1n']'",
+ "\u5B6B>'[sun]'",
+ "\u5B6C>'['hua\u00ec']'",
+ "\u5B6D>'[mie]'",
+ "\u5B6E>'['c\u00f3ng']'",
+ "\u5B6F>'[qian]'",
+ "\u5B70>'['sh\u00fa']'",
+ "\u5B71>'['ch\u00e1n']'",
+ "\u5B72>'[ya]'",
+ "\u5B73>'[zi]'",
+ "\u5B74>'['n\u012D']'",
+ "\u5B75>'[fu]'",
+ "\u5B76>'[zi]'",
+ "\u5B77>'['l\u00ed']'",
+ "\u5B78>'['xu\u00e9']'",
+ "\u5B79>'['b\u00f2']'",
+ "\u5B7A>'['r\u00fa']'",
+ "\u5B7B>'['la\u00ed']'",
+ "\u5B7C>'['ni\u00e8']'",
+ "\u5B7D>'['ni\u00e8']'",
+ "\u5B7E>'[ying]'",
+ "\u5B7F>'['lu\u00e1n']'",
+ "\u5B80>'['mi\u00e1n']'",
+ "\u5B81>'['zh\u00f9']'",
+ "\u5B82>'['r\u014Fng']'",
+ "\u5B83>'[ta]'",
+ "\u5B84>'['gu\u012D']'",
+ "\u5B85>'['zha\u00ed']'",
+ "\u5B86>'['qi\u00f3ng']'",
+ "\u5B87>'['y\u016D']'",
+ "\u5B88>'['sho\u016D']'",
+ "\u5B89>'[an]'",
+ "\u5B8A>'['t\u00fa']'",
+ "\u5B8B>'['s\u00f2ng']'",
+ "\u5B8C>'['w\u00e1n']'",
+ "\u5B8D>'['ro\u00f9']'",
+ "\u5B8E>'['ya\u014F']'",
+ "\u5B8F>'['h\u00f3ng']'",
+ "\u5B90>'['y\u00ed']'",
+ "\u5B91>'['j\u012Dng']'",
+ "\u5B92>'[zhun]'",
+ "\u5B93>'['m\u00ec']'",
+ "\u5B94>'['zh\u016D']'",
+ "\u5B95>'['d\u00e0ng']'",
+ "\u5B96>'['h\u00f3ng']'",
+ "\u5B97>'[zong]'",
+ "\u5B98>'[guan]'",
+ "\u5B99>'['zho\u00f9']'",
+ "\u5B9A>'['d\u00ecng']'",
+ "\u5B9B>'['w\u0103n']'",
+ "\u5B9C>'['y\u00ed']'",
+ "\u5B9D>'['ba\u014F']'",
+ "\u5B9E>'['sh\u00ed']'",
+ "\u5B9F>'['sh\u00ed']'",
+ "\u5BA0>'['ch\u014Fng']'",
+ "\u5BA1>'['sh\u0115n']'",
+ "\u5BA2>'['k\u00e8']'",
+ "\u5BA3>'[xuan]'",
+ "\u5BA4>'['sh\u00ec']'",
+ "\u5BA5>'['yo\u00f9']'",
+ "\u5BA6>'['hu\u00e0n']'",
+ "\u5BA7>'['y\u00ed']'",
+ "\u5BA8>'['tia\u014F']'",
+ "\u5BA9>'['sh\u012D']'",
+ "\u5BAA>'['xi\u00e0n']'",
+ "\u5BAB>'[gong]'",
+ "\u5BAC>'['ch\u00e9ng']'",
+ "\u5BAD>'['q\u00fan']'",
+ "\u5BAE>'[gong]'",
+ "\u5BAF>'[xiao]'",
+ "\u5BB0>'['za\u012D']'",
+ "\u5BB1>'['zh\u00e0']'",
+ "\u5BB2>'['ba\u014F']'",
+ "\u5BB3>'['ha\u00ec']'",
+ "\u5BB4>'['y\u00e0n']'",
+ "\u5BB5>'[xiao]'",
+ "\u5BB6>'[jia]'",
+ "\u5BB7>'['sh\u0115n']'",
+ "\u5BB8>'['ch\u00e9n']'",
+ "\u5BB9>'['r\u00f3ng']'",
+ "\u5BBA>'['hu\u0103ng']'",
+ "\u5BBB>'['m\u00ec']'",
+ "\u5BBC>'['ko\u00f9']'",
+ "\u5BBD>'[kuan]'",
+ "\u5BBE>'[bin]'",
+ "\u5BBF>'['s\u00f9']'",
+ "\u5BC0>'['ca\u00ec']'",
+ "\u5BC1>'['z\u0103n']'",
+ "\u5BC2>'['j\u00ec']'",
+ "\u5BC3>'[yuan]'",
+ "\u5BC4>'['j\u00ec']'",
+ "\u5BC5>'['y\u00edn']'",
+ "\u5BC6>'['m\u00ec']'",
+ "\u5BC7>'['ko\u00f9']'",
+ "\u5BC8>'[qing]'",
+ "\u5BC9>'['qu\u00e8']'",
+ "\u5BCA>'[zhen]'",
+ "\u5BCB>'['ji\u0103n']'",
+ "\u5BCC>'['f\u00f9']'",
+ "\u5BCD>'['n\u00edng']'",
+ "\u5BCE>'['b\u00ecng']'",
+ "\u5BCF>'['hu\u00e1n']'",
+ "\u5BD0>'['me\u00ec']'",
+ "\u5BD1>'['q\u012Dn']'",
+ "\u5BD2>'['h\u00e1n']'",
+ "\u5BD3>'['y\u00f9']'",
+ "\u5BD4>'['sh\u00ed']'",
+ "\u5BD5>'['n\u00edng']'",
+ "\u5BD6>'['q\u00ecn']'",
+ "\u5BD7>'['n\u00edng']'",
+ "\u5BD8>'['zh\u00ec']'",
+ "\u5BD9>'['y\u016D']'",
+ "\u5BDA>'['ba\u014F']'",
+ "\u5BDB>'[kuan]'",
+ "\u5BDC>'['n\u00edng']'",
+ "\u5BDD>'['q\u012Dn']'",
+ "\u5BDE>'['m\u00f2']'",
+ "\u5BDF>'['ch\u00e1']'",
+ "\u5BE0>'['j\u00f9']'",
+ "\u5BE1>'['gu\u0103']'",
+ "\u5BE2>'['q\u012Dn']'",
+ "\u5BE3>'[hu]'",
+ "\u5BE4>'['w\u00f9']'",
+ "\u5BE5>'['lia\u00f3']'",
+ "\u5BE6>'['sh\u00ed']'",
+ "\u5BE7>'['zh\u00f9']'",
+ "\u5BE8>'['zha\u00ec']'",
+ "\u5BE9>'['sh\u0115n']'",
+ "\u5BEA>'['we\u012D']'",
+ "\u5BEB>'['xi\u0115']'",
+ "\u5BEC>'[kuan]'",
+ "\u5BED>'['hu\u00ec']'",
+ "\u5BEE>'['lia\u00f3']'",
+ "\u5BEF>'['j\u00f9n']'",
+ "\u5BF0>'['hu\u00e1n']'",
+ "\u5BF1>'['y\u00ec']'",
+ "\u5BF2>'['y\u00ed']'",
+ "\u5BF3>'['ba\u014F']'",
+ "\u5BF4>'['q\u00ecn']'",
+ "\u5BF5>'['ch\u014Fng']'",
+ "\u5BF6>'['ba\u014F']'",
+ "\u5BF7>'[feng]'",
+ "\u5BF8>'['c\u00f9n']'",
+ "\u5BF9>'['du\u00ec']'",
+ "\u5BFA>'['s\u00ec']'",
+ "\u5BFB>'['x\u00fan']'",
+ "\u5BFC>'['da\u014F']'",
+ "\u5BFD>'['l\u01DC']'",
+ "\u5BFE>'['du\u00ec']'",
+ "\u5BFF>'['sho\u00f9']'",
+ "\u5C00>'['p\u014F']'",
+ "\u5C01>'[feng]'",
+ "\u5C02>'[zhuan]'",
+ "\u5C03>'[fu]'",
+ "\u5C04>'['sh\u00e8']'",
+ "\u5C05>'['k\u00e8']'",
+ "\u5C06>'[jiang]'",
+ "\u5C07>'[jiang]'",
+ "\u5C08>'[zhuan]'",
+ "\u5C09>'['we\u00ec']'",
+ "\u5C0A>'[zun]'",
+ "\u5C0B>'['x\u00fan']'",
+ "\u5C0C>'['sh\u00f9']'",
+ "\u5C0D>'['du\u00ec']'",
+ "\u5C0E>'['da\u014F']'",
+ "\u5C0F>'['xia\u014F']'",
+ "\u5C10>'[ji]'",
+ "\u5C11>'['sha\u014F']'",
+ "\u5C12>'['\u0115r']'",
+ "\u5C13>'['\u0115r']'",
+ "\u5C14>'['\u0115r']'",
+ "\u5C15>'['g\u0103']'",
+ "\u5C16>'[jian]'",
+ "\u5C17>'['sh\u00fa']'",
+ "\u5C18>'['ch\u00e9n']'",
+ "\u5C19>'['sh\u00e0ng']'",
+ "\u5C1A>'['sh\u00e0ng']'",
+ "\u5C1B>'[MO]'",
+ "\u5C1C>'['g\u00e1']'",
+ "\u5C1D>'['ch\u00e1ng']'",
+ "\u5C1E>'['lia\u00f2']'",
+ "\u5C1F>'['xi\u0103n']'",
+ "\u5C20>'['xi\u0103n']'",
+ "\u5C22>'[wang]'",
+ "\u5C23>'[wang]'",
+ "\u5C24>'['yo\u00fa']'",
+ "\u5C25>'['lia\u00f2']'",
+ "\u5C26>'['lia\u00f2']'",
+ "\u5C27>'['ya\u00f3']'",
+ "\u5C28>'['m\u00e1ng']'",
+ "\u5C29>'[wang]'",
+ "\u5C2A>'[wang]'",
+ "\u5C2B>'[wang]'",
+ "\u5C2C>'['g\u00e0']'",
+ "\u5C2D>'['ya\u00f3']'",
+ "\u5C2E>'['du\u00f2']'",
+ "\u5C2F>'['ku\u00ec']'",
+ "\u5C30>'['zh\u014Fng']'",
+ "\u5C31>'['ji\u00f9']'",
+ "\u5C32>'[gan]'",
+ "\u5C33>'['g\u016D']'",
+ "\u5C34>'[gan]'",
+ "\u5C35>'['tu\u00ed']'",
+ "\u5C36>'[gan]'",
+ "\u5C37>'[gan]'",
+ "\u5C38>'[shi]'",
+ "\u5C39>'['y\u012Dn']'",
+ "\u5C3A>'['ch\u012D']'",
+ "\u5C3B>'[kao]'",
+ "\u5C3C>'['n\u00ed']'",
+ "\u5C3D>'['j\u012Dn']'",
+ "\u5C3E>'['we\u012D']'",
+ "\u5C3F>'['nia\u00f2']'",
+ "\u5C40>'['j\u00fa']'",
+ "\u5C41>'['p\u00ec']'",
+ "\u5C42>'['c\u00e9ng']'",
+ "\u5C43>'['x\u00ec']'",
+ "\u5C44>'[bi]'",
+ "\u5C45>'[ju]'",
+ "\u5C46>'['ji\u00e8']'",
+ "\u5C47>'['ti\u00e1n']'",
+ "\u5C48>'[qu]'",
+ "\u5C49>'['t\u00ec']'",
+ "\u5C4A>'['ji\u00e8']'",
+ "\u5C4B>'[wu]'",
+ "\u5C4C>'['dia\u014F']'",
+ "\u5C4D>'[shi]'",
+ "\u5C4E>'['sh\u012D']'",
+ "\u5C4F>'['p\u00edng']'",
+ "\u5C50>'[ji]'",
+ "\u5C51>'['xi\u00e8']'",
+ "\u5C52>'['ch\u00e9n']'",
+ "\u5C53>'['x\u00ec']'",
+ "\u5C54>'['n\u00ed']'",
+ "\u5C55>'['zh\u0103n']'",
+ "\u5C56>'[xi]'",
+ "\u5C58>'['m\u0103n']'",
+ "\u5C59>'[e]'",
+ "\u5C5A>'['lo\u00f9']'",
+ "\u5C5B>'['p\u00edng']'",
+ "\u5C5C>'['t\u00ec']'",
+ "\u5C5D>'['fe\u00ec']'",
+ "\u5C5E>'['sh\u016D']'",
+ "\u5C5F>'['xi\u00e8']'",
+ "\u5C60>'['t\u00fa']'",
+ "\u5C61>'['l\u01DA']'",
+ "\u5C62>'['l\u01DA']'",
+ "\u5C63>'['x\u012D']'",
+ "\u5C64>'['c\u00e9ng']'",
+ "\u5C65>'['l\u01DA']'",
+ "\u5C66>'['j\u00f9']'",
+ "\u5C67>'['xi\u00e8']'",
+ "\u5C68>'['j\u00f9']'",
+ "\u5C69>'[jue]'",
+ "\u5C6A>'['lia\u00f3']'",
+ "\u5C6B>'['ju\u00e9']'",
+ "\u5C6C>'['sh\u016D']'",
+ "\u5C6D>'['x\u00ec']'",
+ "\u5C6E>'['ch\u00e8']'",
+ "\u5C6F>'['t\u00fan']'",
+ "\u5C70>'['n\u00ec']'",
+ "\u5C71>'[shan]'",
+ "\u5C73>'[xian]'",
+ "\u5C74>'['l\u00ec']'",
+ "\u5C75>'[xue]'",
+ "\u5C78>'['l\u00f3ng']'",
+ "\u5C79>'['y\u00ec']'",
+ "\u5C7A>'['q\u012D']'",
+ "\u5C7B>'['r\u00e8n']'",
+ "\u5C7C>'['w\u00f9']'",
+ "\u5C7D>'['h\u00e0n']'",
+ "\u5C7E>'[shen]'",
+ "\u5C7F>'['y\u016D']'",
+ "\u5C80>'[chu]'",
+ "\u5C81>'['su\u00ec']'",
+ "\u5C82>'['q\u012D']'",
+ "\u5C84>'['yu\u00e8']'",
+ "\u5C85>'['b\u0103n']'",
+ "\u5C86>'['ya\u014F']'",
+ "\u5C87>'['\u00e1ng']'",
+ "\u5C88>'['y\u00e1']'",
+ "\u5C89>'['w\u00f9']'",
+ "\u5C8A>'['ji\u00e9']'",
+ "\u5C8B>'['\u00e8']'",
+ "\u5C8C>'['j\u00ed']'",
+ "\u5C8D>'[qian]'",
+ "\u5C8E>'[fen]'",
+ "\u5C8F>'['yu\u00e1n']'",
+ "\u5C90>'['q\u00ed']'",
+ "\u5C91>'['c\u00e9n']'",
+ "\u5C92>'['qi\u00e1n']'",
+ "\u5C93>'['q\u00ed']'",
+ "\u5C94>'['ch\u00e0']'",
+ "\u5C95>'['ji\u00e8']'",
+ "\u5C96>'[qu]'",
+ "\u5C97>'['g\u0103ng']'",
+ "\u5C98>'['xi\u00e0n']'",
+ "\u5C99>'['a\u00f2']'",
+ "\u5C9A>'['l\u00e1n']'",
+ "\u5C9B>'['da\u014F']'",
+ "\u5C9C>'[ba]'",
+ "\u5C9D>'['zu\u00f2']'",
+ "\u5C9E>'['zu\u00f2']'",
+ "\u5C9F>'['y\u0103ng']'",
+ "\u5CA0>'['j\u00f9']'",
+ "\u5CA1>'[gang]'",
+ "\u5CA2>'['k\u0115']'",
+ "\u5CA3>'['go\u016D']'",
+ "\u5CA4>'['xu\u00e8']'",
+ "\u5CA5>'[bei]'",
+ "\u5CA6>'['l\u00ec']'",
+ "\u5CA7>'['tia\u00f3']'",
+ "\u5CA8>'[ju]'",
+ "\u5CA9>'['y\u00e1n']'",
+ "\u5CAA>'['f\u00fa']'",
+ "\u5CAB>'['xi\u00f9']'",
+ "\u5CAC>'['ji\u0103']'",
+ "\u5CAD>'['l\u00edng']'",
+ "\u5CAE>'['tu\u00f3']'",
+ "\u5CAF>'[pei]'",
+ "\u5CB0>'['yo\u016D']'",
+ "\u5CB1>'['da\u00ec']'",
+ "\u5CB2>'['ku\u00e0ng']'",
+ "\u5CB3>'['yu\u00e8']'",
+ "\u5CB4>'[qu]'",
+ "\u5CB5>'['h\u00f9']'",
+ "\u5CB6>'['p\u00f2']'",
+ "\u5CB7>'['m\u00edn']'",
+ "\u5CB8>'['\u00e0n']'",
+ "\u5CB9>'['tia\u00f3']'",
+ "\u5CBA>'['l\u00edng']'",
+ "\u5CBB>'['ch\u00ed']'",
+ "\u5CBD>'[dong]'",
+ "\u5CBF>'[kui]'",
+ "\u5CC0>'['xi\u00f9']'",
+ "\u5CC1>'['ma\u014F']'",
+ "\u5CC2>'['t\u00f3ng']'",
+ "\u5CC3>'['xu\u00e9']'",
+ "\u5CC4>'['y\u00ec']'",
+ "\u5CC6>'[he]'",
+ "\u5CC7>'[ke]'",
+ "\u5CC8>'['lu\u00f2']'",
+ "\u5CC9>'[e]'",
+ "\u5CCA>'['f\u00f9']'",
+ "\u5CCB>'['x\u00fan']'",
+ "\u5CCC>'['di\u00e9']'",
+ "\u5CCD>'['l\u00f9']'",
+ "\u5CCE>'[an]'",
+ "\u5CCF>'['\u0115r']'",
+ "\u5CD0>'[gai]'",
+ "\u5CD1>'['qu\u00e1n']'",
+ "\u5CD2>'['t\u00f3ng']'",
+ "\u5CD3>'['y\u00ed']'",
+ "\u5CD4>'['m\u016D']'",
+ "\u5CD5>'['sh\u00ed']'",
+ "\u5CD6>'[an]'",
+ "\u5CD7>'['we\u00ed']'",
+ "\u5CD8>'[hu]'",
+ "\u5CD9>'['zh\u00ec']'",
+ "\u5CDA>'['m\u00ec']'",
+ "\u5CDB>'['l\u012D']'",
+ "\u5CDC>'[ji]'",
+ "\u5CDD>'['t\u00f3ng']'",
+ "\u5CDE>'['we\u00ed']'",
+ "\u5CDF>'['yo\u00f9']'",
+ "\u5CE1>'['xi\u00e1']'",
+ "\u5CE2>'['l\u012D']'",
+ "\u5CE3>'['ya\u00f3']'",
+ "\u5CE4>'['jia\u00f2']'",
+ "\u5CE5>'[zheng]'",
+ "\u5CE6>'['lu\u00e1n']'",
+ "\u5CE7>'[jiao]'",
+ "\u5CE8>'['\u00e9']'",
+ "\u5CE9>'['\u00e9']'",
+ "\u5CEA>'['y\u00f9']'",
+ "\u5CEB>'['y\u00e9']'",
+ "\u5CEC>'[bu]'",
+ "\u5CED>'['qia\u00f2']'",
+ "\u5CEE>'[qun]'",
+ "\u5CEF>'[feng]'",
+ "\u5CF0>'[feng]'",
+ "\u5CF1>'['na\u00f3']'",
+ "\u5CF2>'['l\u012D']'",
+ "\u5CF3>'['yo\u00fa']'",
+ "\u5CF4>'['xi\u00e0n']'",
+ "\u5CF5>'['h\u00f3ng']'",
+ "\u5CF6>'['da\u014F']'",
+ "\u5CF7>'[shen]'",
+ "\u5CF8>'['ch\u00e9ng']'",
+ "\u5CF9>'['t\u00fa']'",
+ "\u5CFA>'['g\u0115ng']'",
+ "\u5CFB>'['j\u00f9n']'",
+ "\u5CFC>'['ha\u00f2']'",
+ "\u5CFD>'['xi\u00e1']'",
+ "\u5CFE>'[yin]'",
+ "\u5CFF>'['y\u016D']'",
+ "\u5D00>'['l\u00e0ng']'",
+ "\u5D01>'['k\u0103n']'",
+ "\u5D02>'['la\u00f3']'",
+ "\u5D03>'['la\u00ed']'",
+ "\u5D04>'['xi\u0103n']'",
+ "\u5D05>'['qu\u00e8']'",
+ "\u5D06>'[kong]'",
+ "\u5D07>'['ch\u00f3ng']'",
+ "\u5D08>'['ch\u00f3ng']'",
+ "\u5D09>'['t\u00e0']'",
+ "\u5D0A>'[LIN]'",
+ "\u5D0B>'['hu\u00e1']'",
+ "\u5D0C>'[ju]'",
+ "\u5D0D>'['la\u00ed']'",
+ "\u5D0E>'['q\u00ed']'",
+ "\u5D0F>'['m\u00edn']'",
+ "\u5D10>'[kun]'",
+ "\u5D11>'[kun]'",
+ "\u5D12>'['z\u00fa']'",
+ "\u5D13>'['g\u00f9']'",
+ "\u5D14>'[cui]'",
+ "\u5D15>'['y\u00e1']'",
+ "\u5D16>'['y\u00e1']'",
+ "\u5D17>'['g\u0103ng']'",
+ "\u5D18>'['l\u00fan']'",
+ "\u5D19>'['l\u00fan']'",
+ "\u5D1A>'['l\u00e9ng']'",
+ "\u5D1B>'['ju\u00e9']'",
+ "\u5D1C>'[duo]'",
+ "\u5D1D>'[zheng]'",
+ "\u5D1E>'[guo]'",
+ "\u5D1F>'['y\u00edn']'",
+ "\u5D20>'[dong]'",
+ "\u5D21>'['h\u00e1n']'",
+ "\u5D22>'[zheng]'",
+ "\u5D23>'['we\u012D']'",
+ "\u5D24>'['ya\u00f3']'",
+ "\u5D25>'['p\u012D']'",
+ "\u5D26>'[yan]'",
+ "\u5D27>'[song]'",
+ "\u5D28>'['ji\u00e9']'",
+ "\u5D29>'[beng]'",
+ "\u5D2A>'['z\u00fa']'",
+ "\u5D2B>'['ju\u00e9']'",
+ "\u5D2C>'[dong]'",
+ "\u5D2D>'['zh\u0103n']'",
+ "\u5D2E>'['g\u00f9']'",
+ "\u5D2F>'['y\u00edn']'",
+ "\u5D31>'['z\u00e9']'",
+ "\u5D32>'['hu\u00e1ng']'",
+ "\u5D33>'['y\u00fa']'",
+ "\u5D34>'[wei]'",
+ "\u5D35>'['y\u00e1ng']'",
+ "\u5D36>'[feng]'",
+ "\u5D37>'['qi\u00fa']'",
+ "\u5D38>'['d\u00f9n']'",
+ "\u5D39>'['t\u00ed']'",
+ "\u5D3A>'['y\u012D']'",
+ "\u5D3B>'['zh\u00ec']'",
+ "\u5D3C>'['sh\u00ec']'",
+ "\u5D3D>'['za\u012D']'",
+ "\u5D3E>'['ya\u014F']'",
+ "\u5D3F>'['\u00e8']'",
+ "\u5D40>'['zh\u00f9']'",
+ "\u5D41>'[kan]'",
+ "\u5D42>'['l\u01DC']'",
+ "\u5D43>'['y\u0103n']'",
+ "\u5D44>'['me\u012D']'",
+ "\u5D45>'[gan]'",
+ "\u5D46>'[ji]'",
+ "\u5D47>'[ji]'",
+ "\u5D48>'['hu\u0103n']'",
+ "\u5D49>'['t\u00edng']'",
+ "\u5D4A>'['sh\u00e8ng']'",
+ "\u5D4B>'['me\u00ed']'",
+ "\u5D4C>'['qi\u00e0n']'",
+ "\u5D4D>'['w\u00f9']'",
+ "\u5D4E>'['y\u00fa']'",
+ "\u5D4F>'[zong]'",
+ "\u5D50>'['l\u00e1n']'",
+ "\u5D51>'['ju\u00e9']'",
+ "\u5D52>'['y\u00e1n']'",
+ "\u5D53>'['y\u00e1n']'",
+ "\u5D54>'['we\u012D']'",
+ "\u5D55>'[zong]'",
+ "\u5D56>'['ch\u00e1']'",
+ "\u5D57>'['su\u00ec']'",
+ "\u5D58>'['r\u00f3ng']'",
+ "\u5D5A>'[qin]'",
+ "\u5D5B>'['y\u00fa']'",
+ "\u5D5D>'['lo\u016D']'",
+ "\u5D5E>'['t\u00fa']'",
+ "\u5D5F>'[dui]'",
+ "\u5D60>'[xi]'",
+ "\u5D61>'[weng]'",
+ "\u5D62>'[cang]'",
+ "\u5D63>'[dang]'",
+ "\u5D64>'['h\u00f3ng']'",
+ "\u5D65>'['ji\u00e9']'",
+ "\u5D66>'['a\u00ed']'",
+ "\u5D67>'['li\u00fa']'",
+ "\u5D68>'['w\u016D']'",
+ "\u5D69>'[song]'",
+ "\u5D6A>'[qiao]'",
+ "\u5D6B>'[zi]'",
+ "\u5D6C>'['we\u00ed']'",
+ "\u5D6D>'[beng]'",
+ "\u5D6E>'[dian]'",
+ "\u5D6F>'['cu\u00f3']'",
+ "\u5D70>'['qi\u0103n']'",
+ "\u5D71>'['y\u014Fng']'",
+ "\u5D72>'['ni\u00e8']'",
+ "\u5D73>'['cu\u00f3']'",
+ "\u5D74>'['j\u00ed']'",
+ "\u5D77>'['s\u014Fng']'",
+ "\u5D78>'[zong]'",
+ "\u5D79>'['ji\u00e0ng']'",
+ "\u5D7A>'['lia\u00f3']'",
+ "\u5D7B>'[KANG]'",
+ "\u5D7C>'['ch\u0103n']'",
+ "\u5D7D>'['di\u00e9']'",
+ "\u5D7E>'[cen]'",
+ "\u5D7F>'['d\u012Dng']'",
+ "\u5D80>'[tu]'",
+ "\u5D81>'['lo\u016D']'",
+ "\u5D82>'['zh\u00e0ng']'",
+ "\u5D83>'['zh\u0103n']'",
+ "\u5D84>'['zh\u0103n']'",
+ "\u5D85>'['a\u00f3']'",
+ "\u5D86>'['ca\u00f3']'",
+ "\u5D87>'[qu]'",
+ "\u5D88>'[qiang]'",
+ "\u5D89>'[zui]'",
+ "\u5D8A>'['zu\u012D']'",
+ "\u5D8B>'['da\u014F']'",
+ "\u5D8C>'['da\u014F']'",
+ "\u5D8D>'['x\u00ed']'",
+ "\u5D8E>'['y\u00f9']'",
+ "\u5D8F>'['b\u00f3']'",
+ "\u5D90>'['l\u00f3ng']'",
+ "\u5D91>'['xi\u0103ng']'",
+ "\u5D92>'['c\u00e9ng']'",
+ "\u5D93>'[bo]'",
+ "\u5D94>'[qin]'",
+ "\u5D95>'[jiao]'",
+ "\u5D96>'['y\u0103n']'",
+ "\u5D97>'['la\u00f3']'",
+ "\u5D98>'['zh\u00e0n']'",
+ "\u5D99>'['l\u00edn']'",
+ "\u5D9A>'['lia\u00f3']'",
+ "\u5D9B>'['lia\u00f3']'",
+ "\u5D9C>'[jin]'",
+ "\u5D9D>'['d\u00e8ng']'",
+ "\u5D9E>'['du\u00f2']'",
+ "\u5D9F>'[zun]'",
+ "\u5DA0>'['jia\u00f2']'",
+ "\u5DA1>'['gu\u00ec']'",
+ "\u5DA2>'['ya\u00f3']'",
+ "\u5DA3>'['qia\u00f3']'",
+ "\u5DA4>'['ya\u00f3']'",
+ "\u5DA5>'['ju\u00e9']'",
+ "\u5DA6>'[zhan]'",
+ "\u5DA7>'['y\u00ec']'",
+ "\u5DA8>'['xu\u00e9']'",
+ "\u5DA9>'['na\u00f3']'",
+ "\u5DAA>'['y\u00e8']'",
+ "\u5DAB>'['y\u00e8']'",
+ "\u5DAC>'['y\u00ed']'",
+ "\u5DAD>'['\u00e8']'",
+ "\u5DAE>'['xi\u0103n']'",
+ "\u5DAF>'['j\u00ed']'",
+ "\u5DB0>'['xi\u00e8']'",
+ "\u5DB1>'['k\u0115']'",
+ "\u5DB2>'[xi]'",
+ "\u5DB3>'['d\u00ec']'",
+ "\u5DB4>'['a\u00f2']'",
+ "\u5DB5>'['zu\u012D']'",
+ "\u5DB7>'['n\u00ec']'",
+ "\u5DB8>'['r\u00f3ng']'",
+ "\u5DB9>'['da\u014F']'",
+ "\u5DBA>'['l\u012Dng']'",
+ "\u5DBB>'['z\u00e1']'",
+ "\u5DBC>'['y\u016D']'",
+ "\u5DBD>'['yu\u00e8']'",
+ "\u5DBE>'['y\u012Dn']'",
+ "\u5DC0>'[jie]'",
+ "\u5DC1>'['l\u00ec']'",
+ "\u5DC2>'['su\u012D']'",
+ "\u5DC3>'['l\u00f3ng']'",
+ "\u5DC4>'['l\u00f3ng']'",
+ "\u5DC5>'[dian]'",
+ "\u5DC6>'['y\u00edng']'",
+ "\u5DC7>'[xi]'",
+ "\u5DC8>'['j\u00fa']'",
+ "\u5DC9>'['ch\u00e1n']'",
+ "\u5DCA>'['y\u012Dng']'",
+ "\u5DCB>'[kui]'",
+ "\u5DCC>'['y\u00e1n']'",
+ "\u5DCD>'[wei]'",
+ "\u5DCE>'['na\u00f3']'",
+ "\u5DCF>'['qu\u00e1n']'",
+ "\u5DD0>'['cha\u014F']'",
+ "\u5DD1>'['cu\u00e1n']'",
+ "\u5DD2>'['lu\u00e1n']'",
+ "\u5DD3>'[dian]'",
+ "\u5DD4>'[dian]'",
+ "\u5DD6>'['y\u00e1n']'",
+ "\u5DD7>'['y\u00e1n']'",
+ "\u5DD8>'['y\u0103n']'",
+ "\u5DD9>'['na\u00f3']'",
+ "\u5DDA>'['y\u0103n']'",
+ "\u5DDB>'[chuan]'",
+ "\u5DDC>'['gu\u00ec']'",
+ "\u5DDD>'[chuan]'",
+ "\u5DDE>'[zhou]'",
+ "\u5DDF>'[huang]'",
+ "\u5DE0>'[jing]'",
+ "\u5DE1>'['x\u00fan']'",
+ "\u5DE2>'['cha\u00f3']'",
+ "\u5DE3>'['cha\u00f3']'",
+ "\u5DE4>'[lie]'",
+ "\u5DE5>'[gong]'",
+ "\u5DE6>'['zu\u014F']'",
+ "\u5DE7>'['qia\u014F']'",
+ "\u5DE8>'['j\u00f9']'",
+ "\u5DE9>'['g\u014Fng']'",
+ "\u5DEB>'[wu]'",
+ "\u5DEE>'[chai]'",
+ "\u5DEF>'['qi\u00fa']'",
+ "\u5DF0>'['qi\u00fa']'",
+ "\u5DF1>'['j\u012D']'",
+ "\u5DF2>'['y\u012D']'",
+ "\u5DF3>'['s\u00ec']'",
+ "\u5DF4>'[ba]'",
+ "\u5DF5>'[zhi]'",
+ "\u5DF6>'[zhao]'",
+ "\u5DF7>'['xi\u00e0ng']'",
+ "\u5DF8>'['y\u00ed']'",
+ "\u5DF9>'['j\u012Dn']'",
+ "\u5DFA>'['x\u00f9n']'",
+ "\u5DFB>'['ju\u00e0n']'",
+ "\u5DFD>'['x\u00f9n']'",
+ "\u5DFE>'[jin]'",
+ "\u5DFF>'['f\u00fa']'",
+ "\u5E00>'[za]'",
+ "\u5E01>'['b\u00ec']'",
+ "\u5E02>'['sh\u00ec']'",
+ "\u5E03>'['b\u00f9']'",
+ "\u5E04>'[ding]'",
+ "\u5E05>'['shua\u00ec']'",
+ "\u5E06>'[fan]'",
+ "\u5E07>'['ni\u00e8']'",
+ "\u5E08>'[shi]'",
+ "\u5E09>'[fen]'",
+ "\u5E0A>'['p\u00e0']'",
+ "\u5E0B>'['zh\u012D']'",
+ "\u5E0C>'[xi]'",
+ "\u5E0D>'['h\u00f9']'",
+ "\u5E0E>'['d\u00e0n']'",
+ "\u5E0F>'['we\u00ed']'",
+ "\u5E10>'['zh\u00e0ng']'",
+ "\u5E11>'['t\u0103ng']'",
+ "\u5E12>'['da\u00ec']'",
+ "\u5E13>'['m\u00e0']'",
+ "\u5E14>'['pe\u00ec']'",
+ "\u5E15>'['p\u00e0']'",
+ "\u5E16>'[tie]'",
+ "\u5E17>'['f\u00fa']'",
+ "\u5E18>'['li\u00e1n']'",
+ "\u5E19>'['zh\u00ec']'",
+ "\u5E1A>'['zho\u016D']'",
+ "\u5E1B>'['b\u00f3']'",
+ "\u5E1C>'['zh\u00ec']'",
+ "\u5E1D>'['d\u00ec']'",
+ "\u5E1E>'['m\u00f2']'",
+ "\u5E1F>'['y\u00ec']'",
+ "\u5E20>'['y\u00ec']'",
+ "\u5E21>'['p\u00edng']'",
+ "\u5E22>'['qi\u00e0']'",
+ "\u5E23>'['ju\u00e0n']'",
+ "\u5E24>'['r\u00fa']'",
+ "\u5E25>'['shua\u00ec']'",
+ "\u5E26>'['da\u00ec']'",
+ "\u5E27>'['zh\u00e8ng']'",
+ "\u5E28>'['shu\u00ec']'",
+ "\u5E29>'['qia\u00f2']'",
+ "\u5E2A>'[zhen]'",
+ "\u5E2B>'[shi]'",
+ "\u5E2C>'['q\u00fan']'",
+ "\u5E2D>'['x\u00ed']'",
+ "\u5E2E>'[bang]'",
+ "\u5E2F>'['da\u00ec']'",
+ "\u5E30>'[gui]'",
+ "\u5E31>'['cho\u00fa']'",
+ "\u5E32>'['p\u00edng']'",
+ "\u5E33>'['zh\u00e0ng']'",
+ "\u5E34>'[sha]'",
+ "\u5E35>'[wan]'",
+ "\u5E36>'['da\u00ec']'",
+ "\u5E37>'['we\u00ed']'",
+ "\u5E38>'['ch\u00e1ng']'",
+ "\u5E39>'['sh\u00e0']'",
+ "\u5E3A>'['q\u00ed']'",
+ "\u5E3B>'['z\u00e9']'",
+ "\u5E3C>'['gu\u00f3']'",
+ "\u5E3D>'['ma\u00f2']'",
+ "\u5E3E>'['d\u016D']'",
+ "\u5E3F>'['ho\u00fa']'",
+ "\u5E40>'['zh\u00e8ng']'",
+ "\u5E41>'[xu]'",
+ "\u5E42>'['m\u00ec']'",
+ "\u5E43>'['we\u00ed']'",
+ "\u5E44>'['w\u00f2']'",
+ "\u5E45>'['f\u00fa']'",
+ "\u5E46>'['y\u00ec']'",
+ "\u5E47>'[bang]'",
+ "\u5E48>'['p\u00edng']'",
+ "\u5E4A>'[gong]'",
+ "\u5E4B>'['p\u00e1n']'",
+ "\u5E4C>'['hu\u0103ng']'",
+ "\u5E4D>'[dao]'",
+ "\u5E4E>'['m\u00ec']'",
+ "\u5E4F>'[jia]'",
+ "\u5E50>'['t\u00e9ng']'",
+ "\u5E51>'[hui]'",
+ "\u5E52>'[zhong]'",
+ "\u5E53>'[shan]'",
+ "\u5E54>'['m\u00e0n']'",
+ "\u5E55>'['m\u00f9']'",
+ "\u5E56>'[biao]'",
+ "\u5E57>'['gu\u00f3']'",
+ "\u5E58>'['z\u00e9']'",
+ "\u5E59>'['m\u00f9']'",
+ "\u5E5A>'[bang]'",
+ "\u5E5B>'['zh\u00e0ng']'",
+ "\u5E5C>'['ji\u014Fng']'",
+ "\u5E5D>'['ch\u0103n']'",
+ "\u5E5E>'['f\u00fa']'",
+ "\u5E5F>'['zh\u00ec']'",
+ "\u5E60>'[hu]'",
+ "\u5E61>'[fan]'",
+ "\u5E62>'['chu\u00e1ng']'",
+ "\u5E63>'['b\u00ec']'",
+ "\u5E66>'['m\u00ec']'",
+ "\u5E67>'[qiao]'",
+ "\u5E68>'[chan]'",
+ "\u5E69>'['f\u00e9n']'",
+ "\u5E6A>'['m\u00e9ng']'",
+ "\u5E6B>'[bang]'",
+ "\u5E6C>'['cho\u00fa']'",
+ "\u5E6D>'['mi\u00e8']'",
+ "\u5E6E>'['ch\u00fa']'",
+ "\u5E6F>'['ji\u00e9']'",
+ "\u5E70>'['xi\u0103n']'",
+ "\u5E71>'['l\u00e1n']'",
+ "\u5E72>'[gan]'",
+ "\u5E73>'['p\u00edng']'",
+ "\u5E74>'['ni\u00e1n']'",
+ "\u5E75>'[qian]'",
+ "\u5E76>'['b\u00ecng']'",
+ "\u5E77>'['b\u00ecng']'",
+ "\u5E78>'['x\u00ecng']'",
+ "\u5E79>'['g\u00e0n']'",
+ "\u5E7A>'[yao]'",
+ "\u5E7B>'['hu\u00e0n']'",
+ "\u5E7C>'['yo\u00f9']'",
+ "\u5E7D>'[you]'",
+ "\u5E7E>'['j\u012D']'",
+ "\u5E7F>'['y\u0103n']'",
+ "\u5E80>'['p\u012D']'",
+ "\u5E81>'[ting]'",
+ "\u5E82>'['z\u00e8']'",
+ "\u5E83>'['gu\u0103ng']'",
+ "\u5E84>'[zhuang]'",
+ "\u5E85>'['m\u014D']'",
+ "\u5E86>'['q\u00ecng']'",
+ "\u5E87>'['b\u00ec']'",
+ "\u5E88>'['q\u00edn']'",
+ "\u5E89>'['d\u00f9n']'",
+ "\u5E8A>'['chu\u00e1ng']'",
+ "\u5E8B>'['gu\u012D']'",
+ "\u5E8C>'['y\u0103']'",
+ "\u5E8D>'['ba\u00ec']'",
+ "\u5E8E>'['ji\u00e8']'",
+ "\u5E8F>'['x\u00f9']'",
+ "\u5E90>'['l\u00fa']'",
+ "\u5E91>'['w\u016D']'",
+ "\u5E93>'['k\u00f9']'",
+ "\u5E94>'['y\u00ecng']'",
+ "\u5E95>'['d\u012D']'",
+ "\u5E96>'['pa\u00f3']'",
+ "\u5E97>'['di\u00e0n']'",
+ "\u5E98>'[ya]'",
+ "\u5E99>'['mia\u00f2']'",
+ "\u5E9A>'[geng]'",
+ "\u5E9B>'[ci]'",
+ "\u5E9C>'['f\u016D']'",
+ "\u5E9D>'['t\u00f3ng']'",
+ "\u5E9E>'['p\u00e1ng']'",
+ "\u5E9F>'['fe\u00ec']'",
+ "\u5EA0>'['xi\u00e1ng']'",
+ "\u5EA1>'['y\u012D']'",
+ "\u5EA2>'['zh\u00ec']'",
+ "\u5EA3>'[tiao]'",
+ "\u5EA4>'['zh\u00ec']'",
+ "\u5EA5>'[xiu]'",
+ "\u5EA6>'['d\u00f9']'",
+ "\u5EA7>'['zu\u00f2']'",
+ "\u5EA8>'[xiao]'",
+ "\u5EA9>'['t\u00fa']'",
+ "\u5EAA>'['gu\u012D']'",
+ "\u5EAB>'['k\u00f9']'",
+ "\u5EAC>'['p\u00e1ng']'",
+ "\u5EAD>'['t\u00edng']'",
+ "\u5EAE>'['yo\u016D']'",
+ "\u5EAF>'[bu]'",
+ "\u5EB0>'[ding]'",
+ "\u5EB1>'['ch\u0115ng']'",
+ "\u5EB2>'['la\u00ed']'",
+ "\u5EB3>'[bei]'",
+ "\u5EB4>'['j\u00ed']'",
+ "\u5EB5>'[an]'",
+ "\u5EB6>'['sh\u00f9']'",
+ "\u5EB7>'[kang]'",
+ "\u5EB8>'[yong]'",
+ "\u5EB9>'['tu\u014F']'",
+ "\u5EBA>'[song]'",
+ "\u5EBB>'['sh\u00f9']'",
+ "\u5EBC>'['q\u012Dng']'",
+ "\u5EBD>'['y\u00f9']'",
+ "\u5EBE>'['y\u016D']'",
+ "\u5EBF>'['mia\u00f2']'",
+ "\u5EC0>'[sou]'",
+ "\u5EC1>'['c\u00e8']'",
+ "\u5EC2>'[xiang]'",
+ "\u5EC3>'['fe\u00ec']'",
+ "\u5EC4>'['ji\u00f9']'",
+ "\u5EC5>'['h\u00e9']'",
+ "\u5EC6>'['hu\u00ec']'",
+ "\u5EC7>'['li\u00f9']'",
+ "\u5EC8>'['sh\u00e0']'",
+ "\u5EC9>'['li\u00e1n']'",
+ "\u5ECA>'['l\u00e1ng']'",
+ "\u5ECB>'[sou]'",
+ "\u5ECC>'['ji\u00e0n']'",
+ "\u5ECD>'['po\u016D']'",
+ "\u5ECE>'['q\u012Dng']'",
+ "\u5ECF>'['ji\u00f9']'",
+ "\u5ED0>'['ji\u00f9']'",
+ "\u5ED1>'['q\u00edn']'",
+ "\u5ED2>'['a\u00f3']'",
+ "\u5ED3>'['ku\u00f2']'",
+ "\u5ED4>'['lo\u00fa']'",
+ "\u5ED5>'[yin]'",
+ "\u5ED6>'['lia\u00f2']'",
+ "\u5ED7>'['da\u00ec']'",
+ "\u5ED8>'['l\u00f9']'",
+ "\u5ED9>'['y\u00ec']'",
+ "\u5EDA>'['ch\u00fa']'",
+ "\u5EDB>'['ch\u00e1n']'",
+ "\u5EDC>'[tu]'",
+ "\u5EDD>'[si]'",
+ "\u5EDE>'[xin]'",
+ "\u5EDF>'['mia\u00f2']'",
+ "\u5EE0>'['ch\u0103ng']'",
+ "\u5EE1>'['w\u016D']'",
+ "\u5EE2>'['fe\u00ec']'",
+ "\u5EE3>'['gu\u0103ng']'",
+ "\u5EE5>'['kua\u00ec']'",
+ "\u5EE6>'['b\u00ec']'",
+ "\u5EE7>'['qi\u00e1ng']'",
+ "\u5EE8>'['xi\u00e8']'",
+ "\u5EE9>'['l\u012Dn']'",
+ "\u5EEA>'['l\u012Dn']'",
+ "\u5EEB>'['lia\u00f3']'",
+ "\u5EEC>'['l\u00fa']'",
+ "\u5EEE>'['y\u00edng']'",
+ "\u5EEF>'[xian]'",
+ "\u5EF0>'[ting]'",
+ "\u5EF1>'[yong]'",
+ "\u5EF2>'['l\u00ed']'",
+ "\u5EF3>'[ting]'",
+ "\u5EF4>'['y\u012Dn']'",
+ "\u5EF5>'['x\u00fan']'",
+ "\u5EF6>'['y\u00e1n']'",
+ "\u5EF7>'['t\u00edng']'",
+ "\u5EF8>'['d\u00ed']'",
+ "\u5EF9>'['p\u00f2']'",
+ "\u5EFA>'['ji\u00e0n']'",
+ "\u5EFB>'['hu\u00ed']'",
+ "\u5EFC>'['na\u012D']'",
+ "\u5EFD>'['hu\u00ed']'",
+ "\u5EFE>'['g\u00f2ng']'",
+ "\u5EFF>'['ni\u00e0n']'",
+ "\u5F00>'[kai]'",
+ "\u5F01>'['bi\u00e0n']'",
+ "\u5F02>'['y\u00ec']'",
+ "\u5F03>'['q\u00ec']'",
+ "\u5F04>'['n\u00f2ng']'",
+ "\u5F05>'['f\u00e9n']'",
+ "\u5F06>'['j\u016D']'",
+ "\u5F07>'['y\u0103n']'",
+ "\u5F08>'['y\u00ec']'",
+ "\u5F09>'['z\u00e0ng']'",
+ "\u5F0A>'['b\u00ec']'",
+ "\u5F0B>'['y\u00ec']'",
+ "\u5F0C>'[yi]'",
+ "\u5F0D>'['\u00e8r']'",
+ "\u5F0E>'[san]'",
+ "\u5F0F>'['sh\u00ec']'",
+ "\u5F10>'['\u00e8r']'",
+ "\u5F11>'['sh\u00ec']'",
+ "\u5F12>'['sh\u00ec']'",
+ "\u5F13>'[gong]'",
+ "\u5F14>'['dia\u00f2']'",
+ "\u5F15>'['y\u012Dn']'",
+ "\u5F16>'['h\u00f9']'",
+ "\u5F17>'['f\u00fa']'",
+ "\u5F18>'['h\u00f3ng']'",
+ "\u5F19>'[wu]'",
+ "\u5F1A>'['tu\u00ed']'",
+ "\u5F1B>'['ch\u00ed']'",
+ "\u5F1C>'['ji\u00e0ng']'",
+ "\u5F1D>'['b\u00e0']'",
+ "\u5F1E>'['sh\u0115n']'",
+ "\u5F1F>'['d\u00ec']'",
+ "\u5F20>'[zhang]'",
+ "\u5F21>'['ju\u00e9']'",
+ "\u5F22>'[tao]'",
+ "\u5F23>'['f\u016D']'",
+ "\u5F24>'['d\u012D']'",
+ "\u5F25>'['m\u00ed']'",
+ "\u5F26>'['xi\u00e1n']'",
+ "\u5F27>'['h\u00fa']'",
+ "\u5F28>'[chao]'",
+ "\u5F29>'['n\u016D']'",
+ "\u5F2A>'['j\u00ecng']'",
+ "\u5F2B>'['zh\u0115n']'",
+ "\u5F2C>'['y\u00ed']'",
+ "\u5F2D>'['m\u012D']'",
+ "\u5F2E>'[quan]'",
+ "\u5F2F>'[wan]'",
+ "\u5F30>'[shao]'",
+ "\u5F31>'['ru\u00f2']'",
+ "\u5F32>'[xuan]'",
+ "\u5F33>'['j\u00ecng']'",
+ "\u5F34>'[dun]'",
+ "\u5F35>'[zhang]'",
+ "\u5F36>'['ji\u00e0ng']'",
+ "\u5F37>'['qi\u00e1ng']'",
+ "\u5F38>'['p\u00e9ng']'",
+ "\u5F39>'['d\u00e0n']'",
+ "\u5F3A>'['qi\u00e1ng']'",
+ "\u5F3B>'['b\u00ec']'",
+ "\u5F3C>'['b\u00ec']'",
+ "\u5F3D>'['sh\u00e8']'",
+ "\u5F3E>'['d\u00e0n']'",
+ "\u5F3F>'['ji\u0103n']'",
+ "\u5F40>'['go\u00f9']'",
+ "\u5F42>'[fa]'",
+ "\u5F43>'['b\u00ec']'",
+ "\u5F44>'[kou]'",
+ "\u5F46>'['bi\u00e8']'",
+ "\u5F47>'[xiao]'",
+ "\u5F48>'['d\u00e0n']'",
+ "\u5F49>'['ku\u00f2']'",
+ "\u5F4A>'['qi\u00e1ng']'",
+ "\u5F4B>'['h\u00f3ng']'",
+ "\u5F4C>'['m\u00ed']'",
+ "\u5F4D>'['ku\u00f2']'",
+ "\u5F4E>'[wan]'",
+ "\u5F4F>'['ju\u00e9']'",
+ "\u5F50>'['j\u00ec']'",
+ "\u5F51>'['j\u00ec']'",
+ "\u5F52>'[gui]'",
+ "\u5F53>'[dang]'",
+ "\u5F54>'['l\u00f9']'",
+ "\u5F55>'['l\u00f9']'",
+ "\u5F56>'['tu\u00e0n']'",
+ "\u5F57>'['hu\u00ec']'",
+ "\u5F58>'['zh\u00ec']'",
+ "\u5F59>'['hu\u00ec']'",
+ "\u5F5A>'['hu\u00ec']'",
+ "\u5F5B>'['y\u00ed']'",
+ "\u5F5C>'['y\u00ed']'",
+ "\u5F5D>'['y\u00ed']'",
+ "\u5F5E>'['y\u00ed']'",
+ "\u5F5F>'['hu\u00f2']'",
+ "\u5F60>'['hu\u00f2']'",
+ "\u5F61>'[shan]'",
+ "\u5F62>'['x\u00edng']'",
+ "\u5F63>'['w\u00e9n']'",
+ "\u5F64>'['t\u00f3ng']'",
+ "\u5F65>'['y\u00e0n']'",
+ "\u5F66>'['y\u00e0n']'",
+ "\u5F67>'['y\u00f9']'",
+ "\u5F68>'[chi]'",
+ "\u5F69>'['ca\u012D']'",
+ "\u5F6A>'[biao]'",
+ "\u5F6B>'[diao]'",
+ "\u5F6C>'[bin]'",
+ "\u5F6D>'['p\u00e9ng']'",
+ "\u5F6E>'['y\u014Fng']'",
+ "\u5F6F>'[piao]'",
+ "\u5F70>'[zhang]'",
+ "\u5F71>'['y\u012Dng']'",
+ "\u5F72>'[chi]'",
+ "\u5F73>'['ch\u00ec']'",
+ "\u5F74>'['zhu\u00f3']'",
+ "\u5F75>'['tu\u014F']'",
+ "\u5F76>'['j\u00ed']'",
+ "\u5F77>'['p\u00e1ng']'",
+ "\u5F78>'[zhong]'",
+ "\u5F79>'['y\u00ec']'",
+ "\u5F7A>'['w\u00e1ng']'",
+ "\u5F7B>'['ch\u00e8']'",
+ "\u5F7C>'['b\u012D']'",
+ "\u5F7D>'['ch\u00ed']'",
+ "\u5F7E>'['l\u012Dng']'",
+ "\u5F7F>'['f\u00fa']'",
+ "\u5F80>'['w\u0103ng']'",
+ "\u5F81>'[zheng]'",
+ "\u5F82>'['c\u00fa']'",
+ "\u5F83>'['w\u0103ng']'",
+ "\u5F84>'['j\u00ecng']'",
+ "\u5F85>'['da\u00ec']'",
+ "\u5F86>'[xi]'",
+ "\u5F87>'['x\u00f9n']'",
+ "\u5F88>'['h\u0115n']'",
+ "\u5F89>'['y\u00e1ng']'",
+ "\u5F8A>'['hua\u00ed']'",
+ "\u5F8B>'['l\u01DC']'",
+ "\u5F8C>'['ho\u00f9']'",
+ "\u5F8D>'[wa]'",
+ "\u5F8E>'['ch\u0115ng']'",
+ "\u5F8F>'['zh\u00ec']'",
+ "\u5F90>'['x\u00fa']'",
+ "\u5F91>'['j\u00ecng']'",
+ "\u5F92>'['t\u00fa']'",
+ "\u5F93>'['c\u00f3ng']'",
+ "\u5F95>'['la\u00ed']'",
+ "\u5F96>'['c\u00f3ng']'",
+ "\u5F97>'['d\u00e9']'",
+ "\u5F98>'['pa\u00ed']'",
+ "\u5F99>'['x\u012D']'",
+ "\u5F9B>'['q\u00ec']'",
+ "\u5F9C>'['ch\u00e1ng']'",
+ "\u5F9D>'['zh\u00ec']'",
+ "\u5F9E>'['c\u00f3ng']'",
+ "\u5F9F>'[zhou]'",
+ "\u5FA0>'['la\u00ed']'",
+ "\u5FA1>'['y\u00f9']'",
+ "\u5FA2>'['xi\u00e8']'",
+ "\u5FA3>'['ji\u00e8']'",
+ "\u5FA4>'['ji\u00e0n']'",
+ "\u5FA5>'['ch\u00ed']'",
+ "\u5FA6>'['ji\u0103']'",
+ "\u5FA7>'['bi\u00e0n']'",
+ "\u5FA8>'['hu\u00e1ng']'",
+ "\u5FA9>'['f\u00f9']'",
+ "\u5FAA>'['x\u00fan']'",
+ "\u5FAB>'['we\u012D']'",
+ "\u5FAC>'['p\u00e1ng']'",
+ "\u5FAD>'['ya\u00f3']'",
+ "\u5FAE>'[wei]'",
+ "\u5FAF>'[xi]'",
+ "\u5FB0>'[zheng]'",
+ "\u5FB1>'['pia\u00f2']'",
+ "\u5FB2>'['ch\u00ed']'",
+ "\u5FB3>'['d\u00e9']'",
+ "\u5FB4>'[zheng]'",
+ "\u5FB5>'[zheng]'",
+ "\u5FB6>'['bi\u00e8']'",
+ "\u5FB7>'['d\u00e9']'",
+ "\u5FB8>'[chong]'",
+ "\u5FB9>'['ch\u00e8']'",
+ "\u5FBA>'['jia\u014F']'",
+ "\u5FBB>'['we\u00ec']'",
+ "\u5FBC>'['jia\u00f2']'",
+ "\u5FBD>'[hui]'",
+ "\u5FBE>'['me\u00ed']'",
+ "\u5FBF>'['l\u00f2ng']'",
+ "\u5FC0>'[xiang]'",
+ "\u5FC1>'['ba\u00f2']'",
+ "\u5FC2>'['q\u00fa']'",
+ "\u5FC3>'[xin]'",
+ "\u5FC4>'['shu4xin1p\u00e1ng']'",
+ "\u5FC5>'['b\u00ec']'",
+ "\u5FC6>'['y\u00ec']'",
+ "\u5FC7>'['l\u00e8']'",
+ "\u5FC8>'['r\u00e9n']'",
+ "\u5FC9>'[dao]'",
+ "\u5FCA>'['d\u00ecng']'",
+ "\u5FCB>'['ga\u012D']'",
+ "\u5FCC>'['j\u00ec']'",
+ "\u5FCD>'['r\u0115n']'",
+ "\u5FCE>'['r\u00e9n']'",
+ "\u5FCF>'['ch\u00e0n']'",
+ "\u5FD0>'['t\u0103n']'",
+ "\u5FD1>'['t\u00e8']'",
+ "\u5FD2>'['t\u00e8']'",
+ "\u5FD3>'[gan]'",
+ "\u5FD4>'['q\u00ec']'",
+ "\u5FD5>'['sh\u00ec']'",
+ "\u5FD6>'['c\u016Dn']'",
+ "\u5FD7>'['zh\u00ec']'",
+ "\u5FD8>'['w\u00e0ng']'",
+ "\u5FD9>'['m\u00e1ng']'",
+ "\u5FDA>'[xi]'",
+ "\u5FDB>'['f\u00e1n']'",
+ "\u5FDC>'[ying]'",
+ "\u5FDD>'['ti\u0103n']'",
+ "\u5FDE>'['m\u00edn']'",
+ "\u5FDF>'['m\u00edn']'",
+ "\u5FE0>'[zhong]'",
+ "\u5FE1>'[chong]'",
+ "\u5FE2>'['w\u00f9']'",
+ "\u5FE3>'['j\u00ed']'",
+ "\u5FE4>'['w\u016D']'",
+ "\u5FE5>'['x\u00ec']'",
+ "\u5FE6>'['y\u00e8']'",
+ "\u5FE7>'[you]'",
+ "\u5FE8>'['w\u00e0n']'",
+ "\u5FE9>'[cong]'",
+ "\u5FEA>'[zhong]'",
+ "\u5FEB>'['kua\u00ec']'",
+ "\u5FEC>'['y\u00f9']'",
+ "\u5FED>'['bi\u00e0n']'",
+ "\u5FEE>'['zh\u00ec']'",
+ "\u5FEF>'['q\u00ed']'",
+ "\u5FF0>'['cu\u00ec']'",
+ "\u5FF1>'['ch\u00e9n']'",
+ "\u5FF2>'['ta\u00ec']'",
+ "\u5FF3>'['t\u00fan']'",
+ "\u5FF4>'['qi\u00e1n']'",
+ "\u5FF5>'['ni\u00e0n']'",
+ "\u5FF6>'['h\u00fan']'",
+ "\u5FF7>'[xiong]'",
+ "\u5FF8>'['ni\u016D']'",
+ "\u5FF9>'['w\u0103ng']'",
+ "\u5FFA>'[xian]'",
+ "\u5FFB>'[xin]'",
+ "\u5FFC>'[kang]'",
+ "\u5FFD>'[hu]'",
+ "\u5FFE>'['ka\u00ec']'",
+ "\u5FFF>'['f\u00e8n']'",
+ "\u6000>'['hua\u00ed']'",
+ "\u6001>'['ta\u00ec']'",
+ "\u6002>'['s\u014Fng']'",
+ "\u6003>'['w\u016D']'",
+ "\u6004>'['o\u00f9']'",
+ "\u6005>'['ch\u00e0ng']'",
+ "\u6006>'['chu\u00e0ng']'",
+ "\u6007>'['j\u00f9']'",
+ "\u6008>'['y\u00ec']'",
+ "\u6009>'['ba\u014F']'",
+ "\u600A>'[chao]'",
+ "\u600B>'['m\u00edn']'",
+ "\u600C>'[pei]'",
+ "\u600D>'['zu\u00f2']'",
+ "\u600E>'['z\u0115n']'",
+ "\u600F>'['y\u00e0ng']'",
+ "\u6010>'['ko\u00f9']'",
+ "\u6011>'['b\u00e0n']'",
+ "\u6012>'['n\u00f9']'",
+ "\u6013>'['na\u00f3']'",
+ "\u6014>'[zheng]'",
+ "\u6015>'['p\u00e0']'",
+ "\u6016>'['b\u00f9']'",
+ "\u6017>'[tie]'",
+ "\u6018>'['g\u00f9']'",
+ "\u6019>'['h\u00f9']'",
+ "\u601A>'['j\u00f9']'",
+ "\u601B>'['d\u00e1']'",
+ "\u601C>'['li\u00e1n']'",
+ "\u601D>'[si]'",
+ "\u601E>'[chou]'",
+ "\u601F>'['d\u00ec']'",
+ "\u6020>'['da\u00ec']'",
+ "\u6021>'['y\u00ed']'",
+ "\u6022>'['t\u00fa']'",
+ "\u6023>'['yo\u00fa']'",
+ "\u6024>'[fu]'",
+ "\u6025>'['j\u00ed']'",
+ "\u6026>'[peng]'",
+ "\u6027>'['x\u00ecng']'",
+ "\u6028>'['yu\u00e0n']'",
+ "\u6029>'['n\u00ed']'",
+ "\u602A>'['gua\u00ec']'",
+ "\u602B>'['f\u00fa']'",
+ "\u602C>'['x\u00ec']'",
+ "\u602D>'['b\u00ec']'",
+ "\u602E>'[you]'",
+ "\u602F>'['qi\u00e8']'",
+ "\u6030>'['xu\u00e0n']'",
+ "\u6031>'[cong]'",
+ "\u6032>'['b\u012Dng']'",
+ "\u6033>'['hu\u0103ng']'",
+ "\u6034>'['x\u00f9']'",
+ "\u6035>'['ch\u00f9']'",
+ "\u6036>'[pi]'",
+ "\u6037>'[xi]'",
+ "\u6038>'[xi]'",
+ "\u6039>'[tan]'",
+ "\u603B>'['z\u014Fng']'",
+ "\u603C>'['du\u00ec']'",
+ "\u603F>'['y\u00ec']'",
+ "\u6040>'['ch\u012D']'",
+ "\u6041>'['r\u00e8n']'",
+ "\u6042>'['x\u00fan']'",
+ "\u6043>'['sh\u00ec']'",
+ "\u6044>'['x\u00ec']'",
+ "\u6045>'['la\u014F']'",
+ "\u6046>'['h\u00e9ng']'",
+ "\u6047>'[kuang]'",
+ "\u6048>'['m\u00fa']'",
+ "\u6049>'['zh\u012D']'",
+ "\u604A>'['xi\u00e9']'",
+ "\u604B>'['li\u00e0n']'",
+ "\u604C>'[tiao]'",
+ "\u604D>'['hu\u0103ng']'",
+ "\u604E>'['di\u00e9']'",
+ "\u604F>'['ha\u014F']'",
+ "\u6050>'['k\u014Fng']'",
+ "\u6051>'['gu\u012D']'",
+ "\u6052>'['h\u00e9ng']'",
+ "\u6053>'[xi]'",
+ "\u6054>'['xia\u00f2']'",
+ "\u6055>'['sh\u00f9']'",
+ "\u6056>'[s1]'",
+ "\u6057>'['ku\u0103']'",
+ "\u6058>'[qiu]'",
+ "\u6059>'['y\u00e0ng']'",
+ "\u605A>'['hu\u00ec']'",
+ "\u605B>'['hu\u00ed']'",
+ "\u605C>'['ch\u00ec']'",
+ "\u605D>'['ji\u00e1']'",
+ "\u605E>'['y\u00ed']'",
+ "\u605F>'[xiong]'",
+ "\u6060>'['gua\u00ec']'",
+ "\u6061>'['l\u00ecn']'",
+ "\u6062>'[hui]'",
+ "\u6063>'['z\u00ec']'",
+ "\u6064>'['x\u00f9']'",
+ "\u6065>'['ch\u012D']'",
+ "\u6066>'['xi\u00e0ng']'",
+ "\u6067>'['n\u01DC']'",
+ "\u6068>'['h\u00e8n']'",
+ "\u6069>'[en]'",
+ "\u606A>'['k\u00e8']'",
+ "\u606B>'[tong]'",
+ "\u606C>'['ti\u00e1n']'",
+ "\u606D>'[gong]'",
+ "\u606E>'['qu\u00e1n']'",
+ "\u606F>'[xi]'",
+ "\u6070>'['qi\u00e0']'",
+ "\u6071>'['yu\u00e8']'",
+ "\u6072>'[peng]'",
+ "\u6073>'['k\u0115n']'",
+ "\u6074>'['d\u00e9']'",
+ "\u6075>'['hu\u00ec']'",
+ "\u6076>'['\u00e8']'",
+ "\u6078>'['t\u00f2ng']'",
+ "\u6079>'['y\u00e0n']'",
+ "\u607A>'['ka\u012D']'",
+ "\u607B>'['c\u00e8']'",
+ "\u607C>'['na\u014F']'",
+ "\u607D>'['y\u00f9n']'",
+ "\u607E>'['m\u00e1ng']'",
+ "\u607F>'['y\u014Fng']'",
+ "\u6080>'['y\u014Fng']'",
+ "\u6081>'[yuan]'",
+ "\u6082>'[pi]'",
+ "\u6083>'['k\u016Dn']'",
+ "\u6084>'['qia\u014F']'",
+ "\u6085>'['yu\u00e8']'",
+ "\u6086>'['y\u00f9']'",
+ "\u6087>'['y\u00f9']'",
+ "\u6088>'['ji\u00e8']'",
+ "\u6089>'[xi]'",
+ "\u608A>'['zh\u00e9']'",
+ "\u608B>'['l\u00ecn']'",
+ "\u608C>'['t\u00ec']'",
+ "\u608D>'['h\u00e0n']'",
+ "\u608E>'['ha\u00f2']'",
+ "\u608F>'['qi\u00e8']'",
+ "\u6090>'['t\u00ec']'",
+ "\u6091>'['b\u00f9']'",
+ "\u6092>'['y\u00ec']'",
+ "\u6093>'['qi\u00e0n']'",
+ "\u6094>'['hu\u012D']'",
+ "\u6095>'[xi]'",
+ "\u6096>'['be\u00ec']'",
+ "\u6097>'['m\u00e1n']'",
+ "\u6098>'[yi]'",
+ "\u6099>'[heng]'",
+ "\u609A>'['s\u014Fng']'",
+ "\u609B>'[quan]'",
+ "\u609C>'['ch\u0115ng']'",
+ "\u609D>'[hui]'",
+ "\u609E>'['w\u00f9']'",
+ "\u609F>'['w\u00f9']'",
+ "\u60A0>'[you]'",
+ "\u60A1>'['l\u00ed']'",
+ "\u60A2>'['li\u00e0ng']'",
+ "\u60A3>'['hu\u00e0n']'",
+ "\u60A4>'[cong]'",
+ "\u60A5>'['y\u00ec']'",
+ "\u60A6>'['yu\u00e8']'",
+ "\u60A7>'['l\u00ec']'",
+ "\u60A8>'['n\u00edn']'",
+ "\u60A9>'['na\u014F']'",
+ "\u60AA>'['\u00e8']'",
+ "\u60AB>'['qu\u00e8']'",
+ "\u60AC>'['xu\u00e1n']'",
+ "\u60AD>'[qian]'",
+ "\u60AE>'['w\u00f9']'",
+ "\u60AF>'['m\u012Dn']'",
+ "\u60B0>'['c\u00f3ng']'",
+ "\u60B1>'['fe\u012D']'",
+ "\u60B2>'[bei]'",
+ "\u60B3>'['du\u00f3']'",
+ "\u60B4>'['cu\u00ec']'",
+ "\u60B5>'['ch\u00e0ng']'",
+ "\u60B6>'['m\u00e8n']'",
+ "\u60B7>'['l\u00ec']'",
+ "\u60B8>'['j\u00ec']'",
+ "\u60B9>'['gu\u00e0n']'",
+ "\u60BA>'['gu\u00e0n']'",
+ "\u60BB>'['x\u00ecng']'",
+ "\u60BC>'['da\u00f2']'",
+ "\u60BD>'[qi]'",
+ "\u60BE>'[kong]'",
+ "\u60BF>'['ti\u0103n']'",
+ "\u60C0>'['l\u00fan']'",
+ "\u60C1>'[xi]'",
+ "\u60C2>'['k\u0103n']'",
+ "\u60C3>'[kun]'",
+ "\u60C4>'['n\u00ec']'",
+ "\u60C5>'['q\u00edng']'",
+ "\u60C6>'['cho\u00fa']'",
+ "\u60C7>'[dun]'",
+ "\u60C8>'['gu\u014F']'",
+ "\u60C9>'[chan]'",
+ "\u60CA>'['li\u00e1ng']'",
+ "\u60CB>'['w\u0103n']'",
+ "\u60CC>'[yuan]'",
+ "\u60CD>'[jin]'",
+ "\u60CE>'['j\u00ec']'",
+ "\u60CF>'['l\u00edn']'",
+ "\u60D0>'['y\u00f9']'",
+ "\u60D1>'['hu\u00f2']'",
+ "\u60D2>'['h\u00e9']'",
+ "\u60D3>'['qu\u00e1n']'",
+ "\u60D4>'['t\u00e1n']'",
+ "\u60D5>'['t\u00ec']'",
+ "\u60D6>'['t\u00ec']'",
+ "\u60D7>'[nie]'",
+ "\u60D8>'['w\u0103ng']'",
+ "\u60D9>'['chu\u00f2']'",
+ "\u60DA>'[bu]'",
+ "\u60DB>'[hun]'",
+ "\u60DC>'[xi]'",
+ "\u60DD>'['t\u0103ng']'",
+ "\u60DE>'[xin]'",
+ "\u60DF>'['we\u00ed']'",
+ "\u60E0>'['hu\u00ec']'",
+ "\u60E1>'['\u00e8']'",
+ "\u60E2>'['ru\u012D']'",
+ "\u60E3>'['z\u014Fng']'",
+ "\u60E4>'[jian]'",
+ "\u60E5>'['y\u014Fng']'",
+ "\u60E6>'['di\u00e0n']'",
+ "\u60E7>'['j\u00f9']'",
+ "\u60E8>'['c\u0103n']'",
+ "\u60E9>'['ch\u00e9ng']'",
+ "\u60EA>'['d\u00e9']'",
+ "\u60EB>'['be\u00ec']'",
+ "\u60EC>'['qi\u00e8']'",
+ "\u60ED>'['c\u00e1n']'",
+ "\u60EE>'['d\u00e0n']'",
+ "\u60EF>'['gu\u00e0n']'",
+ "\u60F0>'['du\u00f2']'",
+ "\u60F1>'['na\u014F']'",
+ "\u60F2>'['y\u00f9n']'",
+ "\u60F3>'['xi\u0103ng']'",
+ "\u60F4>'['zhu\u00ec']'",
+ "\u60F5>'['di\u00e8']'",
+ "\u60F6>'['hu\u00e1ng']'",
+ "\u60F7>'['ch\u016Dn']'",
+ "\u60F8>'['qi\u00f3ng']'",
+ "\u60F9>'['r\u0115']'",
+ "\u60FA>'[xing]'",
+ "\u60FB>'['c\u00e8']'",
+ "\u60FC>'['bi\u0103n']'",
+ "\u60FD>'[hun]'",
+ "\u60FE>'[zong]'",
+ "\u60FF>'['t\u00ed']'",
+ "\u6100>'['qia\u014F']'",
+ "\u6101>'['cho\u00fa']'",
+ "\u6102>'['be\u00ec']'",
+ "\u6103>'[xuan]'",
+ "\u6104>'[wei]'",
+ "\u6105>'['g\u00e9']'",
+ "\u6106>'[qian]'",
+ "\u6107>'['we\u012D']'",
+ "\u6108>'['y\u00f9']'",
+ "\u6109>'['y\u00fa']'",
+ "\u610A>'['b\u00ec']'",
+ "\u610B>'[xuan]'",
+ "\u610C>'['hu\u00e0n']'",
+ "\u610D>'['m\u012Dn']'",
+ "\u610E>'['b\u00ec']'",
+ "\u610F>'['y\u00ec']'",
+ "\u6110>'['mi\u0103n']'",
+ "\u6111>'['y\u014Fng']'",
+ "\u6112>'['ka\u00ec']'",
+ "\u6113>'['d\u00e0ng']'",
+ "\u6114>'[yin]'",
+ "\u6115>'['\u00e8']'",
+ "\u6116>'['ch\u00e9n']'",
+ "\u6117>'['mo\u00f9']'",
+ "\u6118>'['k\u00e8']'",
+ "\u6119>'['k\u00e8']'",
+ "\u611A>'['y\u00fa']'",
+ "\u611B>'['a\u00ec']'",
+ "\u611C>'['qi\u00e8']'",
+ "\u611D>'['y\u0103n']'",
+ "\u611E>'['nu\u00f2']'",
+ "\u611F>'['g\u0103n']'",
+ "\u6120>'['y\u00f9n']'",
+ "\u6121>'['z\u014Fng']'",
+ "\u6122>'[sai]'",
+ "\u6123>'['l\u00e9ng']'",
+ "\u6124>'['f\u00e8n']'",
+ "\u6126>'['ku\u00ec']'",
+ "\u6127>'['ku\u00ec']'",
+ "\u6128>'['qu\u00e8']'",
+ "\u6129>'[gong]'",
+ "\u612A>'['y\u00fan']'",
+ "\u612B>'['s\u00f9']'",
+ "\u612C>'['s\u00f9']'",
+ "\u612D>'['q\u00ed']'",
+ "\u612E>'['ya\u00f3']'",
+ "\u612F>'['s\u014Fng']'",
+ "\u6130>'['hu\u0103ng']'",
+ "\u6131>'['j\u00ed']'",
+ "\u6132>'['g\u016D']'",
+ "\u6133>'['j\u00f9']'",
+ "\u6134>'['chu\u00e0ng']'",
+ "\u6135>'['n\u00ec']'",
+ "\u6136>'['xi\u00e9']'",
+ "\u6137>'['ka\u012D']'",
+ "\u6138>'['zh\u0115ng']'",
+ "\u6139>'['y\u014Fng']'",
+ "\u613A>'['ca\u014F']'",
+ "\u613B>'['s\u00f9n']'",
+ "\u613C>'['sh\u00e8n']'",
+ "\u613D>'['b\u00f3']'",
+ "\u613E>'['ka\u00ec']'",
+ "\u613F>'['yu\u00e0n']'",
+ "\u6140>'['xi\u00e9']'",
+ "\u6141>'['h\u00f9n']'",
+ "\u6142>'['y\u014Fng']'",
+ "\u6143>'['y\u0103ng']'",
+ "\u6144>'['l\u00ec']'",
+ "\u6145>'[sao]'",
+ "\u6146>'[tao]'",
+ "\u6147>'[yin]'",
+ "\u6148>'['c\u00ed']'",
+ "\u6149>'['x\u00f9']'",
+ "\u614A>'['qi\u00e0n']'",
+ "\u614B>'['ta\u00ec']'",
+ "\u614C>'[huang]'",
+ "\u614D>'['y\u00f9n']'",
+ "\u614E>'['sh\u00e8n']'",
+ "\u614F>'['m\u012Dng']'",
+ "\u6151>'['sh\u00e8']'",
+ "\u6152>'['c\u00f3ng']'",
+ "\u6153>'['pia\u00f2']'",
+ "\u6154>'['m\u00f2']'",
+ "\u6155>'['m\u00f9']'",
+ "\u6156>'['gu\u00f3']'",
+ "\u6157>'['ch\u00ec']'",
+ "\u6158>'['c\u0103n']'",
+ "\u6159>'['c\u00e1n']'",
+ "\u615A>'['c\u00e1n']'",
+ "\u615B>'['cu\u00ed']'",
+ "\u615C>'['m\u012Dn']'",
+ "\u615D>'['t\u00e8']'",
+ "\u615E>'[zhang]'",
+ "\u615F>'['t\u00f2ng']'",
+ "\u6160>'['a\u00f2']'",
+ "\u6161>'['shu\u0103ng']'",
+ "\u6162>'['m\u00e0n']'",
+ "\u6163>'['gu\u00e0n']'",
+ "\u6164>'['qu\u00e8']'",
+ "\u6165>'['za\u00f2']'",
+ "\u6166>'['ji\u00f9']'",
+ "\u6167>'['hu\u00ec']'",
+ "\u6168>'['ka\u012D']'",
+ "\u6169>'['li\u00e1n']'",
+ "\u616A>'['o\u00f9']'",
+ "\u616B>'['s\u014Fng']'",
+ "\u616C>'['j\u012Dn']'",
+ "\u616D>'['y\u00ecn']'",
+ "\u616E>'['l\u01DC']'",
+ "\u616F>'[shang]'",
+ "\u6170>'['we\u00ec']'",
+ "\u6171>'['tu\u00e1n']'",
+ "\u6172>'['m\u00e1n']'",
+ "\u6173>'[qian]'",
+ "\u6174>'['sh\u00e8']'",
+ "\u6175>'[yong]'",
+ "\u6176>'['q\u00ecng']'",
+ "\u6177>'[kang]'",
+ "\u6178>'['d\u00ec']'",
+ "\u6179>'['zh\u00ed']'",
+ "\u617A>'['lo\u00fa']'",
+ "\u617B>'['ju\u00e0n']'",
+ "\u617C>'[qi]'",
+ "\u617D>'[qi]'",
+ "\u617E>'['y\u00f9']'",
+ "\u617F>'['p\u00edng']'",
+ "\u6180>'['lia\u00f3']'",
+ "\u6181>'[cong]'",
+ "\u6182>'[you]'",
+ "\u6183>'[chong]'",
+ "\u6184>'['zh\u00ec']'",
+ "\u6185>'['t\u00f2ng']'",
+ "\u6186>'[cheng]'",
+ "\u6187>'['q\u00ec']'",
+ "\u6188>'[qu]'",
+ "\u6189>'['p\u00e9ng']'",
+ "\u618A>'['be\u00ec']'",
+ "\u618B>'[bie]'",
+ "\u618C>'['ch\u00fan']'",
+ "\u618D>'[jiao]'",
+ "\u618E>'[zeng]'",
+ "\u618F>'['ch\u00ec']'",
+ "\u6190>'['li\u00e1n']'",
+ "\u6191>'['p\u00edng']'",
+ "\u6192>'['ku\u00ec']'",
+ "\u6193>'['hu\u00ec']'",
+ "\u6194>'['qia\u00f3']'",
+ "\u6195>'['ch\u00e9ng']'",
+ "\u6196>'['y\u00ecn']'",
+ "\u6197>'['y\u00ecn']'",
+ "\u6198>'['x\u012D']'",
+ "\u6199>'['x\u012D']'",
+ "\u619A>'['d\u00e0n']'",
+ "\u619B>'['t\u00e1n']'",
+ "\u619C>'['du\u014F']'",
+ "\u619D>'['du\u00ec']'",
+ "\u619E>'['du\u00ec']'",
+ "\u619F>'['s\u00f9']'",
+ "\u61A0>'['ju\u00e9']'",
+ "\u61A1>'['c\u00e8']'",
+ "\u61A2>'[xiao]'",
+ "\u61A3>'['f\u00e1n']'",
+ "\u61A4>'['f\u00e8n']'",
+ "\u61A5>'['la\u00f3']'",
+ "\u61A6>'['la\u00f2']'",
+ "\u61A7>'[chong]'",
+ "\u61A8>'[han]'",
+ "\u61A9>'['q\u00ec']'",
+ "\u61AA>'['xi\u00e1n']'",
+ "\u61AB>'['m\u012Dn']'",
+ "\u61AC>'['j\u012Dng']'",
+ "\u61AD>'['lia\u014F']'",
+ "\u61AE>'['w\u016D']'",
+ "\u61AF>'['c\u0103n']'",
+ "\u61B0>'['ju\u00e9']'",
+ "\u61B1>'['c\u00f9']'",
+ "\u61B2>'['xi\u00e0n']'",
+ "\u61B3>'['t\u0103n']'",
+ "\u61B4>'['sh\u00e9ng']'",
+ "\u61B5>'[pi]'",
+ "\u61B6>'['y\u00ec']'",
+ "\u61B7>'['ch\u016D']'",
+ "\u61B8>'[xian]'",
+ "\u61B9>'['na\u00f3']'",
+ "\u61BA>'['d\u00e0n']'",
+ "\u61BB>'['t\u0103n']'",
+ "\u61BC>'['j\u012Dng']'",
+ "\u61BD>'[song]'",
+ "\u61BE>'['h\u00e0n']'",
+ "\u61BF>'[jiao]'",
+ "\u61C0>'['wa\u00ec']'",
+ "\u61C1>'['hu\u00e1n']'",
+ "\u61C2>'['d\u014Fng']'",
+ "\u61C3>'['q\u00edn']'",
+ "\u61C4>'['q\u00edn']'",
+ "\u61C5>'['q\u00fa']'",
+ "\u61C6>'['ca\u014F']'",
+ "\u61C7>'['k\u0115n']'",
+ "\u61C8>'['xi\u00e8']'",
+ "\u61C9>'['y\u00ecng']'",
+ "\u61CA>'['a\u00f2']'",
+ "\u61CB>'['ma\u00f2']'",
+ "\u61CC>'['y\u00ec']'",
+ "\u61CD>'['l\u012Dn']'",
+ "\u61CE>'['s\u00e8']'",
+ "\u61CF>'['j\u00f9n']'",
+ "\u61D0>'['hua\u00ed']'",
+ "\u61D1>'['m\u00e8n']'",
+ "\u61D2>'['l\u0103n']'",
+ "\u61D3>'['a\u00ec']'",
+ "\u61D4>'['l\u012Dn']'",
+ "\u61D5>'[yan]'",
+ "\u61D6>'[gua]'",
+ "\u61D7>'['xi\u00e0']'",
+ "\u61D8>'['ch\u00ec']'",
+ "\u61D9>'['y\u016D']'",
+ "\u61DA>'['y\u00ecn']'",
+ "\u61DB>'[dai]'",
+ "\u61DC>'['m\u00e8ng']'",
+ "\u61DD>'['a\u00ec']'",
+ "\u61DE>'['m\u00e9ng']'",
+ "\u61DF>'['du\u00ec']'",
+ "\u61E0>'['q\u00ed']'",
+ "\u61E1>'['m\u014F']'",
+ "\u61E2>'['l\u00e1n']'",
+ "\u61E3>'['m\u00e8n']'",
+ "\u61E4>'['cho\u00fa']'",
+ "\u61E5>'['zh\u00ec']'",
+ "\u61E6>'['nu\u00f2']'",
+ "\u61E7>'['nu\u00f2']'",
+ "\u61E8>'[yan]'",
+ "\u61E9>'['y\u0103ng']'",
+ "\u61EA>'['b\u00f3']'",
+ "\u61EB>'['zh\u00ed']'",
+ "\u61EC>'['ku\u00e0ng']'",
+ "\u61ED>'['ku\u00e0ng']'",
+ "\u61EE>'['yo\u016D']'",
+ "\u61EF>'[fu]'",
+ "\u61F0>'['li\u00fa']'",
+ "\u61F1>'['mi\u00e8']'",
+ "\u61F2>'['ch\u00e9ng']'",
+ "\u61F4>'['ch\u00e0n']'",
+ "\u61F5>'['m\u00e9ng']'",
+ "\u61F6>'['l\u0103n']'",
+ "\u61F7>'['hua\u00ed']'",
+ "\u61F8>'['xu\u00e1n']'",
+ "\u61F9>'['r\u00e0ng']'",
+ "\u61FA>'['ch\u00e0n']'",
+ "\u61FB>'['j\u00ec']'",
+ "\u61FC>'['j\u00f9']'",
+ "\u61FD>'[huan]'",
+ "\u61FE>'['sh\u00e8']'",
+ "\u61FF>'['y\u00ec']'",
+ "\u6200>'['li\u00e0n']'",
+ "\u6201>'['n\u0103n']'",
+ "\u6202>'['m\u00ed']'",
+ "\u6203>'['t\u0103ng']'",
+ "\u6204>'['ju\u00e9']'",
+ "\u6205>'['g\u00e0ng']'",
+ "\u6206>'['g\u00e0ng']'",
+ "\u6207>'['g\u00e0ng']'",
+ "\u6208>'[ge]'",
+ "\u6209>'['yu\u00e8']'",
+ "\u620A>'['w\u00f9']'",
+ "\u620B>'[jian]'",
+ "\u620C>'[xu]'",
+ "\u620D>'['sh\u00f9']'",
+ "\u620E>'['r\u00f3ng']'",
+ "\u620F>'['x\u00ec']'",
+ "\u6210>'['ch\u00e9ng']'",
+ "\u6211>'['w\u014F']'",
+ "\u6212>'['ji\u00e8']'",
+ "\u6213>'[ge]'",
+ "\u6214>'[jian]'",
+ "\u6215>'[qiang]'",
+ "\u6216>'['hu\u00f2']'",
+ "\u6217>'[qiang]'",
+ "\u6218>'['zh\u00e0n']'",
+ "\u6219>'['d\u00f2ng']'",
+ "\u621A>'[qi]'",
+ "\u621B>'['ji\u00e1']'",
+ "\u621C>'['di\u00e9']'",
+ "\u621D>'['ze\u00ed']'",
+ "\u621E>'['ji\u00e1']'",
+ "\u621F>'['j\u012D']'",
+ "\u6220>'['sh\u00ec']'",
+ "\u6221>'[kan]'",
+ "\u6222>'['j\u00ed']'",
+ "\u6223>'['ku\u00ed']'",
+ "\u6224>'['ga\u00ec']'",
+ "\u6225>'['d\u0115ng']'",
+ "\u6226>'['zh\u00e0n']'",
+ "\u6227>'[chuang]'",
+ "\u6228>'[ge]'",
+ "\u6229>'['ji\u0103n']'",
+ "\u622A>'['ji\u00e9']'",
+ "\u622B>'['y\u00f9']'",
+ "\u622C>'['ji\u0103n']'",
+ "\u622D>'['y\u0103n']'",
+ "\u622E>'['l\u00f9']'",
+ "\u622F>'['x\u00ec']'",
+ "\u6230>'['zh\u00e0n']'",
+ "\u6231>'['x\u00ec']'",
+ "\u6232>'['x\u00ec']'",
+ "\u6233>'[chuo]'",
+ "\u6234>'['da\u00ec']'",
+ "\u6235>'['q\u00fa']'",
+ "\u6236>'['h\u00f9']'",
+ "\u6237>'['h\u00f9']'",
+ "\u6238>'['h\u00f9']'",
+ "\u6239>'['\u00e8']'",
+ "\u623A>'['sh\u00ec']'",
+ "\u623B>'['l\u00ec']'",
+ "\u623C>'['ma\u014F']'",
+ "\u623D>'['h\u00f9']'",
+ "\u623E>'['l\u00ec']'",
+ "\u623F>'['f\u00e1ng']'",
+ "\u6240>'['su\u014F']'",
+ "\u6241>'['bi\u0103n']'",
+ "\u6242>'['di\u00e0n']'",
+ "\u6243>'[jiong]'",
+ "\u6244>'['sh\u0103ng']'",
+ "\u6245>'['y\u00ed']'",
+ "\u6246>'['y\u012D']'",
+ "\u6247>'['sh\u00e0n']'",
+ "\u6248>'['h\u00f9']'",
+ "\u6249>'[fei]'",
+ "\u624A>'['y\u0103n']'",
+ "\u624B>'['sho\u016D']'",
+ "\u624C>'['t1shou3p\u00e1ng']'",
+ "\u624D>'['ca\u00ed']'",
+ "\u624E>'[zha]'",
+ "\u624F>'['qi\u00fa']'",
+ "\u6250>'['l\u00e8']'",
+ "\u6251>'[bu]'",
+ "\u6252>'[ba]'",
+ "\u6253>'['d\u0103']'",
+ "\u6254>'[reng]'",
+ "\u6255>'['f\u00fa']'",
+ "\u6257>'['za\u00ec']'",
+ "\u6258>'[tuo]'",
+ "\u6259>'['zh\u00e0ng']'",
+ "\u625A>'[diao]'",
+ "\u625B>'['k\u00e1ng']'",
+ "\u625C>'[yu]'",
+ "\u625D>'[ku]'",
+ "\u625E>'['h\u00e0n']'",
+ "\u625F>'[shen]'",
+ "\u6260>'[cha]'",
+ "\u6261>'['y\u012D']'",
+ "\u6262>'['g\u016D']'",
+ "\u6263>'['ko\u00f9']'",
+ "\u6264>'['w\u00f9']'",
+ "\u6265>'[tuo]'",
+ "\u6266>'[qian]'",
+ "\u6267>'['zh\u00ed']'",
+ "\u6268>'['r\u00e8n']'",
+ "\u6269>'['ku\u00f2']'",
+ "\u626A>'['m\u00e9n']'",
+ "\u626B>'['sa\u014F']'",
+ "\u626C>'['y\u00e1ng']'",
+ "\u626D>'['ni\u016D']'",
+ "\u626E>'['b\u00e0n']'",
+ "\u626F>'['ch\u0115']'",
+ "\u6270>'['ra\u014F']'",
+ "\u6271>'[xi]'",
+ "\u6272>'['qi\u00e1n']'",
+ "\u6273>'[ban]'",
+ "\u6274>'['ji\u00e1']'",
+ "\u6275>'['y\u00fa']'",
+ "\u6276>'['f\u00fa']'",
+ "\u6277>'['a\u00f2']'",
+ "\u6278>'[xi]'",
+ "\u6279>'[pi]'",
+ "\u627A>'['zh\u012D']'",
+ "\u627B>'['z\u00ec']'",
+ "\u627C>'['\u00e8']'",
+ "\u627D>'['d\u00f9n']'",
+ "\u627E>'['zha\u014F']'",
+ "\u627F>'['ch\u00e9ng']'",
+ "\u6280>'['j\u00ec']'",
+ "\u6281>'['y\u0103n']'",
+ "\u6282>'['ku\u00e1ng']'",
+ "\u6283>'['bi\u00e0n']'",
+ "\u6284>'[chao]'",
+ "\u6285>'[ju]'",
+ "\u6286>'['w\u00e8n']'",
+ "\u6287>'['h\u00fa']'",
+ "\u6288>'['yu\u00e8']'",
+ "\u6289>'['ju\u00e9']'",
+ "\u628A>'['b\u0103']'",
+ "\u628B>'['q\u00ecn']'",
+ "\u628C>'['zh\u0115n']'",
+ "\u628D>'['zh\u0115ng']'",
+ "\u628E>'['y\u016Dn']'",
+ "\u628F>'['w\u00e1n']'",
+ "\u6290>'['n\u00f9']'",
+ "\u6291>'['y\u00ec']'",
+ "\u6292>'[shu]'",
+ "\u6293>'[zhua]'",
+ "\u6294>'['po\u00fa']'",
+ "\u6295>'['to\u00fa']'",
+ "\u6296>'['do\u016D']'",
+ "\u6297>'['k\u00e0ng']'",
+ "\u6298>'['zh\u00e9']'",
+ "\u6299>'['po\u00fa']'",
+ "\u629A>'['f\u016D']'",
+ "\u629B>'[pao]'",
+ "\u629C>'['b\u00e1']'",
+ "\u629D>'['a\u014F']'",
+ "\u629E>'['z\u00e9']'",
+ "\u629F>'['tu\u00e1n']'",
+ "\u62A0>'[kou]'",
+ "\u62A1>'['l\u00fan']'",
+ "\u62A2>'['qi\u0103ng']'",
+ "\u62A4>'['h\u00f9']'",
+ "\u62A5>'['ba\u00f2']'",
+ "\u62A6>'['b\u012Dng']'",
+ "\u62A7>'['zh\u012D']'",
+ "\u62A8>'[peng]'",
+ "\u62A9>'[tan]'",
+ "\u62AA>'[pu]'",
+ "\u62AB>'[pi]'",
+ "\u62AC>'['ta\u00ed']'",
+ "\u62AD>'['ya\u014F']'",
+ "\u62AE>'['zh\u0115n']'",
+ "\u62AF>'[zha]'",
+ "\u62B0>'['y\u0103ng']'",
+ "\u62B1>'['ba\u00f2']'",
+ "\u62B2>'[he]'",
+ "\u62B3>'['n\u012D']'",
+ "\u62B4>'['y\u00ec']'",
+ "\u62B5>'['d\u012D']'",
+ "\u62B6>'['ch\u00ec']'",
+ "\u62B7>'[pi]'",
+ "\u62B8>'[za]'",
+ "\u62B9>'['m\u014F']'",
+ "\u62BA>'['m\u014F']'",
+ "\u62BB>'['sh\u00e8n']'",
+ "\u62BC>'[ya]'",
+ "\u62BD>'[chou]'",
+ "\u62BE>'[qu]'",
+ "\u62BF>'['m\u012Dn']'",
+ "\u62C0>'['ch\u00f9']'",
+ "\u62C1>'[jia]'",
+ "\u62C2>'['f\u00fa']'",
+ "\u62C3>'['zh\u0103n']'",
+ "\u62C4>'['zh\u016D']'",
+ "\u62C5>'['d\u00e0n']'",
+ "\u62C6>'[chai]'",
+ "\u62C7>'['m\u016D']'",
+ "\u62C8>'['ni\u00e1n']'",
+ "\u62C9>'[la]'",
+ "\u62CA>'['f\u016D']'",
+ "\u62CB>'[pao]'",
+ "\u62CC>'['b\u00e0n']'",
+ "\u62CD>'[pai]'",
+ "\u62CE>'[ling]'",
+ "\u62CF>'['n\u00e1']'",
+ "\u62D0>'['gua\u012D']'",
+ "\u62D1>'['qi\u00e1n']'",
+ "\u62D2>'['j\u00f9']'",
+ "\u62D3>'['tu\u00f2']'",
+ "\u62D4>'['b\u00e1']'",
+ "\u62D5>'[tuo]'",
+ "\u62D6>'[tuo]'",
+ "\u62D7>'['a\u014F']'",
+ "\u62D8>'[ju]'",
+ "\u62D9>'['zhu\u00f3']'",
+ "\u62DA>'['p\u00e0n']'",
+ "\u62DB>'[zhao]'",
+ "\u62DC>'['ba\u00ec']'",
+ "\u62DD>'['ba\u00ec']'",
+ "\u62DE>'['d\u012D']'",
+ "\u62DF>'['n\u012D']'",
+ "\u62E0>'['j\u00f9']'",
+ "\u62E1>'['ku\u00f2']'",
+ "\u62E2>'['l\u014Fng']'",
+ "\u62E3>'['ji\u0103n']'",
+ "\u62E5>'['y\u014Fng']'",
+ "\u62E6>'['l\u00e1n']'",
+ "\u62E7>'['n\u00edng']'",
+ "\u62E8>'[bo]'",
+ "\u62E9>'['z\u00e9']'",
+ "\u62EA>'[qian]'",
+ "\u62EB>'['h\u00e9n']'",
+ "\u62EC>'[gua]'",
+ "\u62ED>'['sh\u00ec']'",
+ "\u62EE>'['ji\u00e9']'",
+ "\u62EF>'['zh\u0115ng']'",
+ "\u62F0>'['n\u012Dn']'",
+ "\u62F1>'['g\u014Fng']'",
+ "\u62F2>'['g\u014Fng']'",
+ "\u62F3>'['qu\u00e1n']'",
+ "\u62F4>'[shuan]'",
+ "\u62F5>'['c\u00fan']'",
+ "\u62F6>'['z\u0103n']'",
+ "\u62F7>'['ka\u014F']'",
+ "\u62F8>'['ch\u012D']'",
+ "\u62F9>'['xi\u00e9']'",
+ "\u62FA>'['c\u00e8']'",
+ "\u62FB>'[hui]'",
+ "\u62FC>'[pin]'",
+ "\u62FD>'[zhuai]'",
+ "\u62FE>'['sh\u00ed']'",
+ "\u62FF>'['n\u00e1']'",
+ "\u6300>'['b\u00f2']'",
+ "\u6301>'['ch\u00ed']'",
+ "\u6302>'['gu\u00e0']'",
+ "\u6303>'['zh\u00ec']'",
+ "\u6304>'['ku\u00f2']'",
+ "\u6305>'['du\u014F']'",
+ "\u6306>'['du\u014F']'",
+ "\u6307>'['zh\u012D']'",
+ "\u6308>'['qi\u00e8']'",
+ "\u6309>'['\u00e0n']'",
+ "\u630A>'['n\u00f2ng']'",
+ "\u630B>'['zh\u00e8n']'",
+ "\u630C>'['g\u00e9']'",
+ "\u630D>'['jia\u00f2']'",
+ "\u630E>'[ku]'",
+ "\u630F>'['d\u00f2ng']'",
+ "\u6310>'['r\u00fa']'",
+ "\u6311>'[tiao]'",
+ "\u6312>'['li\u00e8']'",
+ "\u6313>'[zha]'",
+ "\u6314>'['l\u01DA']'",
+ "\u6315>'['di\u00e9']'",
+ "\u6316>'[wa]'",
+ "\u6317>'['ju\u00e9']'",
+ "\u6319>'['j\u016D']'",
+ "\u631A>'['zh\u00ec']'",
+ "\u631B>'['lu\u00e1n']'",
+ "\u631C>'['y\u00e0']'",
+ "\u631D>'[zhua]'",
+ "\u631E>'['t\u00e0']'",
+ "\u631F>'['xi\u00e9']'",
+ "\u6320>'['na\u00f3']'",
+ "\u6321>'['d\u0103ng']'",
+ "\u6322>'['jia\u014F']'",
+ "\u6323>'[zheng]'",
+ "\u6324>'['j\u012D']'",
+ "\u6325>'[hui]'",
+ "\u6326>'['x\u00fan']'",
+ "\u6328>'[ai]'",
+ "\u6329>'[tuo]'",
+ "\u632A>'['nu\u00f3']'",
+ "\u632B>'['cu\u00f2']'",
+ "\u632C>'['b\u00f3']'",
+ "\u632D>'['g\u0115ng']'",
+ "\u632E>'['t\u012D']'",
+ "\u632F>'['zh\u00e8n']'",
+ "\u6330>'['ch\u00e9ng']'",
+ "\u6331>'[suo]'",
+ "\u6332>'[suo]'",
+ "\u6333>'[keng]'",
+ "\u6334>'['me\u012D']'",
+ "\u6335>'['l\u00f2ng']'",
+ "\u6336>'['j\u00fa']'",
+ "\u6337>'['p\u00e9ng']'",
+ "\u6338>'['ji\u0103n']'",
+ "\u6339>'['y\u00ec']'",
+ "\u633A>'['t\u012Dng']'",
+ "\u633B>'[shan]'",
+ "\u633C>'['nu\u00f2']'",
+ "\u633D>'['w\u0103n']'",
+ "\u633E>'['xi\u00e9']'",
+ "\u633F>'[cha]'",
+ "\u6340>'[feng]'",
+ "\u6341>'['jia\u014F']'",
+ "\u6342>'['w\u016D']'",
+ "\u6343>'['j\u00f9n']'",
+ "\u6344>'['ji\u00f9']'",
+ "\u6345>'['t\u014Fng']'",
+ "\u6346>'['k\u016Dn']'",
+ "\u6347>'['hu\u00f2']'",
+ "\u6348>'['t\u00fa']'",
+ "\u6349>'[zhuo]'",
+ "\u634A>'['po\u00fa']'",
+ "\u634B>'['l\u00e8']'",
+ "\u634C>'[ba]'",
+ "\u634D>'['h\u00e0n']'",
+ "\u634E>'[shao]'",
+ "\u634F>'[nie]'",
+ "\u6350>'[juan]'",
+ "\u6351>'['z\u00e9']'",
+ "\u6352>'['s\u014Fng']'",
+ "\u6353>'['y\u00e9']'",
+ "\u6354>'['ju\u00e9']'",
+ "\u6355>'['b\u016D']'",
+ "\u6356>'['hu\u00e1n']'",
+ "\u6357>'['b\u00f9']'",
+ "\u6358>'['z\u00f9n']'",
+ "\u6359>'['y\u00ec']'",
+ "\u635A>'[zhai]'",
+ "\u635B>'['l\u01DA']'",
+ "\u635C>'[sou]'",
+ "\u635D>'[tuo]'",
+ "\u635E>'[lao]'",
+ "\u635F>'['s\u016Dn']'",
+ "\u6360>'[bang]'",
+ "\u6361>'['ji\u0103n']'",
+ "\u6362>'['hu\u00e0n']'",
+ "\u6363>'['da\u014F']'",
+ "\u6365>'['w\u00e0n']'",
+ "\u6366>'['q\u00edn']'",
+ "\u6367>'['p\u0115ng']'",
+ "\u6368>'['sh\u0115']'",
+ "\u6369>'['li\u00e8']'",
+ "\u636A>'['m\u00edn']'",
+ "\u636B>'['m\u00e9n']'",
+ "\u636C>'['f\u016D']'",
+ "\u636D>'['ba\u012D']'",
+ "\u636E>'['j\u00f9']'",
+ "\u636F>'['da\u014F']'",
+ "\u6370>'['w\u014F']'",
+ "\u6371>'['a\u00ed']'",
+ "\u6372>'['ju\u0103n']'",
+ "\u6373>'['yu\u00e8']'",
+ "\u6374>'['z\u014Fng']'",
+ "\u6375>'['ch\u0115n']'",
+ "\u6376>'['chu\u00ed']'",
+ "\u6377>'['ji\u00e9']'",
+ "\u6378>'[tu]'",
+ "\u6379>'['b\u00e8n']'",
+ "\u637A>'['n\u00e0']'",
+ "\u637B>'['ni\u0103n']'",
+ "\u637C>'['nu\u00f3']'",
+ "\u637D>'['z\u00fa']'",
+ "\u637E>'['w\u00f2']'",
+ "\u637F>'[xi]'",
+ "\u6380>'[xian]'",
+ "\u6381>'['ch\u00e9ng']'",
+ "\u6382>'[dian]'",
+ "\u6383>'['sa\u014F']'",
+ "\u6384>'['l\u00fan']'",
+ "\u6385>'['q\u00ecng']'",
+ "\u6386>'[gang]'",
+ "\u6387>'['du\u00f3']'",
+ "\u6388>'['sho\u00f9']'",
+ "\u6389>'['dia\u00f2']'",
+ "\u638A>'['po\u00fa']'",
+ "\u638B>'['d\u012D']'",
+ "\u638C>'['zh\u0103ng']'",
+ "\u638D>'['g\u016Dn']'",
+ "\u638E>'['j\u012D']'",
+ "\u638F>'[tao]'",
+ "\u6390>'[qia]'",
+ "\u6391>'['q\u00ed']'",
+ "\u6392>'['pa\u00ed']'",
+ "\u6393>'['sh\u00fa']'",
+ "\u6394>'[qian]'",
+ "\u6395>'['l\u00ecng']'",
+ "\u6396>'['y\u00ec']'",
+ "\u6397>'['y\u00e0']'",
+ "\u6398>'['ju\u00e9']'",
+ "\u6399>'[zheng]'",
+ "\u639A>'['li\u0103ng']'",
+ "\u639B>'['gu\u00e0']'",
+ "\u639C>'['y\u012D']'",
+ "\u639D>'['hu\u00f2']'",
+ "\u639E>'['sh\u00e0n']'",
+ "\u639F>'['zh\u0115ng']'",
+ "\u63A0>'['l\u00fc\u00e8']'",
+ "\u63A1>'['ca\u012D']'",
+ "\u63A2>'['t\u00e0n']'",
+ "\u63A3>'['ch\u00e8']'",
+ "\u63A4>'[bing]'",
+ "\u63A5>'[jie]'",
+ "\u63A6>'['t\u00ec']'",
+ "\u63A7>'['k\u00f2ng']'",
+ "\u63A8>'[tui]'",
+ "\u63A9>'['y\u0103n']'",
+ "\u63AA>'['cu\u00f2']'",
+ "\u63AB>'[zou]'",
+ "\u63AC>'['j\u00fa']'",
+ "\u63AD>'['ti\u00e0n']'",
+ "\u63AE>'['qi\u00e1n']'",
+ "\u63AF>'['k\u00e8n']'",
+ "\u63B0>'[bai]'",
+ "\u63B1>'['sho\u016D']'",
+ "\u63B2>'[jie]'",
+ "\u63B3>'['l\u016D']'",
+ "\u63B4>'['gu\u00f3']'",
+ "\u63B7>'['zh\u00ed']'",
+ "\u63B8>'['d\u0103n']'",
+ "\u63BA>'[xian]'",
+ "\u63BB>'[sao]'",
+ "\u63BC>'['gu\u00e0n']'",
+ "\u63BD>'['p\u00e8ng']'",
+ "\u63BE>'['yu\u00e0n']'",
+ "\u63BF>'['nu\u00f2']'",
+ "\u63C0>'['ji\u0103n']'",
+ "\u63C1>'[zhen]'",
+ "\u63C2>'[jiu]'",
+ "\u63C3>'[jian]'",
+ "\u63C4>'['y\u00fa']'",
+ "\u63C5>'['y\u00e1n']'",
+ "\u63C6>'['ku\u00ed']'",
+ "\u63C7>'['n\u0103n']'",
+ "\u63C8>'[hong]'",
+ "\u63C9>'['ro\u00fa']'",
+ "\u63CA>'['p\u00ec']'",
+ "\u63CB>'[wei]'",
+ "\u63CC>'[sai]'",
+ "\u63CD>'['zo\u00f9']'",
+ "\u63CE>'[xuan]'",
+ "\u63CF>'['mia\u00f3']'",
+ "\u63D0>'['t\u00ed']'",
+ "\u63D1>'[nie]'",
+ "\u63D2>'[cha]'",
+ "\u63D3>'['sh\u00ec']'",
+ "\u63D4>'['z\u014Fng']'",
+ "\u63D5>'['zh\u00e8n']'",
+ "\u63D6>'[yi]'",
+ "\u63D7>'['sh\u016Dn']'",
+ "\u63D8>'['h\u00e9ng']'",
+ "\u63D9>'['bi\u00e0n']'",
+ "\u63DA>'['y\u00e1ng']'",
+ "\u63DB>'['hu\u00e0n']'",
+ "\u63DC>'['y\u0103n']'",
+ "\u63DD>'['zu\u00e0n']'",
+ "\u63DE>'['\u0103n']'",
+ "\u63DF>'[xu]'",
+ "\u63E0>'['y\u00e0']'",
+ "\u63E1>'['w\u00f2']'",
+ "\u63E2>'['k\u00e8']'",
+ "\u63E3>'['chua\u012D']'",
+ "\u63E4>'['j\u00ed']'",
+ "\u63E5>'['t\u00ec']'",
+ "\u63E6>'['l\u00e1']'",
+ "\u63E7>'['l\u00e0']'",
+ "\u63E8>'['ch\u00e9ng']'",
+ "\u63E9>'[kai]'",
+ "\u63EA>'[jiu]'",
+ "\u63EB>'[jiu]'",
+ "\u63EC>'['t\u00fa']'",
+ "\u63ED>'[jie]'",
+ "\u63EE>'[hui]'",
+ "\u63EF>'[geng]'",
+ "\u63F0>'['ch\u00f2ng']'",
+ "\u63F1>'['shu\u00f2']'",
+ "\u63F2>'['sh\u00e9']'",
+ "\u63F3>'['xi\u00e8']'",
+ "\u63F4>'['yu\u00e1n']'",
+ "\u63F5>'['qi\u00e1n']'",
+ "\u63F6>'['y\u00e9']'",
+ "\u63F7>'[cha]'",
+ "\u63F8>'[zha]'",
+ "\u63F9>'[bei]'",
+ "\u63FA>'['ya\u00f3']'",
+ "\u63FD>'['l\u0103n']'",
+ "\u63FE>'['w\u00e8n']'",
+ "\u63FF>'['q\u00ecn']'",
+ "\u6400>'[chan]'",
+ "\u6401>'[ge]'",
+ "\u6402>'['lo\u016D']'",
+ "\u6403>'['z\u014Fng']'",
+ "\u6404>'[geng]'",
+ "\u6405>'['jia\u014F']'",
+ "\u6406>'['go\u00f9']'",
+ "\u6407>'['q\u00ecn']'",
+ "\u6408>'['y\u014Fng']'",
+ "\u6409>'['qu\u00e8']'",
+ "\u640A>'[chou]'",
+ "\u640B>'['ch\u012D']'",
+ "\u640C>'['zh\u0103n']'",
+ "\u640D>'['s\u016Dn']'",
+ "\u640E>'[sun]'",
+ "\u640F>'['b\u00f3']'",
+ "\u6410>'['ch\u00f9']'",
+ "\u6411>'['r\u014Fng']'",
+ "\u6412>'['b\u00e8ng']'",
+ "\u6413>'[cuo]'",
+ "\u6414>'[sao]'",
+ "\u6415>'['k\u00e8']'",
+ "\u6416>'['ya\u00f3']'",
+ "\u6417>'['da\u014F']'",
+ "\u6418>'[zhi]'",
+ "\u6419>'['n\u00f9']'",
+ "\u641A>'['xi\u00e9']'",
+ "\u641B>'[jian]'",
+ "\u641C>'[sou]'",
+ "\u641D>'['qi\u016D']'",
+ "\u641E>'['ga\u014F']'",
+ "\u641F>'['xi\u0103n']'",
+ "\u6420>'['shu\u00f2']'",
+ "\u6421>'['s\u0103ng']'",
+ "\u6422>'['j\u00ecn']'",
+ "\u6423>'['mi\u00e8']'",
+ "\u6424>'['\u00e8']'",
+ "\u6425>'['chu\u00ed']'",
+ "\u6426>'['nu\u00f2']'",
+ "\u6427>'[shan]'",
+ "\u6428>'['t\u00e0']'",
+ "\u6429>'['ji\u00e9']'",
+ "\u642A>'['t\u00e1ng']'",
+ "\u642B>'['p\u00e1n']'",
+ "\u642C>'[ban]'",
+ "\u642D>'[da]'",
+ "\u642E>'['l\u00ec']'",
+ "\u642F>'[tao]'",
+ "\u6430>'['h\u00fa']'",
+ "\u6431>'['zh\u00ec']'",
+ "\u6432>'[wa]'",
+ "\u6433>'['xi\u00e1']'",
+ "\u6434>'[qian]'",
+ "\u6435>'['w\u00e8n']'",
+ "\u6436>'['qi\u0103ng']'",
+ "\u6437>'['ti\u00e1n']'",
+ "\u6438>'[zhen]'",
+ "\u6439>'['\u00e8']'",
+ "\u643A>'[xi]'",
+ "\u643B>'['nu\u00f2']'",
+ "\u643C>'['qu\u00e1n']'",
+ "\u643D>'['ch\u00e1']'",
+ "\u643E>'['zh\u00e0']'",
+ "\u643F>'['g\u00e9']'",
+ "\u6440>'['w\u016D']'",
+ "\u6441>'['\u00e8n']'",
+ "\u6442>'['sh\u00e8']'",
+ "\u6443>'['k\u00e1ng']'",
+ "\u6444>'['sh\u00e8']'",
+ "\u6445>'[shu]'",
+ "\u6446>'['ba\u012D']'",
+ "\u6447>'['ya\u00f3']'",
+ "\u6448>'['b\u00ecn']'",
+ "\u6449>'[sou]'",
+ "\u644A>'[tan]'",
+ "\u644B>'['s\u00e0']'",
+ "\u644C>'['ch\u0103n']'",
+ "\u644D>'[suo]'",
+ "\u644E>'['lia\u00f3']'",
+ "\u644F>'[chong]'",
+ "\u6450>'[chuang]'",
+ "\u6451>'['gu\u00f3']'",
+ "\u6452>'['b\u00ecng']'",
+ "\u6453>'['f\u00e9ng']'",
+ "\u6454>'[shuai]'",
+ "\u6455>'['d\u00ec']'",
+ "\u6456>'['q\u00ec']'",
+ "\u6457>'[SOU]'",
+ "\u6458>'[zhai]'",
+ "\u6459>'['li\u0103n']'",
+ "\u645A>'['t\u00e1ng']'",
+ "\u645B>'[chi]'",
+ "\u645C>'['gu\u00e0n']'",
+ "\u645D>'['l\u00f9']'",
+ "\u645E>'['lu\u00f3']'",
+ "\u645F>'['lo\u016D']'",
+ "\u6460>'['z\u014Fng']'",
+ "\u6461>'['ga\u00ec']'",
+ "\u6462>'['h\u00f9']'",
+ "\u6463>'[zha]'",
+ "\u6464>'['chu\u0103ng']'",
+ "\u6465>'['t\u00e0ng']'",
+ "\u6466>'['hu\u00e0']'",
+ "\u6467>'[cui]'",
+ "\u6468>'['na\u00ed']'",
+ "\u6469>'['m\u00f3']'",
+ "\u646A>'[jiang]'",
+ "\u646B>'[gui]'",
+ "\u646C>'['y\u00ecng']'",
+ "\u646D>'['zh\u00ed']'",
+ "\u646E>'['a\u00f3']'",
+ "\u646F>'['zh\u00ec']'",
+ "\u6470>'['ni\u00e8']'",
+ "\u6471>'['m\u00e1n']'",
+ "\u6472>'['sh\u00e0n']'",
+ "\u6473>'[kou]'",
+ "\u6474>'[shu]'",
+ "\u6475>'['su\u014F']'",
+ "\u6476>'['tu\u00e1n']'",
+ "\u6477>'['jia\u014F']'",
+ "\u6478>'[mo]'",
+ "\u6479>'['m\u00f3']'",
+ "\u647A>'['zh\u00e9']'",
+ "\u647B>'[xian]'",
+ "\u647C>'[keng]'",
+ "\u647D>'['pia\u014F']'",
+ "\u647E>'['ji\u00e0ng']'",
+ "\u647F>'[yin]'",
+ "\u6480>'['go\u00f9']'",
+ "\u6481>'[qian]'",
+ "\u6482>'['l\u00fc\u00e8']'",
+ "\u6483>'['j\u00ed']'",
+ "\u6484>'[ying]'",
+ "\u6485>'[jue]'",
+ "\u6486>'[pie]'",
+ "\u6487>'['pi\u0115']'",
+ "\u6488>'[lao]'",
+ "\u6489>'[dun]'",
+ "\u648A>'['xi\u00e0n']'",
+ "\u648B>'['ru\u00e1n']'",
+ "\u648C>'['ku\u00ec']'",
+ "\u648D>'['z\u0103n']'",
+ "\u648E>'['y\u00ec']'",
+ "\u648F>'['x\u00fan']'",
+ "\u6490>'[cheng]'",
+ "\u6491>'[cheng]'",
+ "\u6492>'['s\u0103']'",
+ "\u6493>'['na\u00f3']'",
+ "\u6494>'['h\u00e8ng']'",
+ "\u6495>'[si]'",
+ "\u6496>'['qi\u0103n']'",
+ "\u6497>'['hu\u00e1ng']'",
+ "\u6498>'[da]'",
+ "\u6499>'['z\u016Dn']'",
+ "\u649A>'['ni\u0103n']'",
+ "\u649B>'['l\u012Dn']'",
+ "\u649C>'['zh\u0115ng']'",
+ "\u649D>'[hui]'",
+ "\u649E>'['zhu\u00e0ng']'",
+ "\u649F>'['jia\u014F']'",
+ "\u64A0>'['j\u012D']'",
+ "\u64A1>'[cao]'",
+ "\u64A2>'['d\u0103n']'",
+ "\u64A3>'['d\u0103n']'",
+ "\u64A4>'['ch\u00e8']'",
+ "\u64A5>'[bo]'",
+ "\u64A6>'['ch\u0115']'",
+ "\u64A7>'['ju\u00e9']'",
+ "\u64A8>'[xiao]'",
+ "\u64A9>'['lia\u00f3']'",
+ "\u64AA>'['b\u00e8n']'",
+ "\u64AB>'['f\u016D']'",
+ "\u64AC>'['qia\u00f2']'",
+ "\u64AD>'['b\u00f2']'",
+ "\u64AE>'[cuo]'",
+ "\u64AF>'['zhu\u00f3']'",
+ "\u64B0>'['zhu\u00e0n']'",
+ "\u64B1>'['tu\u014F']'",
+ "\u64B2>'[pu]'",
+ "\u64B3>'['q\u00ecn']'",
+ "\u64B4>'[dun]'",
+ "\u64B5>'['ni\u0103n']'",
+ "\u64B7>'['xi\u00e9']'",
+ "\u64B8>'['l\u016D']'",
+ "\u64B9>'['jia\u014F']'",
+ "\u64BA>'[cuan]'",
+ "\u64BB>'['t\u00e0']'",
+ "\u64BC>'['h\u00e0n']'",
+ "\u64BD>'['qia\u00f2']'",
+ "\u64BE>'[zhua]'",
+ "\u64BF>'['ji\u0103n']'",
+ "\u64C0>'['g\u0103n']'",
+ "\u64C1>'['y\u014Fng']'",
+ "\u64C2>'['le\u00ed']'",
+ "\u64C3>'['ku\u014F']'",
+ "\u64C4>'['l\u016D']'",
+ "\u64C5>'['sh\u00e0n']'",
+ "\u64C6>'['zhu\u00f3']'",
+ "\u64C7>'['z\u00e9']'",
+ "\u64C8>'[pu]'",
+ "\u64C9>'['chu\u00f2']'",
+ "\u64CA>'['j\u00ed']'",
+ "\u64CB>'['d\u0103ng']'",
+ "\u64CC>'['su\u014F']'",
+ "\u64CD>'[cao]'",
+ "\u64CE>'['q\u00edng']'",
+ "\u64CF>'['j\u00ecng']'",
+ "\u64D0>'['hu\u00e0n']'",
+ "\u64D1>'[jie]'",
+ "\u64D2>'['q\u00edn']'",
+ "\u64D3>'['kua\u012D']'",
+ "\u64D4>'[dan]'",
+ "\u64D5>'[xi]'",
+ "\u64D6>'['g\u0115']'",
+ "\u64D7>'['p\u00ec']'",
+ "\u64D8>'['b\u00f2']'",
+ "\u64D9>'['a\u00f2']'",
+ "\u64DA>'['j\u00f9']'",
+ "\u64DB>'['y\u00e8']'",
+ "\u64DE>'['so\u016D']'",
+ "\u64DF>'['m\u00ed']'",
+ "\u64E0>'['j\u012D']'",
+ "\u64E1>'['ta\u00ed']'",
+ "\u64E2>'['zhu\u00f3']'",
+ "\u64E3>'['da\u014F']'",
+ "\u64E4>'['x\u012Dng']'",
+ "\u64E5>'['l\u0103n']'",
+ "\u64E6>'[ca]'",
+ "\u64E7>'['j\u016D']'",
+ "\u64E8>'['y\u00e9']'",
+ "\u64E9>'['r\u016D']'",
+ "\u64EA>'['y\u00e8']'",
+ "\u64EB>'['y\u00e8']'",
+ "\u64EC>'['n\u012D']'",
+ "\u64ED>'['h\u00f9']'",
+ "\u64EE>'['j\u00ed']'",
+ "\u64EF>'['b\u00ecn']'",
+ "\u64F0>'['n\u00edng']'",
+ "\u64F1>'[ge]'",