ICU-6098 Tag ICU4J 3.9.2 (4.0M2)
X-SVN-Rev: 23616
diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000..6a5636c
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry excluding="**/.svn/**|com/ibm/icu/dev/eclipse/|com/ibm/icu/dev/tool/docs/" kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="output" path="classes"/>
+</classpath>
diff --git a/.externalToolBuilders/icu4j_Resources.launch b/.externalToolBuilders/icu4j_Resources.launch
new file mode 100644
index 0000000..4e3cd7a
--- /dev/null
+++ b/.externalToolBuilders/icu4j_Resources.launch
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<launchConfiguration type="org.eclipse.ant.AntBuilderLaunchConfigurationType">
+<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_AFTER_CLEAN_TARGETS" value="resources,"/>
+<stringAttribute key="org.eclipse.ant.ui.ATTR_ANT_MANUAL_TARGETS" value="resources,"/>
+<booleanAttribute key="org.eclipse.ant.ui.ATTR_TARGETS_UPDATED" value="true"/>
+<booleanAttribute key="org.eclipse.ant.ui.DEFAULT_VM_INSTALL" value="false"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/icu4j/build.xml"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<booleanAttribute key="org.eclipse.debug.ui.ATTR_LAUNCH_IN_BACKGROUND" value="false"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
+<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="true"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="icu4j"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/icu4j/build.xml}"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_RUN_BUILD_KINDS" value="full,incremental,"/>
+<booleanAttribute key="org.eclipse.ui.externaltools.ATTR_TRIGGERS_CONFIGURED" value="true"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/icu4j}"/>
+</launchConfiguration>
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..b8c56f3
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,307 @@
+* text=auto !eol
+
+*.c text !eol
+*.cc text !eol
+*.classpath text !eol
+*.cpp text !eol
+*.css text !eol
+*.dsp text !eol
+*.dsw text !eol
+*.filters text !eol
+*.h text !eol
+*.htm text !eol
+*.html text !eol
+*.in text !eol
+*.java text !eol
+*.launch text !eol
+*.mak text !eol
+*.md text !eol
+*.MF text !eol
+*.mk text !eol
+*.pl text !eol
+*.pm text !eol
+*.project text !eol
+*.properties text !eol
+*.py text !eol
+*.rc text !eol
+*.sh text eol=lf
+*.sln text !eol
+*.stub text !eol
+*.txt text !eol
+*.ucm text !eol
+*.vcproj text !eol
+*.vcxproj text !eol
+*.xml text !eol
+*.xsl text !eol
+*.xslt text !eol
+Makefile text !eol
+configure text !eol
+LICENSE text !eol
+README text !eol
+
+*.bin -text
+*.brk -text
+*.cnv -text
+*.icu -text
+*.res -text
+*.nrm -text
+*.spp -text
+*.tri2 -text
+
+/.classpath -text
+/.project -text
+/eclipseFragment.txt -text
+eclipseProjectMisc/initSrc.launch -text
+eclipseProjectMisc/normSrc.launch -text
+/ee.foundation.jar -text
+/preprocessor.txt -text
+src/com/ibm/icu/ICUConfig.properties -text
+src/com/ibm/icu/charset/CharsetISO2022.java -text
+src/com/ibm/icu/dev/data/rbbi/english.dict -text
+src/com/ibm/icu/dev/data/testdata.jar -text
+src/com/ibm/icu/dev/data/thai6.ucs -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.base/.project -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.base/build.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.base/feature.xml -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.base/sourceTemplatePlugin/build.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.base/sourceTemplatePlugin/plugin.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.jse4/.project -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.jse4/build.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.jse4/feature.xml -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.jse4/sourceTemplatePlugin/build.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu.jse4/sourceTemplatePlugin/plugin.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu/.project -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu/build.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu/feature.xml -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu/sourceTemplatePlugin/build.properties -text
+src/com/ibm/icu/dev/eclipse/features/com.ibm.icu/sourceTemplatePlugin/plugin.properties -text
+src/com/ibm/icu/dev/eclipse/pdebuild/allElements.xml -text
+src/com/ibm/icu/dev/eclipse/pdebuild/build.properties -text
+src/com/ibm/icu/dev/eclipse/pdebuild/customTargets.xml -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/.classpath -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/.project -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.core.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/.settings/org.eclipse.jdt.ui.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/META-INF/MANIFEST.MF -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/build.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/BreakIteratorTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CalendarTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollationKeyTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/CollatorTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatSymbolsTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DateFormatTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatSymbolsTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/DecimalFormatTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ICUTestCase.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/MessageFormatTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/NumberFormatTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/SimpleDateFormatTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/TimeZoneTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base.tests/src/com/ibm/icu/tests/ULocaleTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/.classpath -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/.project -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/.settings/org.eclipse.jdt.core.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/.settings/org.eclipse.jdt.ui.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/META-INF/MANIFEST.MF -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/build.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/plugin.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/BreakIterator.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/CollationKey.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/Collator.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/DateFormat.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/DateFormatSymbols.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormat.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/DecimalFormatSymbols.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/MessageFormat.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/NumberFormat.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/text/SimpleDateFormat.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/util/Calendar.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/util/TimeZone.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.base/src/com/ibm/icu/util/ULocale.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/.classpath -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/.project -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/.settings/org.eclipse.jdt.core.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/.settings/org.eclipse.jdt.ui.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/META-INF/MANIFEST.MF -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/build.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/fragment-icu-jse4.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.jse4/plugin.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/.classpath -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/.project -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/.settings/org.eclipse.jdt.core.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/.settings/org.eclipse.jdt.ui.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/META-INF/MANIFEST.MF -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/build.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu.tests/src/com/ibm/icu/tests/UnitTest.java -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/.classpath -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/.project -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/.settings/org.eclipse.jdt.core.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/.settings/org.eclipse.jdt.ui.prefs -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/META-INF/MANIFEST.MF -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/build.properties -text
+src/com/ibm/icu/dev/eclipse/plugins/com.ibm.icu/plugin.properties -text
+src/com/ibm/icu/dev/test/calendar/DataDrivenCalendarTest.java -text
+src/com/ibm/icu/dev/test/calendar/IndianTest.java -text
+src/com/ibm/icu/dev/test/duration/DataReadWriteTest.java -text
+src/com/ibm/icu/dev/test/duration/LanguageTestRoot.java -text
+src/com/ibm/icu/dev/test/duration/PeriodBuilderFactoryTest.java -text
+src/com/ibm/icu/dev/test/duration/PeriodBuilderTest.java -text
+src/com/ibm/icu/dev/test/duration/PeriodTest.java -text
+src/com/ibm/icu/dev/test/duration/ResourceBasedPeriodFormatterDataServiceTest.java -text
+src/com/ibm/icu/dev/test/duration/TestAll.java -text
+src/com/ibm/icu/dev/test/duration/languages/TestAll.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_ar_EG.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_en.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_es.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_fr.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_he_IL.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_hi.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_it.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_ja.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_ko.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_ru.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hans.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hans_SG.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hant.java -text
+src/com/ibm/icu/dev/test/duration/languages/Test_zh_Hant_HK.java -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_ar_EG.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_en.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_es.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_fr.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_he_IL.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_hi.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_it.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_ja.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_ko.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_ru.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_th.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hans.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hans_SG.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hant.txt -text
+src/com/ibm/icu/dev/test/duration/testdata/testdata_zh_Hant_HK.txt -text
+src/com/ibm/icu/dev/test/format/DataDrivenFormatTest.java -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Asian.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Chinese.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Japanese.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Japanese_h.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Japanese_k.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Korean.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Latin.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Russian.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_SerbianSH.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_SerbianSR.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Simplified_Chinese.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Thai.txt -text
+src/com/ibm/icu/dev/test/rbbi/rbbitst.txt -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.MathContext.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ArabicShapingException.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormatSymbols.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.MessageFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.NumberFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.SimpleDateFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.StringPrepParseException.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.BuddhistCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Calendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ChineseCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.CopticCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Currency.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.EthiopicCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.GregorianCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.HebrewCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.IslamicCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.JapaneseCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.SimpleTimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.TimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ULocale.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+src/com/ibm/icu/dev/test/util/CalendarFieldsSet.java -text
+src/com/ibm/icu/dev/test/util/DateTimeStyleSet.java -text
+src/com/ibm/icu/dev/test/util/DebugUtilities.java -text
+src/com/ibm/icu/dev/test/util/DebugUtilitiesData.java -text
+src/com/ibm/icu/dev/test/util/DebugUtilitiesTest.java -text
+src/com/ibm/icu/dev/test/util/FieldsSet.java -text
+src/com/ibm/icu/dev/tool/docs/icu4j28.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j30.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j32.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j34.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j341.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j342.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j343.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j36.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j38.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j381.api.gz -text
+src/com/ibm/icu/dev/tool/tzu/icu.gif -text
+src/com/ibm/icu/dev/tool/tzu/runicutzu.bat -text
+src/com/ibm/icu/dev/tool/tzu/runicutzu.cmd -text
+src/com/ibm/icu/dev/tool/tzu/runicutzu.sh -text
+src/com/ibm/icu/dev/tool/tzu/runicutzuenv.bat -text
+src/com/ibm/icu/dev/tool/tzu/runicutzuenv.cmd -text
+src/com/ibm/icu/dev/tool/tzu/runicutzuenv.sh -text
+src/com/ibm/icu/impl/data/icudata.jar -text
+src/com/ibm/icu/impl/duration/BasicDurationFormatter.java -text
+src/com/ibm/icu/impl/duration/BasicDurationFormatterFactory.java -text
+src/com/ibm/icu/impl/duration/BasicPeriodBuilderFactory.java -text
+src/com/ibm/icu/impl/duration/BasicPeriodFormatter.java -text
+src/com/ibm/icu/impl/duration/BasicPeriodFormatterFactory.java -text
+src/com/ibm/icu/impl/duration/BasicPeriodFormatterService.java -text
+src/com/ibm/icu/impl/duration/DateFormatter.java -text
+src/com/ibm/icu/impl/duration/DurationFormatter.java -text
+src/com/ibm/icu/impl/duration/DurationFormatterFactory.java -text
+src/com/ibm/icu/impl/duration/Period.java -text
+src/com/ibm/icu/impl/duration/PeriodBuilder.java -text
+src/com/ibm/icu/impl/duration/PeriodBuilderFactory.java -text
+src/com/ibm/icu/impl/duration/PeriodFormatter.java -text
+src/com/ibm/icu/impl/duration/PeriodFormatterFactory.java -text
+src/com/ibm/icu/impl/duration/PeriodFormatterService.java -text
+src/com/ibm/icu/impl/duration/TimeUnit.java -text
+src/com/ibm/icu/impl/duration/TimeUnitConstants.java -text
+src/com/ibm/icu/impl/duration/impl/DataRecord.java -text
+src/com/ibm/icu/impl/duration/impl/PeriodFormatterData.java -text
+src/com/ibm/icu/impl/duration/impl/PeriodFormatterDataService.java -text
+src/com/ibm/icu/impl/duration/impl/RecordReader.java -text
+src/com/ibm/icu/impl/duration/impl/RecordWriter.java -text
+src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java -text
+src/com/ibm/icu/impl/duration/impl/Utils.java -text
+src/com/ibm/icu/impl/duration/impl/XMLRecordReader.java -text
+src/com/ibm/icu/impl/duration/impl/XMLRecordWriter.java -text
+src/com/ibm/icu/impl/duration/impl/YMDDateFormatter.java -text
+src/com/ibm/icu/impl/duration/impl/data/index.txt -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_ar_EG.xml.escaped -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_en.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_es.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_fr.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_he_IL.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_hi.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_it.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_ja.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_ko.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_ru.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_th.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hans_SG.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant.xml -text
+src/com/ibm/icu/impl/duration/impl/data/pfd_zh_Hant_HK.xml -text
+src/com/ibm/icu/text/BreakCTDictionary.java -text
+src/com/ibm/icu/text/DurationFormat.java -text
+src/com/ibm/icu/text/ThaiBreakIterator.java -text
+src/com/ibm/richtext/textapps/resources/unicode.arabic.red -text
+src/com/ibm/richtext/textapps/resources/unicode.hebrew.red -text
+
+# The following file types are stored in Git-LFS.
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.dat filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b8d1806
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+/*.jar
+/.clover
+/classes
+/classes.wrapper
+/doc
diff --git a/.project b/.project
new file mode 100644
index 0000000..1a303d7
--- /dev/null
+++ b/.project
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>icu4j</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.eclipse.ui.externaltools.ExternalToolBuilder</name>
+ <triggers>full,incremental,</triggers>
+ <arguments>
+ <dictionary>
+ <key>LaunchConfigHandle</key>
+ <value>&lt;project&gt;/.externalToolBuilders/icu4j_Resources.launch</value>
+ </dictionary>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..c961391
--- /dev/null
+++ b/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,4 @@
+#Mon Dec 17 14:42:45 PST 2007
+eclipse.preferences.version=1
+encoding//src/com/ibm/icu/dev/test/format/NumberFormatTestCases.txt=UTF-8
+encoding/<project>=US-ASCII
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..d3b3ff1
--- /dev/null
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,317 @@
+#Wed Aug 22 13:05:31 EDT 2007
+eclipse.preferences.version=1
+instance/org.eclipse.core.net/org.eclipse.core.net.hasMigrated=true
+org.eclipse.jdt.core.codeComplete.argumentPrefixes=
+org.eclipse.jdt.core.codeComplete.argumentSuffixes=
+org.eclipse.jdt.core.codeComplete.fieldPrefixes=
+org.eclipse.jdt.core.codeComplete.fieldSuffixes=
+org.eclipse.jdt.core.codeComplete.localPrefixes=
+org.eclipse.jdt.core.codeComplete.localSuffixes=
+org.eclipse.jdt.core.codeComplete.staticFieldPrefixes=
+org.eclipse.jdt.core.codeComplete.staticFieldSuffixes=
+org.eclipse.jdt.core.compiler.problem.annotationSuperInterface=warning
+org.eclipse.jdt.core.compiler.problem.autoboxing=ignore
+org.eclipse.jdt.core.compiler.problem.deprecation=warning
+org.eclipse.jdt.core.compiler.problem.deprecationInDeprecatedCode=disabled
+org.eclipse.jdt.core.compiler.problem.deprecationWhenOverridingDeprecatedMethod=disabled
+org.eclipse.jdt.core.compiler.problem.discouragedReference=warning
+org.eclipse.jdt.core.compiler.problem.emptyStatement=warning
+org.eclipse.jdt.core.compiler.problem.fallthroughCase=ignore
+org.eclipse.jdt.core.compiler.problem.fieldHiding=ignore
+org.eclipse.jdt.core.compiler.problem.finalParameterBound=warning
+org.eclipse.jdt.core.compiler.problem.finallyBlockNotCompletingNormally=warning
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=error
+org.eclipse.jdt.core.compiler.problem.hiddenCatchBlock=warning
+org.eclipse.jdt.core.compiler.problem.incompatibleNonInheritedInterfaceMethod=warning
+org.eclipse.jdt.core.compiler.problem.incompleteEnumSwitch=ignore
+org.eclipse.jdt.core.compiler.problem.indirectStaticAccess=ignore
+org.eclipse.jdt.core.compiler.problem.localVariableHiding=warning
+org.eclipse.jdt.core.compiler.problem.methodWithConstructorName=error
+org.eclipse.jdt.core.compiler.problem.missingDeprecatedAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingOverrideAnnotation=ignore
+org.eclipse.jdt.core.compiler.problem.missingSerialVersion=warning
+org.eclipse.jdt.core.compiler.problem.noEffectAssignment=warning
+org.eclipse.jdt.core.compiler.problem.noImplicitStringConversion=warning
+org.eclipse.jdt.core.compiler.problem.nonExternalizedStringLiteral=ignore
+org.eclipse.jdt.core.compiler.problem.nullReference=ignore
+org.eclipse.jdt.core.compiler.problem.overridingPackageDefaultMethod=warning
+org.eclipse.jdt.core.compiler.problem.parameterAssignment=ignore
+org.eclipse.jdt.core.compiler.problem.possibleAccidentalBooleanAssignment=warning
+org.eclipse.jdt.core.compiler.problem.rawTypeReference=ignore
+org.eclipse.jdt.core.compiler.problem.specialParameterHidingField=disabled
+org.eclipse.jdt.core.compiler.problem.staticAccessReceiver=warning
+org.eclipse.jdt.core.compiler.problem.suppressWarnings=enabled
+org.eclipse.jdt.core.compiler.problem.syntheticAccessEmulation=ignore
+org.eclipse.jdt.core.compiler.problem.typeParameterHiding=warning
+org.eclipse.jdt.core.compiler.problem.uncheckedTypeOperation=ignore
+org.eclipse.jdt.core.compiler.problem.undocumentedEmptyBlock=ignore
+org.eclipse.jdt.core.compiler.problem.unhandledWarningToken=warning
+org.eclipse.jdt.core.compiler.problem.unnecessaryElse=ignore
+org.eclipse.jdt.core.compiler.problem.unnecessaryTypeCheck=ignore
+org.eclipse.jdt.core.compiler.problem.unqualifiedFieldAccess=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownException=ignore
+org.eclipse.jdt.core.compiler.problem.unusedDeclaredThrownExceptionWhenOverriding=disabled
+org.eclipse.jdt.core.compiler.problem.unusedImport=warning
+org.eclipse.jdt.core.compiler.problem.unusedLabel=warning
+org.eclipse.jdt.core.compiler.problem.unusedLocal=warning
+org.eclipse.jdt.core.compiler.problem.unusedParameter=ignore
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenImplementingAbstract=disabled
+org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disabled
+org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
+org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
+org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_assignment=0
+org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16
+org.eclipse.jdt.core.formatter.alignment_for_compact_if=16
+org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80
+org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0
+org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16
+org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16
+org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16
+org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16
+org.eclipse.jdt.core.formatter.blank_lines_after_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_after_package=1
+org.eclipse.jdt.core.formatter.blank_lines_before_field=1
+org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0
+org.eclipse.jdt.core.formatter.blank_lines_before_imports=1
+org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1
+org.eclipse.jdt.core.formatter.blank_lines_before_method=1
+org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1
+org.eclipse.jdt.core.formatter.blank_lines_before_package=0
+org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1
+org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1
+org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line
+org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines=false
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false
+org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false
+org.eclipse.jdt.core.formatter.comment.format_block_comments=true
+org.eclipse.jdt.core.formatter.comment.format_comments=true
+org.eclipse.jdt.core.formatter.comment.format_header=false
+org.eclipse.jdt.core.formatter.comment.format_html=true
+org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true
+org.eclipse.jdt.core.formatter.comment.format_line_comments=true
+org.eclipse.jdt.core.formatter.comment.format_source_code=true
+org.eclipse.jdt.core.formatter.comment.indent_parameter_description=true
+org.eclipse.jdt.core.formatter.comment.indent_root_tags=true
+org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert
+org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=insert
+org.eclipse.jdt.core.formatter.comment.line_length=80
+org.eclipse.jdt.core.formatter.compact_else_if=true
+org.eclipse.jdt.core.formatter.continuation_indentation=2
+org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2
+org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true
+org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true
+org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_empty_lines=false
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true
+org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true
+org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false
+org.eclipse.jdt.core.formatter.indentation.size=4
+org.eclipse.jdt.core.formatter.insert_new_line_after_annotation=insert
+org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert
+org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert
+org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert
+org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert
+org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert
+org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert
+org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert
+org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert
+org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert
+org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert
+org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert
+org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false
+org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false
+org.eclipse.jdt.core.formatter.lineSplit=80
+org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false
+org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0
+org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1
+org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true
+org.eclipse.jdt.core.formatter.tabulation.char=space
+org.eclipse.jdt.core.formatter.tabulation.size=4
+org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false
+org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true
diff --git a/.settings/org.eclipse.jdt.ui.prefs b/.settings/org.eclipse.jdt.ui.prefs
new file mode 100644
index 0000000..efab0a0
--- /dev/null
+++ b/.settings/org.eclipse.jdt.ui.prefs
@@ -0,0 +1,12 @@
+#Wed Aug 22 13:05:31 EDT 2007
+eclipse.preferences.version=1
+formatter_profile=_ICU4J standard
+formatter_settings_version=11
+instance/org.eclipse.core.net/org.eclipse.core.net.hasMigrated=true
+internal.default.compliance=user
+org.eclipse.jdt.ui.exception.name=e
+org.eclipse.jdt.ui.gettersetter.use.is=true
+org.eclipse.jdt.ui.javadoc=true
+org.eclipse.jdt.ui.keywordthis=false
+org.eclipse.jdt.ui.overrideannotation=false
+org.eclipse.jdt.ui.text.custom_code_templates=<?xml version\="1.0" encoding\="UTF-8" standalone\="no"?><templates><template autoinsert\="true" context\="gettercomment_context" deleted\="false" description\="Comment for getter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.gettercomment" name\="gettercomment">/**\r\n * @return the ${bare_field_name}\r\n */</template><template autoinsert\="true" context\="settercomment_context" deleted\="false" description\="Comment for setter method" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.settercomment" name\="settercomment">/**\r\n * @param ${param} the ${bare_field_name} to set\r\n */</template><template autoinsert\="true" context\="constructorcomment_context" deleted\="false" description\="Comment for created constructors" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorcomment" name\="constructorcomment">/**\r\n * ${tags}\r\n */</template><template autoinsert\="false" context\="filecomment_context" deleted\="false" description\="Comment for created Java files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.filecomment" name\="filecomment">/*\r\n *******************************************************************************\r\n * Copyright (C) ${year}, International Business Machines Corporation and *\r\n * others. All Rights Reserved. 
*\r\n *******************************************************************************\r\n */</template><template autoinsert\="true" context\="typecomment_context" deleted\="false" description\="Comment for created types" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.typecomment" name\="typecomment">/**\r\n * @author ${user}\r\n *\r\n * ${tags}\r\n */</template><template autoinsert\="true" context\="fieldcomment_context" deleted\="false" description\="Comment for fields" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.fieldcomment" name\="fieldcomment">/**\r\n * \r\n */</template><template autoinsert\="true" context\="methodcomment_context" deleted\="false" description\="Comment for non-overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodcomment" name\="methodcomment">/**\r\n * ${tags}\r\n */</template><template autoinsert\="true" context\="overridecomment_context" deleted\="false" description\="Comment for overriding methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.overridecomment" name\="overridecomment">/* (non-Javadoc)\r\n * ${see_to_overridden}\r\n */</template><template autoinsert\="true" context\="delegatecomment_context" deleted\="false" description\="Comment for delegate methods" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.delegatecomment" name\="delegatecomment">/**\r\n * ${tags}\r\n * ${see_to_target}\r\n */</template><template autoinsert\="true" context\="newtype_context" deleted\="false" description\="Newly created files" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.newtype" name\="newtype">${filecomment}\r\n${package_declaration}\r\n\r\n${typecomment}\r\n${type_declaration}</template><template autoinsert\="true" context\="classbody_context" deleted\="false" description\="Code in new class type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.classbody" name\="classbody">\r\n</template><template autoinsert\="true" 
context\="interfacebody_context" deleted\="false" description\="Code in new interface type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.interfacebody" name\="interfacebody">\r\n</template><template autoinsert\="true" context\="enumbody_context" deleted\="false" description\="Code in new enum type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.enumbody" name\="enumbody">\r\n</template><template autoinsert\="true" context\="annotationbody_context" deleted\="false" description\="Code in new annotation type bodies" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.annotationbody" name\="annotationbody">\r\n</template><template autoinsert\="true" context\="catchblock_context" deleted\="false" description\="Code in new catch blocks" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.catchblock" name\="catchblock">// ${todo} Auto-generated catch block\r\n${exception_var}.printStackTrace();</template><template autoinsert\="true" context\="methodbody_context" deleted\="false" description\="Code in created method stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.methodbody" name\="methodbody">// ${todo} Auto-generated method stub\r\n${body_statement}</template><template autoinsert\="true" context\="constructorbody_context" deleted\="false" description\="Code in created constructor stubs" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.constructorbody" name\="constructorbody">${body_statement}\r\n// ${todo} Auto-generated constructor stub</template><template autoinsert\="true" context\="getterbody_context" deleted\="false" description\="Code in created getters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.getterbody" name\="getterbody">return ${field};</template><template autoinsert\="true" context\="setterbody_context" deleted\="false" description\="Code in created setters" enabled\="true" id\="org.eclipse.jdt.ui.text.codetemplates.setterbody" name\="setterbody">${field} \= 
${param};</template></templates>
diff --git a/APIChangeReport.html b/APIChangeReport.html
new file mode 100644
index 0000000..bb07d1e
--- /dev/null
+++ b/APIChangeReport.html
@@ -0,0 +1,620 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>ICU4J API Comparison: ICU4J 3.6 with ICU4J 3.8</title>
+<!-- Copyright 2007, IBM, All Rights Reserved. -->
+</head>
+<body>
+<h1>ICU4J API Comparison: ICU4J 3.6 with ICU4J 3.8</h1>
+
+<hr/>
+<h2>Removed from ICU4J 3.6</h2>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+DateTimePatternGenerator
+<ul>
+<li>(draft) public DateTimePatternGenerator <i>add</i>(java.lang.String, boolean, DateTimePatternGenerator.PatternInfo)</li>
+<li>(draft) public java.lang.String <i>getAppendItemFormats</i>(int)</li>
+<li>(draft) public java.lang.String <i>getAppendItemNames</i>(int)</li>
+<li>(draft) public static DateTimePatternGenerator <i>newInstance</i>()</li>
+<li>(draft) public void <i>setAppendItemFormats</i>(int, java.lang.String)</li>
+<li>(draft) public void <i>setAppendItemNames</i>(int, java.lang.String)</li>
+</ul>
+RuleBasedTransliterator
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>RuleBasedTransliterator</i>(java.lang.String, java.lang.String)</li>
+<li><span style='color:red'>*internal* </span>public <i>RuleBasedTransliterator</i>(java.lang.String, java.lang.String, int, UnicodeFilter)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+SimpleTimeZone
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>SimpleTimeZone</i>(java.util.SimpleTimeZone, java.lang.String)</li>
+</ul>
+TimeZone
+<ul>
+<li><span style='color:red'>*internal* </span>protected static final int MILLIS_PER_DAY</li>
+<li><span style='color:red'>*internal* </span>protected static final int MILLIS_PER_HOUR</li>
+</ul>
+</ul>
+
+
+<hr/>
+<h2>Withdrawn, Deprecated, or Obsoleted in ICU4J 3.8</h2>
+<p>(no API obsoleted)</p>
+
+<hr/>
+<h2>Changed in ICU4J 3.8 (old, new)</h2>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+<li> (stable) public abstract class <i>Transliterator</i> extends java.lang.Object<br/>
+(stable) public abstract class <i>Transliterator</i> extends java.lang.Object implements com.ibm.icu.text.StringTransform</li>
+</ul>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+<li> (stable) public class <i>SimpleTimeZone</i> extends com.ibm.icu.impl.JDKTimeZone<br/>
+(stable) public class <i>SimpleTimeZone</i> extends com.ibm.icu.util.BasicTimeZone</li>
+</ul>
+
+
+<hr/>
+<h2>Promoted to stable in ICU4J 3.8</h2>
+
+<h3>Package com.ibm.icu.lang</h3>
+<ul>
+<li>(stable) public static interface <i>UCharacter.GraphemeClusterBreak</i></li>
+<li>(stable) public static interface <i>UCharacter.SentenceBreak</i></li>
+<li>(stable) public static interface <i>UCharacter.WordBreak</i></li>
+UCharacter
+<ul>
+<li>(stable) public static final int MAX_RADIX</li>
+<li>(stable) public static final int MIN_RADIX</li>
+<li>(draft) public static java.lang.String <i>getName</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public static boolean <i>isJavaIdentifierPart</i>(int)</li>
+<li>(stable) public static boolean <i>isJavaIdentifierStart</i>(int)</li>
+<li>(stable) public static java.lang.String <i>toLowerCase</i>(ULocale, java.lang.String)</li>
+<li>(stable) public static java.lang.String <i>toTitleCase</i>(ULocale, java.lang.String, BreakIterator)</li>
+<li>(stable) public static java.lang.String <i>toUpperCase</i>(ULocale, java.lang.String)</li>
+</ul>
+UCharacter.LineBreak
+<ul>
+<li>(stable) public static final int H2</li>
+<li>(stable) public static final int H3</li>
+<li>(stable) public static final int JL</li>
+<li>(stable) public static final int JT</li>
+<li>(stable) public static final int JV</li>
+</ul>
+UCharacter.UnicodeBlock
+<ul>
+<li>(stable) public static final UCharacter.UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION</li>
+<li>(stable) public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock ANCIENT_GREEK_NUMBERS</li>
+<li>(stable) public static final int ANCIENT_GREEK_NUMBERS_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock ARABIC_SUPPLEMENT</li>
+<li>(stable) public static final int ARABIC_SUPPLEMENT_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock BUGINESE</li>
+<li>(stable) public static final int BUGINESE_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock CJK_STROKES</li>
+<li>(stable) public static final int CJK_STROKES_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT</li>
+<li>(stable) public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock COPTIC</li>
+<li>(stable) public static final int COPTIC_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock ETHIOPIC_EXTENDED</li>
+<li>(stable) public static final int ETHIOPIC_EXTENDED_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock ETHIOPIC_SUPPLEMENT</li>
+<li>(stable) public static final int ETHIOPIC_SUPPLEMENT_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock GEORGIAN_SUPPLEMENT</li>
+<li>(stable) public static final int GEORGIAN_SUPPLEMENT_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock GLAGOLITIC</li>
+<li>(stable) public static final int GLAGOLITIC_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock KHAROSHTHI</li>
+<li>(stable) public static final int KHAROSHTHI_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock MODIFIER_TONE_LETTERS</li>
+<li>(stable) public static final int MODIFIER_TONE_LETTERS_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock NEW_TAI_LUE</li>
+<li>(stable) public static final int NEW_TAI_LUE_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock OLD_PERSIAN</li>
+<li>(stable) public static final int OLD_PERSIAN_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT</li>
+<li>(stable) public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock SUPPLEMENTAL_PUNCTUATION</li>
+<li>(stable) public static final int SUPPLEMENTAL_PUNCTUATION_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock SYLOTI_NAGRI</li>
+<li>(stable) public static final int SYLOTI_NAGRI_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock TIFINAGH</li>
+<li>(stable) public static final int TIFINAGH_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock VERTICAL_FORMS</li>
+<li>(stable) public static final int VERTICAL_FORMS_ID</li>
+</ul>
+UProperty
+<ul>
+<li>(stable) public static final int GRAPHEME_CLUSTER_BREAK</li>
+<li>(stable) public static final int PATTERN_SYNTAX</li>
+<li>(stable) public static final int PATTERN_WHITE_SPACE</li>
+<li>(stable) public static final int POSIX_ALNUM</li>
+<li>(stable) public static final int POSIX_BLANK</li>
+<li>(stable) public static final int POSIX_GRAPH</li>
+<li>(stable) public static final int POSIX_PRINT</li>
+<li>(stable) public static final int POSIX_XDIGIT</li>
+<li>(stable) public static final int SENTENCE_BREAK</li>
+<li>(stable) public static final int WORD_BREAK</li>
+</ul>
+UScript
+<ul>
+<li>(stable) public static final int BUGINESE</li>
+<li>(stable) public static final int GLAGOLITIC</li>
+<li>(stable) public static final int KHAROSHTHI</li>
+<li>(stable) public static final int NEW_TAI_LUE</li>
+<li>(stable) public static final int OLD_PERSIAN</li>
+<li>(stable) public static final int SYLOTI_NAGRI</li>
+<li>(stable) public static final int TIFINAGH</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+<li>(stable) public class <i>CharsetDetector</i></li>
+<li>(stable) public class <i>CharsetMatch</i></li>
+<li>(draft) public static abstract class <i>UnicodeSet.XSymbolTable</i></li>
+ArabicShapingException
+<ul>
+<li>(draft) public <i>ArabicShapingException</i>(java.lang.String)</li>
+</ul>
+BreakIterator
+<ul>
+<li>(stable) public static BreakIterator <i>getCharacterInstance</i>(ULocale)</li>
+<li>(stable) public static BreakIterator <i>getLineInstance</i>(ULocale)</li>
+<li>(stable) public static BreakIterator <i>getSentenceInstance</i>(ULocale)</li>
+<li>(stable) public static BreakIterator <i>getTitleInstance</i>(ULocale)</li>
+<li>(stable) public static BreakIterator <i>getWordInstance</i>(ULocale)</li>
+<li>(stable) public static java.lang.Object <i>registerInstance</i>(BreakIterator, ULocale, int)</li>
+</ul>
+ChineseDateFormat
+<ul>
+<li>(stable) public <i>ChineseDateFormat</i>(java.lang.String, ULocale)</li>
+</ul>
+ChineseDateFormatSymbols
+<ul>
+<li>(stable) public <i>ChineseDateFormatSymbols</i>(Calendar, ULocale)</li>
+<li>(stable) public <i>ChineseDateFormatSymbols</i>(ULocale)</li>
+</ul>
+Collator
+<ul>
+<li>(stable) public static final int FULL_DECOMPOSITION</li>
+<li>(stable) public static java.lang.String <i>getDisplayName</i>(ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayName</i>(ULocale, ULocale)</li>
+<li>(stable) public static final java.lang.Object <i>registerInstance</i>(Collator, ULocale)</li>
+</ul>
+Collator.CollatorFactory
+<ul>
+<li>(stable) public Collator <i>createCollator</i>(ULocale)</li>
+<li>(stable) public java.lang.String <i>getDisplayName</i>(ULocale, ULocale)</li>
+</ul>
+DateFormat
+<ul>
+<li>(stable) public static final int STANDALONE_DAY_FIELD</li>
+<li>(stable) public static final int STANDALONE_MONTH_FIELD</li>
+<li>(stable) public static final int TIMEZONE_GENERIC_FIELD</li>
+<li>(stable) public static final DateFormat <i>getDateInstance</i>(Calendar, int, ULocale)</li>
+<li>(stable) public static final DateFormat <i>getDateInstance</i>(int, ULocale)</li>
+<li>(stable) public static final DateFormat <i>getDateTimeInstance</i>(Calendar, int, int, ULocale)</li>
+<li>(stable) public static final DateFormat <i>getTimeInstance</i>(Calendar, int, ULocale)</li>
+<li>(stable) public static final DateFormat <i>getTimeInstance</i>(int, ULocale)</li>
+</ul>
+DateFormatSymbols
+<ul>
+<li>(stable) public <i>DateFormatSymbols</i>(Calendar, ULocale)</li>
+<li>(stable) public <i>DateFormatSymbols</i>(ULocale)</li>
+<li>(stable) public <i>DateFormatSymbols</i>(java.lang.Class, ULocale)</li>
+<li>(stable) public <i>DateFormatSymbols</i>(java.util.ResourceBundle, ULocale)</li>
+<li>(stable) public static java.util.ResourceBundle <i>getDateFormatBundle</i>(Calendar, ULocale)</li>
+<li>(stable) public static java.util.ResourceBundle <i>getDateFormatBundle</i>(java.lang.Class, ULocale)</li>
+<li>(stable) public java.lang.String[] <i>getEraNames</i>()</li>
+<li>(stable) public java.lang.String[] <i>getMonths</i>(int, int)</li>
+<li>(stable) public java.lang.String[] <i>getWeekdays</i>(int, int)</li>
+<li>(stable) public void <i>setEraNames</i>(java.lang.String[])</li>
+<li>(stable) public void <i>setMonths</i>(java.lang.String[], int, int)</li>
+<li>(draft) public void <i>setQuarters</i>(java.lang.String[], int, int)</li>
+<li>(stable) public void <i>setWeekdays</i>(java.lang.String[], int, int)</li>
+</ul>
+DecimalFormat
+<ul>
+<li>(stable) public void <i>setRoundingIncrement</i>(BigDecimal)</li>
+</ul>
+DecimalFormatSymbols
+<ul>
+<li>(stable) public <i>DecimalFormatSymbols</i>(ULocale)</li>
+<li>(stable) public Currency <i>getCurrency</i>()</li>
+<li>(stable) public ULocale <i>getULocale</i>()</li>
+<li>(stable) public void <i>setCurrency</i>(Currency)</li>
+</ul>
+MessageFormat
+<ul>
+<li>(stable) public static java.lang.String <i>autoQuoteApostrophe</i>(java.lang.String)</li>
+</ul>
+Normalizer
+<ul>
+<li>(draft) public static int <i>getFC_NFKC_Closure</i>(int, char[])</li>
+<li>(draft) public static java.lang.String <i>getFC_NFKC_Closure</i>(int)</li>
+</ul>
+NumberFormat
+<ul>
+<li>(stable) public static NumberFormat <i>getCurrencyInstance</i>(ULocale)</li>
+<li>(stable) public static NumberFormat <i>getInstance</i>(ULocale)</li>
+<li>(stable) public static NumberFormat <i>getIntegerInstance</i>(ULocale)</li>
+<li>(stable) public static NumberFormat <i>getNumberInstance</i>(ULocale)</li>
+<li>(stable) protected static java.lang.String <i>getPattern</i>(ULocale, int)</li>
+<li>(stable) public static NumberFormat <i>getPercentInstance</i>(ULocale)</li>
+<li>(stable) public static NumberFormat <i>getScientificInstance</i>(ULocale)</li>
+</ul>
+NumberFormat.NumberFormatFactory
+<ul>
+<li>(stable) public NumberFormat <i>createFormat</i>(ULocale, int)</li>
+</ul>
+NumberFormat.SimpleNumberFormatFactory
+<ul>
+<li>(stable) public <i>NumberFormat.SimpleNumberFormatFactory</i>(ULocale)</li>
+<li>(stable) public <i>NumberFormat.SimpleNumberFormatFactory</i>(ULocale, boolean)</li>
+</ul>
+RawCollationKey
+<ul>
+<li>(stable) public int <i>compareTo</i>(java.lang.Object)</li>
+</ul>
+RuleBasedCollator
+<ul>
+<li>(stable) public void <i>getContractionsAndExpansions</i>(UnicodeSet, UnicodeSet, boolean)</li>
+</ul>
+RuleBasedNumberFormat
+<ul>
+<li>(stable) public <i>RuleBasedNumberFormat</i>(ULocale, int)</li>
+<li>(stable) public <i>RuleBasedNumberFormat</i>(java.lang.String, ULocale)</li>
+<li>(stable) public <i>RuleBasedNumberFormat</i>(java.lang.String, java.lang.String[][])</li>
+<li>(stable) public <i>RuleBasedNumberFormat</i>(java.lang.String, java.lang.String[][], ULocale)</li>
+<li>(stable) public java.lang.String <i>getRuleSetDisplayName</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getRuleSetDisplayName</i>(java.lang.String, ULocale)</li>
+<li>(stable) public ULocale[] <i>getRuleSetDisplayNameLocales</i>()</li>
+<li>(stable) public java.lang.String[] <i>getRuleSetDisplayNames</i>()</li>
+<li>(stable) public java.lang.String[] <i>getRuleSetDisplayNames</i>(ULocale)</li>
+</ul>
+SimpleDateFormat
+<ul>
+<li>(stable) public <i>SimpleDateFormat</i>(java.lang.String, ULocale)</li>
+</ul>
+StringSearch
+<ul>
+<li>(stable) public <i>StringSearch</i>(java.lang.String, java.text.CharacterIterator, ULocale)</li>
+</ul>
+Transliterator
+<ul>
+<li>(stable) public static java.lang.String <i>getDisplayName</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static void <i>registerAlias</i>(java.lang.String, java.lang.String)</li>
+</ul>
+UnicodeSet
+<ul>
+<li>(stable) public static final int ADD_CASE_MAPPINGS</li>
+<li>(draft) public static final int CASE</li>
+<li>(stable) public static final int CASE_INSENSITIVE</li>
+<li>(draft) public static final int IGNORE_SPACE</li>
+<li>(draft) public <i>UnicodeSet</i>(java.lang.String, int)</li>
+<li>(stable) public <i>UnicodeSet</i>(java.lang.String, java.text.ParsePosition, SymbolTable, int)</li>
+<li>(draft) public java.lang.StringBuffer <i>_generatePattern</i>(java.lang.StringBuffer, boolean, boolean)</li>
+<li>(draft) public UnicodeSet <i>applyPattern</i>(java.lang.String, int)</li>
+<li>(stable) public UnicodeSet <i>applyPropertyAlias</i>(java.lang.String, java.lang.String, SymbolTable)</li>
+<li>(draft) public java.lang.Object <i>cloneAsThawed</i>()</li>
+<li>(draft) public UnicodeSet <i>closeOver</i>(int)</li>
+<li>(draft) public java.lang.Object <i>freeze</i>()</li>
+<li>(draft) public boolean <i>isFrozen</i>()</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+<li>(stable) public final class <i>CopticCalendar</i></li>
+<li>(stable) public final class <i>EthiopicCalendar</i></li>
+<li>(stable) public interface <i>Freezable</i></li>
+<li>(stable) public final class <i>UniversalTimeScale</i></li>
+BuddhistCalendar
+<ul>
+<li>(stable) public <i>BuddhistCalendar</i>(TimeZone, ULocale)</li>
+<li>(stable) public <i>BuddhistCalendar</i>(ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+Calendar
+<ul>
+<li>(stable) protected <i>Calendar</i>(TimeZone, ULocale)</li>
+<li>(stable) public int <i>compareTo</i>(Calendar)</li>
+<li>(stable) public int <i>compareTo</i>(java.lang.Object)</li>
+<li>(stable) public DateFormat <i>getDateTimeFormat</i>(int, int, ULocale)</li>
+<li>(stable) public java.lang.String <i>getDisplayName</i>(ULocale)</li>
+<li>(stable) public static synchronized Calendar <i>getInstance</i>(TimeZone, ULocale)</li>
+<li>(stable) public static synchronized Calendar <i>getInstance</i>(ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+ChineseCalendar
+<ul>
+<li>(stable) public <i>ChineseCalendar</i>(TimeZone, ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+Currency
+<ul>
+<li>(stable) public static Currency <i>getInstance</i>(ULocale)</li>
+<li>(stable) public java.lang.String <i>getName</i>(ULocale, int, boolean[])</li>
+<li>(stable) public java.lang.String <i>getName</i>(java.util.Locale, int, boolean[])</li>
+<li>(stable) public java.lang.String <i>getSymbol</i>()</li>
+<li>(stable) public java.lang.String <i>getSymbol</i>(ULocale)</li>
+<li>(stable) public java.lang.String <i>getSymbol</i>(java.util.Locale)</li>
+<li>(stable) public static java.lang.Object <i>registerInstance</i>(Currency, ULocale)</li>
+</ul>
+GregorianCalendar
+<ul>
+<li>(stable) public <i>GregorianCalendar</i>(TimeZone, ULocale)</li>
+<li>(stable) public <i>GregorianCalendar</i>(ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+HebrewCalendar
+<ul>
+<li>(stable) public <i>HebrewCalendar</i>(TimeZone, ULocale)</li>
+<li>(stable) public <i>HebrewCalendar</i>(ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+IslamicCalendar
+<ul>
+<li>(stable) public <i>IslamicCalendar</i>(TimeZone, ULocale)</li>
+<li>(stable) public <i>IslamicCalendar</i>(ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+JapaneseCalendar
+<ul>
+<li>(stable) public <i>JapaneseCalendar</i>(TimeZone, ULocale)</li>
+<li>(stable) public <i>JapaneseCalendar</i>(ULocale)</li>
+<li>(draft) public java.lang.String <i>getType</i>()</li>
+</ul>
+LocaleData
+<ul>
+<li>(stable) public static final int ALT_QUOTATION_END</li>
+<li>(stable) public static final int ALT_QUOTATION_START</li>
+<li>(stable) public static final int DELIMITER_COUNT</li>
+<li>(stable) public static final int ES_AUXILIARY</li>
+<li>(stable) public static final int ES_COUNT</li>
+<li>(stable) public static final int ES_STANDARD</li>
+<li>(stable) public static final int QUOTATION_END</li>
+<li>(stable) public static final int QUOTATION_START</li>
+<li>(stable) public java.lang.String <i>getDelimiter</i>(int)</li>
+<li>(stable) public UnicodeSet <i>getExemplarSet</i>(int, int)</li>
+<li>(stable) public static final LocaleData <i>getInstance</i>()</li>
+<li>(stable) public static final LocaleData <i>getInstance</i>(ULocale)</li>
+<li>(stable) public boolean <i>getNoSubstitute</i>()</li>
+<li>(stable) public void <i>setNoSubstitute</i>(boolean)</li>
+</ul>
+SimpleTimeZone
+<ul>
+<li>(stable) public <i>SimpleTimeZone</i>(int, java.lang.String, int, int, int, int, int, int, int, int, int, int, int)</li>
+<li>(stable) public java.lang.Object <i>clone</i>()</li>
+<li>(stable) public boolean <i>equals</i>(java.lang.Object)</li>
+<li>(stable) public int <i>getRawOffset</i>()</li>
+<li>(stable) public boolean <i>hasSameRules</i>(TimeZone)</li>
+<li>(stable) public int <i>hashCode</i>()</li>
+<li>(stable) public boolean <i>inDaylightTime</i>(java.util.Date)</li>
+<li>(stable) public void <i>setRawOffset</i>(int)</li>
+<li>(stable) public java.lang.String <i>toString</i>()</li>
+<li>(stable) public boolean <i>useDaylightTime</i>()</li>
+</ul>
+TimeZone
+<ul>
+<li>(stable) public boolean <i>equals</i>(java.lang.Object)</li>
+<li>(stable) public java.lang.String <i>getDisplayName</i>(boolean, int, ULocale)</li>
+<li>(stable) public final java.lang.String <i>getDisplayName</i>(ULocale)</li>
+<li>(stable) public int <i>hashCode</i>()</li>
+</ul>
+ULocale
+<ul>
+<li>(stable) public static ULocale <i>acceptLanguage</i>(ULocale[], boolean[])</li>
+<li>(stable) public static ULocale <i>acceptLanguage</i>(ULocale[], ULocale[], boolean[])</li>
+<li>(stable) public static ULocale <i>acceptLanguage</i>(java.lang.String, boolean[])</li>
+<li>(stable) public static ULocale <i>acceptLanguage</i>(java.lang.String, ULocale[], boolean[])</li>
+</ul>
+UResourceBundle
+<ul>
+<li>(draft) public static UResourceBundle <i>getBundleInstance</i>(java.lang.String, ULocale, java.lang.ClassLoader)</li>
+<li>(draft) public static UResourceBundle <i>getBundleInstance</i>(java.lang.String, java.util.Locale, java.lang.ClassLoader)</li>
+</ul>
+VersionInfo
+<ul>
+<li>(stable) public static final VersionInfo UNICODE_4_0_1</li>
+<li>(stable) public static final VersionInfo UNICODE_4_1</li>
+<li>(stable) public static final VersionInfo UNICODE_5_0</li>
+</ul>
+</ul>
+
+
+<hr/>
+<h2>Added in ICU4J 3.8</h2>
+
+<h3>Package com.ibm.icu.lang</h3>
+<ul>
+UCharacter
+<ul>
+<li>(draft) public static final int TITLECASE_NO_BREAK_ADJUSTMENT</li>
+<li>(draft) public static final int TITLECASE_NO_LOWERCASE</li>
+<li>(draft) public static java.lang.String <i>toTitleCase</i>(ULocale, java.lang.String, BreakIterator, int)</li>
+</ul>
+UScript
+<ul>
+<li>(draft) public static final int CARIAN</li>
+<li>(draft) public static final int JAPANESE</li>
+<li>(draft) public static final int LANNA</li>
+<li>(draft) public static final int LYCIAN</li>
+<li>(draft) public static final int LYDIAN</li>
+<li>(draft) public static final int MEITEI_MAYEK</li>
+<li>(draft) public static final int MOON</li>
+<li>(draft) public static final int OL_CHIKI</li>
+<li>(draft) public static final int REJANG</li>
+<li>(draft) public static final int SAURASHTRA</li>
+<li>(draft) public static final int SIGN_WRITING</li>
+<li>(draft) public static final int SUNDANESE</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+<li>(draft) public class <i>Bidi</i></li>
+<li>(draft) public class <i>BidiClassifier</i></li>
+<li>(draft) public class <i>BidiRun</i></li>
+<li>(stable) public static class <i>ChineseDateFormat.Field</i></li>
+<li>(stable) public static class <i>DateFormat.Field</i></li>
+<li>(draft) public abstract class <i>DurationFormat</i></li>
+<li>(stable) public static class <i>MessageFormat.Field</i></li>
+<li>(draft) public class <i>PluralFormat</i></li>
+<li>(draft) public class <i>PluralRules</i></li>
+<li>(draft) public interface <i>StringTransform</i></li>
+ChineseDateFormat
+<ul>
+<li>(draft) protected DateFormat.Field <i>patternCharToDateFormatField</i>(char)</li>
+</ul>
+DateFormat
+<ul>
+<li>(draft) public static final int NONE</li>
+<li>(draft) public static final int RELATIVE</li>
+<li>(draft) public static final int RELATIVE_DEFAULT</li>
+<li>(draft) public static final int RELATIVE_FULL</li>
+<li>(draft) public static final int RELATIVE_LONG</li>
+<li>(draft) public static final int RELATIVE_MEDIUM</li>
+<li>(draft) public static final int RELATIVE_SHORT</li>
+<li>(stable) public static final int TIMEZONE_SPECIAL_FIELD</li>
+</ul>
+DateFormatSymbols
+<ul>
+<li>(stable) public static java.util.Locale[] <i>getAvailableLocales</i>()</li>
+<li>(draft) public static ULocale[] <i>getAvailableULocales</i>()</li>
+<li>(stable) public static DateFormatSymbols <i>getInstance</i>()</li>
+<li>(draft) public static DateFormatSymbols <i>getInstance</i>(ULocale)</li>
+<li>(stable) public static DateFormatSymbols <i>getInstance</i>(java.util.Locale)</li>
+</ul>
+DateTimePatternGenerator
+<ul>
+<li>(draft) public DateTimePatternGenerator <i>addPattern</i>(java.lang.String, boolean, DateTimePatternGenerator.PatternInfo)</li>
+<li>(draft) public java.lang.String <i>getAppendItemFormat</i>(int)</li>
+<li>(draft) public java.lang.String <i>getAppendItemName</i>(int)</li>
+<li>(draft) public static DateTimePatternGenerator <i>getEmptyInstance</i>()</li>
+<li>(draft) public void <i>setAppendItemFormat</i>(int, java.lang.String)</li>
+<li>(draft) public void <i>setAppendItemName</i>(int, java.lang.String)</li>
+</ul>
+DecimalFormatSymbols
+<ul>
+<li>(stable) public static java.util.Locale[] <i>getAvailableLocales</i>()</li>
+<li>(draft) public static ULocale[] <i>getAvailableULocales</i>()</li>
+<li>(stable) public static DecimalFormatSymbols <i>getInstance</i>()</li>
+<li>(draft) public static DecimalFormatSymbols <i>getInstance</i>(ULocale)</li>
+<li>(stable) public static DecimalFormatSymbols <i>getInstance</i>(java.util.Locale)</li>
+</ul>
+MessageFormat
+<ul>
+<li>(draft) public static java.lang.String <i>format</i>(java.lang.String, java.util.Map)</li>
+<li>(draft) public final java.lang.StringBuffer <i>format</i>(java.util.Map, java.lang.StringBuffer, java.text.FieldPosition)</li>
+<li>(stable) public java.text.AttributedCharacterIterator <i>formatToCharacterIterator</i>(java.lang.Object)</li>
+<li>(draft) public java.util.Map <i>parseToMap</i>(java.lang.String)</li>
+<li>(draft) public java.util.Map <i>parseToMap</i>(java.lang.String, java.text.ParsePosition)</li>
+<li>(draft) public void <i>setFormatByArgumentName</i>(java.lang.String, java.text.Format)</li>
+<li>(draft) public void <i>setFormatsByArgumentName</i>(java.util.Map)</li>
+<li>(draft) public boolean <i>usesNamedArguments</i>()</li>
+</ul>
+SimpleDateFormat
+<ul>
+<li>(stable) public java.text.AttributedCharacterIterator <i>formatToCharacterIterator</i>(java.lang.Object)</li>
+<li><span style='color:red'>*internal* </span>public static SimpleDateFormat <i>getInstance</i>(Calendar.FormatConfiguration)</li>
+<li>(draft) protected DateFormat.Field <i>patternCharToDateFormatField</i>(char)</li>
+</ul>
+StringPrepParseException
+<ul>
+<li>(draft) public static final int DOMAIN_NAME_TOO_LONG_ERROR</li>
+<li>(draft) public int <i>getError</i>()</li>
+</ul>
+Transliterator
+<ul>
+<li>(draft) public java.lang.String <i>transform</i>(java.lang.String)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+<li>(draft) public class <i>AnnualTimeZoneRule</i></li>
+<li>(draft) public abstract class <i>BasicTimeZone</i></li>
+<li><span style='color:red'>*internal* </span>public static class <i>Calendar.FormatConfiguration</i></li>
+<li>(draft) public class <i>DateTimeRule</i></li>
+<li>(draft) public class <i>IndianCalendar</i></li>
+<li>(draft) public class <i>InitialTimeZoneRule</i></li>
+<li>(draft) public class <i>RuleBasedTimeZone</i></li>
+<li>(draft) public class <i>TaiwanCalendar</i></li>
+<li>(draft) public class <i>TimeArrayTimeZoneRule</i></li>
+<li>(draft) public abstract class <i>TimeZoneRule</i></li>
+<li>(draft) public class <i>TimeZoneTransition</i></li>
+<li>(draft) public class <i>UResourceBundleIterator</i></li>
+<li>(draft) public class <i>VTimeZone</i></li>
+SimpleTimeZone
+<ul>
+<li>(stable) public static final int STANDARD_TIME</li>
+<li>(stable) public static final int UTC_TIME</li>
+<li>(stable) public static final int WALL_TIME</li>
+<li>(draft) public TimeZoneTransition <i>getNextTransition</i>(long, boolean)</li>
+<li>(draft) public TimeZoneTransition <i>getPreviousTransition</i>(long, boolean)</li>
+<li>(draft) public TimeZoneRule[] <i>getTimeZoneRules</i>()</li>
+<li>(stable) public void <i>setID</i>(java.lang.String)</li>
+</ul>
+TimeZone
+<ul>
+<li>(draft) public static synchronized java.lang.String <i>getTZDataVersion</i>()</li>
+</ul>
+UResourceBundle
+<ul>
+<li><span style='color:red'>*internal* </span>protected static final int ALIAS</li>
+<li>(draft) public static final int ARRAY</li>
+<li>(draft) public static final int BINARY</li>
+<li>(draft) public static final int INT</li>
+<li>(draft) public static final int INT_VECTOR</li>
+<li>(draft) public static final int NONE</li>
+<li>(draft) public static final int STRING</li>
+<li>(draft) public static final int TABLE</li>
+<li><span style='color:red'>*internal* </span>protected static final int TABLE32</li>
+<li><span style='color:red'>*internal* </span>protected boolean isTopLevel</li>
+<li><span style='color:red'>*internal* </span>protected java.lang.String key</li>
+<li><span style='color:red'>*internal* </span>protected long resource</li>
+<li><span style='color:red'>*internal* </span>protected int size</li>
+<li>(draft) public UResourceBundle <i>get</i>(int)</li>
+<li>(draft) public UResourceBundle <i>get</i>(java.lang.String)</li>
+<li>(draft) public byte[] <i>getBinary</i>(byte[])</li>
+<li>(draft) public java.nio.ByteBuffer <i>getBinary</i>()</li>
+<li>(draft) public int <i>getInt</i>()</li>
+<li>(draft) public int[] <i>getIntVector</i>()</li>
+<li>(draft) public UResourceBundleIterator <i>getIterator</i>()</li>
+<li>(draft) public java.lang.String <i>getKey</i>()</li>
+<li>(draft) public java.util.Enumeration <i>getKeys</i>()</li>
+<li>(draft) public int <i>getSize</i>()</li>
+<li>(draft) public java.lang.String <i>getString</i>()</li>
+<li>(draft) public java.lang.String <i>getString</i>(int)</li>
+<li>(draft) public java.lang.String[] <i>getStringArray</i>()</li>
+<li>(draft) public int <i>getType</i>()</li>
+<li>(draft) public int <i>getUInt</i>()</li>
+<li>(draft) public VersionInfo <i>getVersion</i>()</li>
+<li>(draft) protected UResourceBundle <i>handleGet</i>(int, java.util.HashMap, UResourceBundle)</li>
+<li>(draft) protected UResourceBundle <i>handleGet</i>(java.lang.String, java.util.HashMap, UResourceBundle)</li>
+<li>(draft) protected java.util.Enumeration <i>handleGetKeys</i>()</li>
+<li>(draft) protected java.lang.Object <i>handleGetObject</i>(java.lang.String)</li>
+<li>(draft) protected java.lang.String[] <i>handleGetStringArray</i>()</li>
+</ul>
+UniversalTimeScale
+<ul>
+<li>(draft) public static final int UNIX_MICROSECONDS_TIME</li>
+</ul>
+</ul>
+
+<hr/>
+<p><i><font size="-1">Contents generated by ReportAPI tool on Sun Aug 26 23:15:39 EDT 2007<br/>Copyright (C) 2007, International Business Machines Corporation, All Rights Reserved.</font></i></p>
+</body>
+</html>
diff --git a/build.properties b/build.properties
new file mode 100644
index 0000000..47221a9
--- /dev/null
+++ b/build.properties
@@ -0,0 +1,30 @@
+#*
+#*******************************************************************************
+#* Copyright (C) 2006-2008, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+#* This is the properties file for ICU4J builds.
+#*
+
+# spec version won't be changed in a feature stream
+icu4j.spec.version.string=3.9
+
+# impl version will be updated for maintenance releases.
+# It must be <icu4j.spec.version.string>[.<maint-version>]
+icu4j.impl.version.string=3.9.2
+
+# data version number won't be changed in a feature stream
+icu4j.data.version.number=39
+
+# these version numbers are used by API change report
+icu4j.version.number=392
+icu4j.previous.version.number=381
+
+current.year=2008
+api.report.out=${api.dir}/icu4j_compare_${icu4j.previous.version.number}_${icu4j.version.number}.html
+copyright=Copyright (c) 2000-2008, International Business Machines Corporation and others. All Rights Reserved.
+corp=IBM Corporation
+default.target.rt.version=J2SE15
+
+icu4j.plugin.impl.version.string=3.9.2
+copyright.eclipse=Licensed Materials - Property of IBM \n (C) Copyright IBM Corp. 2000, 2008. All Rights Reserved. \n IBM is a registered trademark of IBM Corp.
\ No newline at end of file
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..0020499
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,2218 @@
+<!--
+/*
+*******************************************************************************
+* Copyright (C) 1997-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+* This is the ant build file for ICU4J. See readme.html for more information.
+*/
+-->
+
+<project name="icu4j" default="core" basedir=".">
+
+ <!-- ### Begin Clover 1.2 setup ### -->
+ <typedef resource="clovertypes" onerror="ignore" />
+ <taskdef resource="clovertasks" onerror="ignore" />
+
+ <!-- Configures Clover: creates the tmp/ directory and uses tmp/icu4j.db as
+ the Clover initString. Files matching **/dev/**/*.java are excluded, and a
+ method context named "API" is declared with a regexp that matches text
+ containing "public ". -->
+ <target name="with.clover">
+ <mkdir dir="tmp/" />
+ <clover-setup initString="tmp/icu4j.db">
+ <files>
+ <exclude name="**/dev/**/*.java" />
+ </files>
+ <methodContext name="API" regexp="(.* )?public .*" />
+ </clover-setup>
+ </target>
+
+ <!-- Runs with.clover, then writes an HTML Clover report to icu4j_html with
+ summary="true" and srcLevel="false". -->
+ <target name="clover.summary" depends="with.clover">
+ <clover-report>
+ <current outfile="icu4j_html" summary="true">
+ <format type="html" srcLevel="false" />
+ </current>
+ </clover-report>
+ </target>
+ <!-- Runs with.clover, then writes an HTML Clover report to icu4j_html using
+ the default summary and source-level settings. -->
+ <target name="clover.report" depends="with.clover">
+ <clover-report>
+ <current outfile="icu4j_html">
+ <format type="html" />
+ </current>
+ </clover-report>
+ </target>
+ <!-- Runs with.clover, then logs Clover results at method level with the
+ "private" filter for the five public ICU4J packages listed below. -->
+ <target name="clover.log" depends="with.clover">
+ <clover-log level="method" filter="private">
+ <package name="com.ibm.icu.text" />
+ <package name="com.ibm.icu.lang" />
+ <package name="com.ibm.icu.math" />
+ <package name="com.ibm.icu.util" />
+ <package name="com.ibm.icu.charset" />
+ </clover-log>
+ </target>
+
+ <!-- ### End Clover 1.2 setup ### -->
+
+ <!-- Sets supported.ant.version when the ant.version string contains "1.6.",
+ "1.7." or "1.8.", then calls warnAntVersion, which prints a warning only
+ if that property was left unset. -->
+ <target name="checkAntVersion">
+ <condition property="supported.ant.version">
+ <or>
+ <contains string="${ant.version}" substring="1.6." />
+ <contains string="${ant.version}" substring="1.7." />
+ <contains string="${ant.version}" substring="1.8." /> <!-- just in case -->
+ </or>
+ </condition>
+ <antcall target="warnAntVersion" />
+ </target>
+
+ <!-- Prints a warning banner; skipped when supported.ant.version is set. -->
+ <target name="warnAntVersion" unless="supported.ant.version">
+ <echo message="####################### WARNING #######################" />
+ <echo message="The version of ant used by the current configuration" />
+ <echo message="may not work well with this build script file. Please" />
+ <echo message="use ant 1.6 or later version." />
+ <echo message="#######################################################" />
+ </target>
+
+ <!-- Full initialization: environment setup (initBase) followed by source
+ preprocessing (initSrc). -->
+ <target name="init" depends="initBase,initSrc"/>
+
+ <!-- Sets up everything the other targets share: directory and archive
+ names, values loaded from build.properties, Java runtime detection, JVM
+ arguments for tests, and the up-to-date flags for resource data.
+ Properties defined before build.properties is loaded keep their values,
+ because an Ant property cannot be redefined once it is set. -->
+ <target name="initBase"
+ depends="checkAntVersion"
+ description="Initialize ICU4J build environment">
+ <tstamp />
+ <property name="src.dir" value="src" />
+ <property name="build.dir" value="classes" />
+ <property name="api.dir" value="${src.dir}/com/ibm/icu/dev/tool/docs" />
+ <property name="doc.dir" value="doc" />
+ <property name="icudatajar.file" value="${src.dir}/com/ibm/icu/impl/data/icudata.jar" />
+ <property name="testjar.file" value="icu4jtests.jar" />
+ <property name="jar.file" value="icu4j.jar" />
+ <property name="demos-jar.file" value="icu4jdemos.jar" />
+ <property name="charsets.jar.file" value="icu4j-charsets.jar" />
+ <property name="jarSrc.file" value="icu4jsrc.jar" />
+ <property name="zipTestSrc.file" value="icu4jtsrc.zip" />
+ <property name="jdk.wrapper.jar.file" value="icu4jwrapper.jar" />
+ <property name="fragment.jar.file" value="icu4jfragment.jar" />
+ <property name="eclipse.dir" value="${src.dir}/com/ibm/icu/dev/eclipse" />
+ <property name="eclipse.projects.dir" value="eclipseProjects" />
+ <property name="wrapper.build.dir" value="classes.wrapper" />
+ <property name="jarDocs.file" value="icu4jdocs.jar" />
+ <property name="ime.translit.jar.file" value="icutransime.jar" />
+ <property name="ime.indic.jar.file" value="icuindicime.jar" />
+ <property name="ime.translit.manifest" value="${src.dir}/com/ibm/icu/dev/tool/ime/translit/manifest.stub" />
+ <property name="ime.indic.manifest" value="${src.dir}/com/ibm/icu/dev/tool/ime/indic/manifest.stub" />
+ <property name="zip.file" value="../icu4j${DSTAMP}.zip" />
+ <property name="zipSrc.file" value="../icu4jSrc${DSTAMP}.zip" />
+ <property name="tzu.src.path" value="com/ibm/icu/dev/tool/tzu" />
+ <property name="tzu.bin.dir" value="${basedir}/icu4jtzu" />
+ <property name="tzu.jar.file" value="icutzu.jar" />
+ <property name="tzu.test.dir" value="${basedir}/icu4jtzu/test" />
+ <property name="tzu.temp.dir" value="${basedir}/icu4jtzu/Temp" />
+ <property name="tzu.src.zip.file" value="icutzu-src.zip" />
+ <property name="tzu.bin.zip.file" value="icutzu-bin.zip" />
+ <property file="build.properties" />
+ <!-- fix the data folder every time there is a version update-->
+
+ <property name="icu4j.data.path" value="com/ibm/icu/impl/data/icudt${icu4j.data.version.number}b" />
+ <property name="icu4j.testdata.path" value="com/ibm/icu/dev/data/testdata" />
+ <property name="icu4j.javac.source" value="1.3" />
+ <property name="icu4j.javac.target" value="1.3" />
+
+ <path id="build.classpath">
+ <pathelement path="${build.dir}" />
+ </path>
+
+ <property name="richedit.dir" value="richedit" />
+ <property name="richedit.doc.dir" value="${richedit.dir}/doc" />
+ <property name="richedit.jar.file" value="${richedit.dir}/richedit.jar" />
+ <property name="richedit.zip.file" value="${richedit.dir}/richedit.zip" />
+ <property name="richedit.manifest" value="${src.dir}/com/ibm/richtext/manifest.stub" />
+
+ <!-- Load environment variables -->
+ <property environment="env" />
+
+ <!-- Capture the computer name in a cross-platform manner -->
+ <property name="env.COMPUTERNAME" value="${env.HOSTNAME}" />
+
+ <!-- JavaDoc params -->
+ <property name="doc.params" value="-breakiterator -use -tagletpath ./classes -taglet com.ibm.icu.dev.tool.docs.ICUTaglet -group 'ICU Core' 'com.ibm.icu.lang*:com.ibm.icu.math*:com.ibm.icu.text*:com.ibm.icu.util*:com.ibm.icu.charset' -group 'ICU Tests' 'com.ibm.icu.dev.test*' -group 'Demos' 'com.ibm.icu.dev.demo*' -group 'ICU Tools' 'com.ibm.icu.dev*'" />
+ <property name="richeditdoc.params" value="-breakiterator -use -tagletpath ./classes -taglet com.ibm.icu.dev.tool.docs.ICUTaglet -group 'Rich Text Editing' 'com.ibm.richtext*'" />
+
+ <mkdir dir="${build.dir}" />
+
+ <echo message="java home: ${java.home}" />
+ <echo message="java version: ${java.version}" />
+ <echo message="ant java version: ${ant.java.version}" />
+ <echo message="${ant.version}" />
+ <echo message="${env.COMPUTERNAME} with ${os.name} ${os.version} on ${os.arch}" />
+ <echo message="clover initstring = '${clover.initstring}'" />
+
+ <!-- ## Java version dependent build configuration ##-->
+
+ <!-- JRE lib version used for building ICU4J -->
+ <condition property="target.rt.version" value="J2SE13">
+ <contains string="${java.version}" substring="1.3." />
+ </condition>
+ <condition property="target.rt.version" value="J2SE14">
+ <contains string="${java.version}" substring="1.4." />
+ </condition>
+ <condition property="target.rt.version" value="J2SE15">
+ <contains string="${java.version}" substring="1.5." />
+ </condition>
+ <condition property="target.rt.version" value="JAVASE6">
+ <contains string="${java.version}" substring="1.6." />
+ </condition>
+
+ <fail message="Failed to detect Java runtime library version used for building ICU4J"
+ unless="target.rt.version" />
+ <echo message="target runtime environment: ${target.rt.version}" />
+
+ <!-- Set only when the detected runtime equals the reference runtime from build.properties -->
+ <condition property="default.java.build.target">
+ <equals arg1="${target.rt.version}" arg2="${default.target.rt.version}" />
+ </condition>
+
+ <!-- Specification title in manifest file -->
+ <condition property="manifest.specification.title" value="ICU4J Compatible Build for ${target.rt.version}">
+ <not>
+ <isset property="default.java.build.target" />
+ </not>
+ </condition>
+ <!-- Fallback title; has no effect when the condition above already set the property -->
+ <property name="manifest.specification.title" value="ICU4J" />
+
+ <condition property="before.java14">
+ <or>
+ <equals arg1="${target.rt.version}" arg2="FOUNDATION10"/>
+ <equals arg1="${target.rt.version}" arg2="J2SE13"/>
+ </or>
+ </condition>
+
+ <condition property="before.java15">
+ <or>
+ <equals arg1="${target.rt.version}" arg2="FOUNDATION10"/>
+ <equals arg1="${target.rt.version}" arg2="J2SE13"/>
+ <equals arg1="${target.rt.version}" arg2="J2SE14"/>
+ </or>
+ </condition>
+
+ <!-- JVM arguments for running test cases -->
+ <condition property="test.jvm.args" value="-Xms192m -Xmx224m">
+ <isset property="before.java14" />
+ </condition>
+ <condition property="test.jvm.args" value="-ea -Xms192m -Xmx224m -Xdisableexcessivegc">
+ <and>
+ <not>
+ <isset property="before.java15" />
+ </not>
+ <equals arg1="${java.vm.vendor}" arg2="IBM Corporation"/>
+ </and>
+ </condition>
+ <!-- Default arguments when neither of the two cases above applied -->
+ <condition property="test.jvm.args" value="-ea -Xms192m -Xmx224m">
+ <not>
+ <isset property="test.jvm.args" />
+ </not>
+ </condition>
+
+ <uptodate property="icu4j.resources" targetfile="${src.dir}/com/ibm/icu/impl/data">
+ <!-- note must not have '/' before 'com' in srcfiles includes arg! -->
+ <srcfiles dir="${build.dir}" includes="${icu4j.data.path}/res_index.res" />
+ </uptodate>
+
+ <uptodate property="icu4j.testdata.resources" targetfile="${src.dir}/com/ibm/icu/dev/data">
+ <!-- note must not have '/' before 'com' in srcfiles includes arg! -->
+ <!-- the property name must match icu4j.testdata.path defined above -->
+ <srcfiles dir="${build.dir}" includes="${icu4j.testdata.path}/root.res" />
+ </uptodate>
+
+ <uptodate property="icu4j.module.resources" targetfile="${icudatajar.file}">
+ <srcfiles dir="${build.dir}" includes="${icu4j.data.path}/*.icu" />
+ </uptodate>
+ <!-- <echo message="icu4j.module.resources result: ${icu4j.module.resources}" /> -->
+ <!-- HH (24-hour clock) is used because the pattern carries no AM/PM marker -->
+ <tstamp>
+ <format property="date.time" pattern="yyyy-MM-dd 'at' HH:mm:ss z" locale="en,US" />
+ </tstamp>
+
+ <echo message="Initialized at ${date.time}" />
+ </target>
+
+ <!-- Prints a warning banner; skipped when default.java.build.target is set,
+ i.e. when the detected runtime equals default.target.rt.version. -->
+ <target name="displayBuildEnvWarning" unless="default.java.build.target">
+ <echo message="####################### WARNING #######################" />
+ <echo message="The JDK version used by the current build environment" />
+ <echo message="does not match the reference JDK version for this" />
+ <echo message="ICU4J release. The build outputs may not be identical" />
+ <echo message="to the official ICU4J binary distribution." />
+ <echo message="#######################################################" />
+ </target>
+
+ <!-- Preprocesses the Java sources for the runtime detected by initBase
+ (target.rt.version). CodeMangler is compiled by the buildMangle
+ dependency, so it is not compiled a second time here. -->
+ <target name="initSrc"
+ depends="buildMangle"
+ description="Run the preprocessor tool to modify Java sources for the Java runtime lib currently used.">
+ <!-- Warn when the build JDK differs from the reference JDK -->
+ <antcall target="displayBuildEnvWarning" />
+ <!-- Run CodeMangler -->
+ <antcall target="doMangle">
+ <param name="target.defs" value="${target.rt.version}" />
+ <param name="input.file" value="@preprocessor.txt" />
+ </antcall>
+ </target>
+
+ <!-- Runs the preprocessor with default.target.rt.version (from
+ build.properties) instead of the detected runtime, using the same
+ @preprocessor.txt argument as initSrc. -->
+ <target name="normSrc"
+ depends="buildMangle"
+ description="Run the preprocessor to normalize Java sources to the ICU source repository target JDK version">
+ <antcall target="doMangle">
+ <param name="target.defs" value="${default.target.rt.version}" />
+ <param name="input.file" value="@preprocessor.txt" />
+ </antcall>
+ </target>
+
+ <!-- Compiles only the CodeMangler preprocessor tool into the build directory. -->
+ <target name="buildMangle" depends="initBase">
+ <javac srcdir="${src.dir}" destdir="${build.dir}" classpathref="build.classpath" source="${icu4j.javac.source}" target="${icu4j.javac.target}">
+ <include name="com/ibm/icu/dev/tool/docs/CodeMangler.java" />
+ </javac>
+ </target>
+
+ <!-- Runs CodeMangler from the build directory. Callers supply two
+ parameters: target.defs, passed as the -d option, and input.file, passed
+ as the second argument. Has no dependencies, so CodeMangler must already
+ be compiled (see buildMangle). -->
+ <target name="doMangle">
+ <echo message="Running source code preprocessor for [${target.defs}]"/>
+ <java classname="com.ibm.icu.dev.tool.docs.CodeMangler" classpath="${build.dir}" logError="true">
+ <arg value="-d${target.defs}" />
+ <arg value="${input.file}" />
+ </java>
+ </target>
+
+ <!-- build everything but dist-related stuff: core classes, tests, tools,
+ richedit, demos, the binary jar and the user javadoc -->
+ <target name="all" depends="core,tests,tools,richedit,demos,jar,docs" description="build all primary targets" />
+
+ <!-- Aggregate target: ICU data, core data files, duration data and test data. -->
+ <target name="resources" depends="icudata,coreData,durationdata,testdata" description="builds all the resources" />
+
+ <!-- Unpacks the ICU data jar into the build directory and copies the
+ META-INF files from the source tree. -->
+ <target name="icudata" depends="initBase" if="icu4j.resources">
+ <!-- this target runs only when icu4j.resources is set (see the uptodate
+ check in initBase); touching res_index.res updates the timestamp that
+ check compares against -->
+ <unjar src="${icudatajar.file}" dest="${build.dir}" />
+ <touch file="${build.dir}/${icu4j.data.path}/res_index.res" />
+ <copy todir="${build.dir}/META-INF">
+ <fileset dir="${src.dir}/META-INF" includes="**/*" />
+ </copy>
+ </target>
+
+ <!-- Copies the duration data files (index.txt and pfd*.xml) from the source
+ tree to the matching location in the build directory. -->
+ <target name="durationdata" depends="initBase">
+ <copy todir="${build.dir}/com/ibm/icu/impl/duration/impl/data">
+ <fileset dir="${src.dir}/com/ibm/icu/impl/duration/impl/data" includes="index.txt, pfd*.xml"/>
+ </copy>
+ </target>
+
+ <!--
+ use this target to conditionally build resources only if icu4j.resources is set
+ <target name="icu" depends="init" if="icu4j.resources">
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/ICULocaleData.jar" dest="${build.dir}"/>
+ <touch file="${build.dir}/com/ibm/icu/impl/data/LocaleElements_index.class"/>
+ </target>
+ -->
+
+ <!-- core does not build richedit or tests.
+ Compiles the impl, lang, math, text and util packages; the charset
+ package is included unless before.java14 is set. Sources are read as
+ ASCII. NOTE(review): sourcepath="" presumably keeps javac from pulling in
+ other sources under src.dir beyond the listed includes; confirm against
+ the Ant javac documentation. -->
+ <target name="core" depends="init,coreData,icudata,durationdata" description="build core classes and data">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off"
+ encoding="ascii">
+ <include name="com/ibm/icu/charset/**/*.java" unless="before.java14" />
+ <include name="com/ibm/icu/impl/**/*.java" />
+ <include name="com/ibm/icu/lang/**/*.java" />
+ <include name="com/ibm/icu/math/**/*.java" />
+ <include name="com/ibm/icu/text/**/*.java" />
+ <include name="com/ibm/icu/util/**/*.java" />
+ </javac>
+ </target>
+
+ <!-- Compiles everything under com/ibm/icu/dev/test. The charset and perf
+ test packages are excluded when before.java14 is set. -->
+ <target name="tests" depends="core,testdata" description="build tests">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <exclude name="com/ibm/icu/dev/test/charset/**/*.java" if="before.java14" />
+ <exclude name="com/ibm/icu/dev/test/perf/**/*.java" if="before.java14" />
+ <include name="com/ibm/icu/dev/test/**/*.java" />
+ </javac>
+
+ </target>
+
+ <!-- Compiles the demo sources under com/ibm/icu/dev/demo. The whole target
+ is skipped when before.java14 is set. -->
+ <target name="demos" depends="core,testdata" unless="before.java14" description="build demos">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/dev/demo/**/*.java" />
+ <include name="com/ibm/icu/dev/demo/*.java" />
+ </javac>
+ </target>
+
+ <!-- Runs IndexGenerator (compiled by build_indexgenerator) four times: once
+ on the main data directory in the build tree and once on each of its
+ rbnf, coll and translit subdirectories. -->
+ <target name="indices" depends="icudata,build_indexgenerator">
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}" />
+ </java>
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}/rbnf" />
+ </java>
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}/coll" />
+ </java>
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}/translit" />
+ </java>
+ </target>
+
+ <!-- Compiles the sources under com/ibm/icu/dev/tool. Files directly in
+ tool/cldr are always excluded; files directly in tool/localeconverter are
+ excluded when before.java15 is set. The whole target is skipped when
+ before.java14 is set. -->
+ <target name="tools"
+ depends="core"
+ unless="before.java14"
+ description="build tools">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <exclude name="com/ibm/icu/dev/tool/cldr/*" />
+ <exclude name="com/ibm/icu/dev/tool/localeconverter/*" if="before.java15" />
+ <include name="com/ibm/icu/dev/tool/**/*.java" />
+ </javac>
+ </target>
+
+ <!-- Compiles only the IndexGenerator tool, which the indices target runs. -->
+ <target name="build_indexgenerator" depends="initBase">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/dev/tool/index/IndexGenerator.java" />
+ </javac>
+ </target>
+
+ <!-- Dispatches to docs13 or docs14plus; each of those is guarded by
+ before.java14, so only one of them does any work. -->
+ <target name="docs" depends="init,docs13,docs14plus" description="build user javadoc" />
+
+ <!-- Javadoc for the lang, text, util and math packages; runs only when
+ before.java14 is set. The footer markup in the bottom attribute is
+ entity-escaped, because a literal less-than sign is not allowed inside
+ an XML attribute value. -->
+ <target name="docs13" if="before.java14">
+ <mkdir dir="${doc.dir}" />
+ <javadoc packagenames="com.ibm.icu.lang,com.ibm.icu.text,com.ibm.icu.util,com.ibm.icu.math"
+ sourcepath="${src.dir}"
+ destdir="${doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="icu4j"
+ doctitle="icu4j"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) ${current.year} IBM Corporation and others.&lt;/font&gt;"
+ link="http://java.sun.com/j2se/1.3/docs/api"
+ source="1.4" />
+ </target>
+
+ <!-- Javadoc for the public packages plus com.ibm.icu.dev, using the extra
+ options in doc.params; skipped when before.java14 is set. Depends on
+ tools, and doc.params names the ICUTaglet class under ./classes. The
+ footer markup in the bottom attribute is entity-escaped, because a literal
+ less-than sign is not allowed inside an XML attribute value. -->
+ <target name="docs14plus" depends="tools" unless="before.java14">
+ <echo message="doc params: ${doc.params}" />
+ <mkdir dir="${doc.dir}" />
+ <javadoc packagenames="com.ibm.icu.lang,com.ibm.icu.text,com.ibm.icu.util,com.ibm.icu.charset,com.ibm.icu.math,com.ibm.icu.dev"
+ sourcepath="${src.dir}"
+ destdir="${doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="icu4j"
+ doctitle="icu4j"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) ${current.year} IBM Corporation and others.&lt;/font&gt;"
+ additionalparam="${doc.params}"
+ link="http://java.sun.com/j2se/1.5/docs/api"
+ source="1.4" />
+ </target>
+
+ <!-- Javadoc for every package under com.ibm.icu, using the extra options in
+ doc.params; skipped when before.java14 is set. The footer markup in the
+ bottom attribute is entity-escaped, because a literal less-than sign is
+ not allowed inside an XML attribute value. -->
+ <target name="fulldocs" depends="tools" unless="before.java14" description="build all javadoc">
+ <echo message="doc params: ${doc.params}" />
+ <mkdir dir="${doc.dir}" />
+ <javadoc packagenames="com.ibm.icu.*"
+ sourcepath="${src.dir}"
+ destdir="${doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="icu4j"
+ doctitle="icu4j"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) ${current.year} IBM Corporation and others.&lt;/font&gt;"
+ additionalparam="${doc.params}"
+ link="http://java.sun.com/j2se/1.5/docs/api"
+ source="1.4" />
+ </target>
+
+ <!-- Copies the .icu, .spp and .brk files that sit directly in the impl/data
+ source directory, plus ICUConfig.properties, into the build directory. -->
+ <target name="coreData" depends="initBase">
+ <copy todir="${build.dir}/com/ibm/icu/impl/data">
+ <fileset dir="${src.dir}/com/ibm/icu/impl/data" includes="*.icu,*.spp,*.brk" excludes="Transliterator_Han_Latin_*.txt" />
+ </copy>
+ <copy file="${src.dir}/com/ibm/icu/ICUConfig.properties" todir="${build.dir}/com/ibm/icu"/>
+ </target>
+
+ <!-- Stages the data needed by the tests: copies individual data files from
+ the source tree, compiles the classes under dev/data and
+ dev/data/resources, and unpacks testdata.jar into the build directory. -->
+ <target name="testdata" depends="initBase">
+ <copy file="${src.dir}/com/ibm/icu/dev/data/rbbi/english.dict" todir="${build.dir}/com/ibm/icu/dev/data/rbbi" />
+ <copy file="${src.dir}/com/ibm/icu/dev/test/rbbi/rbbitst.txt" todir="${build.dir}/com/ibm/icu/dev/test/rbbi" />
+ <copy file="${src.dir}/com/ibm/icu/dev/test/charsetdet/CharsetDetectionTests.xml" todir="${build.dir}/com/ibm/icu/dev/test/charsetdet" />
+ <copy todir="${build.dir}/com/ibm/icu/dev/test/serializable/data">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/test/serializable/data">
+ <include name="**/*.dat" />
+ </fileset>
+ </copy>
+ <copy file="${src.dir}/com/ibm/icu/dev/data/riwords.txt" todir="${build.dir}/com/ibm/icu/dev/data" />
+ <copy file="${src.dir}/com/ibm/icu/dev/data/IDNATestInput.txt" todir="${build.dir}/com/ibm/icu/dev/data" />
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/dev/data/*.java" />
+ <include name="com/ibm/icu/dev/data/resources/*.java" />
+ </javac>
+ <copy todir="${build.dir}/com/ibm/icu/dev/data">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/data" includes="*.spp,*.txt" />
+ </copy>
+ <copy todir="${build.dir}/com/ibm/icu/dev/data/unicode/">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/data/unicode/" includes="*.txt" />
+ </copy>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/format/NumberFormatTestCases.txt" todir="${build.dir}/com/ibm/icu/dev/test/format" />
+ <copy todir="${build.dir}/com/ibm/icu/dev/data/resources/">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/data/resources/" includes="*.properties" />
+ </copy>
+ <copy todir="${build.dir}/com/ibm/icu/dev/test/duration/testdata">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/test/duration/testdata/" includes="testdata*.txt" />
+ </copy>
+ <!-- this target has no if/unless guard, so the test data jar is unpacked
+ and root.res is touched every time the target runs -->
+ <unjar src="${src.dir}/com/ibm/icu/dev/data/testdata.jar" dest="${build.dir}" />
+ <touch file="${build.dir}/${icu4j.testdata.path}/root.res" />
+ </target>
+
+ <!-- builds richedit and richedit tests: compiles everything under
+ com/ibm/richtext and copies the *.red resource files, always overwriting
+ existing copies -->
+ <target name="richedit" depends="init" description="build richedit classes and tests">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/richtext/**/*.java" />
+ </javac>
+ <copy todir="${build.dir}/com/ibm/richtext/textapps/resources" overwrite="yes" includeEmptyDirs="no">
+ <fileset dir="${src.dir}/com/ibm/richtext/textapps/resources" includes="*.red" />
+ </copy>
+ </target>
+
+ <!-- Creates the 3 release jar archives for distribution: binary (jar),
+ source (jarSrc) and documentation (jarDocs) -->
+ <target name="jarRelease" depends="jar,jarSrc,jarDocs" />
+
+ <!-- Packages the license files, ICUConfig.properties and the util, text,
+ math, impl and lang trees from the build directory into the main jar.
+ The charset classes, converter (.cnv) data, cnvalias.icu and
+ META-INF/services entries are excluded here; the charsetsJar target
+ packages those. -->
+ <target name="jar"
+ depends="core,indices,charsetsJar"
+ description="build 'icu4j.jar' jar file">
+ <jar jarfile="${jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html,unicode-license.txt" />
+ <fileset dir="${src.dir}" includes="com/ibm/icu/ICUConfig.properties" />
+ <fileset dir="${build.dir}" includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*" excludes="META-INF/services/**/*,com/ibm/icu/charset/**/*,${icu4j.data.path}/*.cnv,${icu4j.data.path}/cnvalias.icu" />
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="${manifest.specification.title}" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Packages the charset classes, converter (.cnv) data, cnvalias.icu and
+ the META-INF/services entries into the charsets jar. Skipped when
+ before.java14 is set. -->
+ <target name="charsetsJar"
+ depends="core,indices"
+ unless="before.java14"
+ description="build 'icu4j-charsets.jar' jar file">
+ <echo message="${icu4j.data.path}/cnvalias.icu"/>
+ <jar jarfile="${charsets.jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html, unicode-license.txt" />
+ <fileset dir="${build.dir}"
+ includes="META-INF/services/**/*,com/ibm/icu/charset/**/*,${icu4j.data.path}/*.cnv,${icu4j.data.path}/cnvalias.icu" />
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Charsets" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java Charsets" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Packages the compiled tests (dev/test) and their data (dev/data) from
+ the build directory into the tests jar. -->
+ <target name="jarTests" depends="tests,jar" description="build runtime 'icu4jtests.jar' jar file">
+ <jar jarfile="${testjar.file}" compress="true">
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/test/**/*" />
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/data/**/*" />
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Tests" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java Tests" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="jarSrc" depends="init,normSrc" description="build source 'icu4jsrc.jar' jar file">
+ <!--Create a jar archive of just the source for distribution. The
+ jar file is written to the path in jarSrc.file, which initBase sets
+ to icu4jsrc.jar (relative to the project base directory). Because
+ normSrc runs after init, the sources are preprocessed for the
+ default target runtime before they are archived.
+ The exclude patterns ${src.dir}/com/ibm/icu/dev/data/unicode/UnicodeData-*.txt
+ and ${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing-*.txt
+ eliminate old archive copies like SpecialCasing-2.txt and
+ UnicodeData-2.1.8.txt.
+ NOTE(review): obsolete.dir is not defined in the reviewed part of this
+ build file; confirm that it is defined elsewhere, otherwise that
+ exclude pattern is left unexpanded. -->
+ <jar jarfile="${jarSrc.file}"
+ compress="true">
+ <fileset dir=".">
+ <exclude name="${richedit.dir}/**/*" />
+ <exclude name="test_*" />
+ <exclude name="${src.dir}/com/ibm/icu/dev/data/unicode/UnicodeData-*.txt" />
+ <exclude name="${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing-*.txt" />
+ <exclude name="${src.dir}/com/ibm/icu/dev/data/unicode/CompositionExclusions-*.txt" />
+ <exclude name="${obsolete.dir}/**" />
+ <exclude name="**/*~" />
+ <exclude name="${src.dir}/**/*.class" />
+ <exclude name="${build.dir}/**" />
+ <exclude name="${doc.dir}/**" />
+ <exclude name="*.jar" />
+ <exclude name="*.zip" />
+ <exclude name="*.gz" />
+ </fileset>
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Sources" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java source files" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Packages the generated javadoc directory (doc.dir) into the docs jar. -->
+ <target name="jarDocs" depends="docs" description="build documentation 'icu4jdocs.jar' jar file">
+ <jar jarfile="${jarDocs.file}" compress="true" basedir="${doc.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Documents" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java documents" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Packages the compiled demos into the demos jar. The manifest names
+ com.ibm.icu.dev.demo.Launcher as Main-Class and lists icu4j.jar on the
+ Class-Path. -->
+ <target name="jarDemos" depends="demos" description="build demos to 'icu4jdemos.jar' jar file">
+ <jar jarfile="${demos-jar.file}" compress="true">
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/demo/**" />
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <attribute name="Main-Class" value="com.ibm.icu.dev.demo.Launcher" />
+ <attribute name="Class-Path" value="icu4j.jar" />
+ <section name="common">
+ <attribute name="Specification-Title" value="${manifest.specification.title}" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java Demos" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Compiles the transliterator IME sources, copies
+ Transliterator.properties next to the classes, and jars them with the
+ manifest stub and the services/* files as META-INF content.
+ NOTE(review): the collator and transliterator targets this depends on are
+ not defined in the reviewed part of this build file; confirm they exist. -->
+ <target name="translitIMEJar" depends="collator, transliterator" description="build transliterator IME 'icutransime.jar' jar file">
+ <javac includes="com/ibm/icu/dev/tool/ime/translit/*.java" excludes="**/CVS/**/*" srcdir="${src.dir}" destdir="${build.dir}" classpathref="build.classpath" source="${icu4j.javac.source}" target="${icu4j.javac.target}" debug="on" deprecation="off" />
+ <copy file="${src.dir}/com/ibm/icu/dev/tool/ime/translit/Transliterator.properties" todir="${build.dir}/com/ibm/icu/dev/tool/ime/translit" />
+ <jar jarfile="${ime.translit.jar.file}" compress="true" basedir="${build.dir}" includes="com/ibm/icu/dev/tool/ime/translit/**/*" manifest="${ime.translit.manifest}">
+ <metainf dir="${src.dir}/com/ibm/icu/dev/tool/ime/translit" includes="services/*" />
+ </jar>
+ </target>
+
+ <!-- Compiles the indic IME sources, copies DisplayNames.properties next to
+ the classes, and jars them with the manifest stub and the services/*
+ files as META-INF content. -->
+ <target name="indicIMEJar" depends="init" description="build indic IME 'icuindicime.jar' jar file">
+ <javac includes="com/ibm/icu/dev/tool/ime/indic/*.java" excludes="**/CVS/**/*" srcdir="${src.dir}" destdir="${build.dir}" classpathref="build.classpath" source="${icu4j.javac.source}" target="${icu4j.javac.target}" debug="on" deprecation="off" />
+ <copy file="${src.dir}/com/ibm/icu/dev/tool/ime/indic/DisplayNames.properties" todir="${build.dir}/com/ibm/icu/dev/tool/ime/indic" />
+ <jar jarfile="${ime.indic.jar.file}" compress="true" basedir="${build.dir}" includes="com/ibm/icu/dev/tool/ime/indic/**/*" manifest="${ime.indic.manifest}">
+ <metainf dir="${src.dir}/com/ibm/icu/dev/tool/ime/indic" includes="services/*" />
+ </jar>
+ </target>
+
+ <!--
+ <target name="translitimeStandaloneJar" depends="collator, transliterator">
+ <javac includes="com/ibm/icu/dev/tool/ime/translit/*.java"
+ excludes="**/CVS/**/*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off"/>
+ <copy toDir="${build.dir}/com/ibm/icu/dev/tool/ime/translit">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/tool/ime/translit" includes="*.properties"/>
+ </copy>
+ <jar jarfile="${ime.jar.file}"
+ compress="true"
+ basedir="${build.dir}"
+ includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*,com/ibm/icu/dev/tool/ime/translit/**/*"
+ manifest="${ime.translit.manifest}">
+ <metainf dir="${src.dir}/com/ibm/icu/dev/tool/ime/translit" includes="services/*"/>
+ </jar>
+ </target>
+-->
+
+ <!-- jars up richedit but without tests: everything under com/ibm/richtext
+ in the build directory except com/ibm/richtext/test, using the richedit
+ manifest stub -->
+ <target name="richeditJar" depends="richedit" description="build richedit runtime 'richedit.jar' jar file">
+ <mkdir dir="${richedit.dir}" />
+ <jar jarfile="${richedit.jar.file}" compress="true" includes="com/ibm/richtext/**/*" excludes="com/ibm/richtext/test/**/*" basedir="${build.dir}" manifest="${richedit.manifest}" />
+ </target>
+
+ <!--
+ Note: I used to use -linkoffline http://java.sun.com/products/jdk/1.2/docs/api/ C:\jdk1.2.2\docs\api
+ This links to core docs on Sun's site - a very nice feature. But it requires the 1.2 docs to be
+ on the build machine at a known location.
+ -->
+
+ <!-- the 'public' docs for richedit. The footer markup in the bottom
+ attribute is entity-escaped, because a literal less-than sign is not
+ allowed inside an XML attribute value. -->
+ <target name="richeditDocs" depends="init" description="build richedit javadoc">
+ <mkdir dir="${richedit.doc.dir}" />
+ <javadoc packagenames="com.ibm.richtext.demo,com.ibm.richtext.awtui,com.ibm.richtext.swingui,com.ibm.richtext.textpanel,com.ibm.richtext.styledtext,com.ibm.richtext.textlayout.attributes,com.ibm.richtext.print"
+ sourcepath="${src.dir}"
+ destdir="${richedit.doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="RichEdit Control"
+ doctitle="RichEdit Control"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) 1998-2004 IBM Corporation and others.&lt;/font&gt;" />
+ </target>
+
+ <!-- richedit alphaworks distribution - jar and docs, but no source -->
+ <target name="richeditZip" depends="richeditJar,richeditDocs" description="build richedit zip file">
+ <!--Create a zip archive of the richedit jar and readme -->
+ <copy file="license.html" todir="${richedit.dir}" />
+ <zip zipfile="${richedit.zip.file}" basedir="${richedit.dir}" includes="doc/**/*,richedit.jar,license.html" />
+ </target>
+ <target depends="initBase" name="deleteCore"> <!-- removes the compiled core ICU packages from ${build.dir} -->
+ <delete failonerror="no"> <!-- best effort: problems while deleting do not fail the build -->
+ <fileset dir="${build.dir}/com/ibm/icu/charset"/>
+ <fileset dir="${build.dir}/com/ibm/icu/impl"/>
+ <fileset dir="${build.dir}/com/ibm/icu/lang"/>
+ <fileset dir="${build.dir}/com/ibm/icu/math"/>
+ <fileset dir="${build.dir}/com/ibm/icu/text"/>
+ <fileset dir="${build.dir}/com/ibm/icu/util"/>
+ </delete>
+ </target>
+
+ <target name="check" depends="tests, jar, deleteCore" description="run standard icu4j test suite">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args}"/>
+ <arg value="-n" />
+ <classpath>
+ <pathelement path="${java.class.path}/" />
+ <pathelement location="${jar.file}" />
+ <pathelement location="${charsets.jar.file}" />
+ <pathelement location="clover.jar" />
+ <pathelement path="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="jdktzCheck" depends="tests, jar, deleteCore" description="run icu4j test suite with JDK timezone">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args} -Dcom.ibm.icu.util.TimeZone.DefaultTimeZoneType=JDK"/>
+ <arg value="-n" />
+ <classpath>
+ <pathelement path="${java.class.path}/" />
+ <pathelement location="${jar.file}" />
+ <pathelement location="${charsets.jar.file}" />
+ <pathelement location="clover.jar" />
+ <pathelement path="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="exhaustiveCheck" depends="tests, jar, deleteCore" description="run standard icu4j test suite in exhaustive mode">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args}"/>
+ <arg value="-n" />
+ <arg value="-e10" />
+ <classpath>
+ <pathelement path="${java.class.path}/" />
+ <pathelement location="${jar.file}" />
+ <pathelement location="${charsets.jar.file}" />
+ <pathelement location="clover.jar" />
+ <pathelement path="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="secure" depends="jarTests" description="builds icu4j.jar and icu4jtests.jar" />
+
+ <target name="secureCheck" depends="secure" description="run secure (applet-like) icu4j test suite">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true"> <!-- forked JVM: the security options below apply to the test run only -->
+ <jvmarg line="${test.jvm.args}"/>
+ <!-- optional debugging aid: <jvmarg value="-verbose"/> -->
+ <!-- optional debugging aid (LOTS of output): <jvmarg value="-Djava.security.debug=access:trace"/> -->
+ <!-- optional debugging aid: <jvmarg value="-Djava.security.debug=access:failure,domain"/> -->
+ <jvmarg value="-Djava.security.manager" /> <!-- run the tests under a security manager -->
+ <jvmarg value="-Djava.security.policy=${src.dir}/com/ibm/icu/dev/test/security.policy" /> <!-- policy file used for the run -->
+ <!-- optional debugging aid: <jvmarg value="-Djava.security.debug=access:failure"/> -->
+ <arg value="-w" />
+ <arg value="-nothrow" />
+ <classpath> <!-- jar files only: ${build.dir} itself is not on this class path -->
+ <pathelement location="clover.jar" />
+ <pathelement location="${jar.file}" />
+ <pathelement location="${charsets.jar.file}" />
+ <pathelement location="${testjar.file}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="noData" depends="core, tests" description="builds ICU4J without any data"> <!-- runs the core and tests targets first, then strips data from ${build.dir} -->
+ <delete failonerror="no"> <!-- best effort: problems while deleting do not fail the build -->
+ <fileset dir="${build.dir}/${icu4j.data.path}/../" includes="Holiday*.class" /> <!-- matching classes in the parent directory of the data path -->
+ <fileset dir="${build.dir}/${icu4j.data.path}/../" includes="BreakIterator*.class" />
+ <fileset dir="${build.dir}/${icu4j.data.path}" />
+ <fileset dir="${build.dir}/${icu4j.testdata.path}" />
+ <fileset dir="${build.dir}/${icu4j.testdata.path}/../" /> <!-- NOTE(review): this is the parent of the test data path, so it also covers the fileset above; confirm that deleting every file under the parent is intended -->
+ </delete>
+ </target>
+ <target name="noDataCheck" depends="noData" description="runs the tests when no data is present">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <arg value="-nothrow" />
+ <arg value="-nodata" />
+ <classpath>
+ <pathelement location="clover.jar" />
+ <pathelement location="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="_requires14" if="before.java14">
+ <echo message="One of your targets requires JAVA_HOME to be set to JDK version 1.4 or newer" />
+ </target>
+
+ <target name="_checktags" depends="tools" unless="before.java14">
+ <echo message="doc params: ${doc.params}" />
+ <javadoc sourcepath="${src.dir}" packagenames="com.ibm.icu.*" excludepackagenames="com.ibm.icu.dev.*,com.ibm.icu.impl.*" classpath="${build.dir}" source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.CheckTags" path="${build.dir}">
+ <!-- <param name="-short"/> -->
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="checktags" depends="_requires14,_checktags" description="check API tags before release" />
+
+ <target name="gatherapi" depends="tools" unless="before.java14" description="run API database generator tool">
+ <javadoc classpath="${build.dir}" sourcepath="${src.dir}" packagenames="com.ibm.icu.lang,com.ibm.icu.math,com.ibm.icu.text,com.ibm.icu.util" source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData" path="${build.dir}">
+ <param name="-name" value="ICU4J ${icu4j.impl.version.string}" />
+ <param name="-output" value="${api.dir}/icu4j${icu4j.version.number}.api" />
+ <param name="-internal" />
+ <param name="-gzip" />
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="gatherapi.with.version" depends="tools" unless="before.java14" description="run API database generator tool">
+ <javadoc classpath="${build.dir}" sourcepath="${src.dir}" packagenames="com.ibm.icu.lang,com.ibm.icu.math,com.ibm.icu.text,com.ibm.icu.util" source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData" path="${build.dir}">
+ <param name="-name" value="ICU4J ${icu4j.impl.version.string}" />
+ <param name="-output" value="${api.dir}/icu4j${icu4j.version.number}.stver.api" />
+ <param name="-internal" />
+ <param name="-version" />
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="apireport" depends="tools, gatherapi" unless="before.java14" description="run API report generator tool"> <!-- guarded by before.java14, the same property the gatherapi target checks -->
+ <java classname="com.ibm.icu.dev.tool.docs.ReportAPI" classpath="${build.dir}" failonerror="true">
+ <arg value="-old:" /> <!-- next argument: API data file of the previous version -->
+ <arg value="${api.dir}/icu4j${icu4j.previous.version.number}.api.gz" />
+ <arg value="-new:" /> <!-- next argument: API data file of the current version -->
+ <arg value="${api.dir}/icu4j${icu4j.version.number}.api.gz" />
+ <arg value="-html" />
+ <arg value="-internal" />
+ <arg value="-out:" /> <!-- next argument: report output location -->
+ <arg value="${api.report.out}" />
+ </java>
+ </target>
+
+ <target name="clean" depends="initBase" description="remove all build targets">
+ <delete dir="${build.dir}" />
+ <delete dir="${doc.dir}" />
+ <delete file="${jar.file}" />
+ <delete file="${charsets.jar.file}" />
+ <delete dir="${richedit.dir}" />
+ <delete file="${testjar.file}" />
+ <delete file="${jarSrc.file}" />
+
+ <delete dir="${eclipse.projects.dir}" />
+ <delete dir="${wrapper.build.dir}" />
+ <delete file="${zipTestSrc.file}" />
+ <delete file="${jdk.wrapper.jar.file}" />
+ <delete file="${fragment.jar.file}"/>
+
+ <delete dir="${tzu.bin.dir}" />
+ <delete file="${tzu.bin.zip.file}" />
+ <delete file="${tzu.src.zip.file}" />
+ </target>
+
+ <target name="moduleJar" depends="initBase" description="modular build of 'icu4j.jar' jar file">
+ <jar jarfile="${jar.file}" compress="true" includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*" basedir="${build.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Modularized Build" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value=" ICU for Java Module" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <patternset id="common.test.sources"> <!-- shared test sources; the module *Tests targets pull this set in by refid -->
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ </patternset>
+
+ <!-- Module: BreakIterator -->
+ <target name="breakIterator" depends="init,copyFullPropsData" description="modular build of break iterator services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/impl/UCharacterName.java"/>
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/lang/**/*.java"/>
+ <include name="com/ibm/icu/text/*BreakIterator*.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ </javac>
+ <copy file="${src.dir}/com/ibm/icu/impl/data/th.brk"
+ todir="${build.dir}/com/ibm/icu/impl/data/"/>
+ </target>
+
+ <target name="breakIteratorTests" depends="breakIterator" description="test breakIterator modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/rbbi/**/*.java"/>
+ </javac>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/rbbi/rbbitst.txt"
+ todir="${build.dir}/com/ibm/icu/dev/test/rbbi"/>
+ </target>
+
+ <!-- Module: Calendar -->
+ <target name="copyCalendarData" depends="initBase">
+ <!-- Calendar does not require Collation data, BreakIterator data -->
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/pnames.icu" />
+ <include name="**/ucase.icu" />
+ <include name="**/unorm.icu" />
+ <include name="**/uprops.icu" />
+ <include name="**/unames.icu" />
+ <include name="**/*.res" />
+ <exclude name="**/coll/*.res" />
+ <exclude name="**/translit/*.res" />
+ <exclude name="**/rbnf/*.res" />
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="calendar" depends="init,copyCalendarData" description="modular build of calendar services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/impl/data/*Holiday*.java"/>
+ <include name="com/ibm/icu/lang/UCharacter.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/NumberFormatServiceShim.java"/>
+ <include name="com/ibm/icu/util/*Calendar*.java"/>
+ </javac>
+ </target>
+
+ <target name="calendarTests" depends="calendar,testdata" description="test calendar modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/calendar/**/*.java" />
+ </javac>
+ </target>
+
+ <!-- Module: Collator -->
+ <target name="copyCollatorData" depends="initBase">
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/*.icu" />
+ <include name="**/coll/*.res" />
+ <include name="**/*.brk" />
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="collator" depends="init,copyCollatorData" description="modular build of collator services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/text/*BreakIterator*.java"/>
+ <include name="com/ibm/icu/text/*Collation*.java"/>
+ <include name="com/ibm/icu/text/*Collator*.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ </javac>
+ </target>
+
+ <target name="collatorTests" depends="collator,testdata" description="test collator modular build target"> <!-- compiles the shared test sources plus the collator and search tests -->
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/collator/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/search/**/*.java"/> <!-- name holds exactly one pattern, not a comma-separated list -->
+ <exclude name="com/ibm/icu/dev/test/collator/RandomCollator.java"/>
+ </javac>
+ </target>
+
+ <!-- Module: Compression -->
+ <target name="compression" depends="init" description="modular build of compression services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/text/SCSU.java"/>
+ <include name="com/ibm/icu/text/UnicodeCompressor.java"/>
+ <include name="com/ibm/icu/text/UnicodeDecompressor.java"/>
+ </javac>
+ </target>
+
+ <target name="compressionTests" depends="compression" description="test compression modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/compression/**/*.java"/>
+ </javac>
+ </target>
+
+ <!-- Module: Format -->
+ <target name="copyFormatData" depends="initBase" if="icu4j.module.resources"> <!-- runs only when the icu4j.module.resources property is set -->
+ <!-- Format does not require BreakIterator data -->
+ <unjar src="${icudatajar.file}" dest="${build.dir}"> <!-- extract the selected entries of the ICU data jar into ${build.dir} -->
+ <patternset>
+ <include name="**/*.icu" />
+ <include name="**/*.res" />
+ <exclude name="**/translit/*.res" /> <!-- the translit resources are left out as well -->
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="format" depends="init,copyFormatData" description="modular build of formatting services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/lang/UCharacter.java"/>
+ <include name="com/ibm/icu/text/*Collator*.java"/>
+ <include name="com/ibm/icu/text/*Format*.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/util/CalendarServiceShim.java"/> </javac>
+ </target>
+
+ <target name="formatTests" depends="format,testdata" description="test format modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/format/**/*.java"/>
+ <exclude name="com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java"/>
+ </javac>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/format/NumberFormatTestCases.txt"
+ todir="${build.dir}/com/ibm/icu/dev/test/format"/>
+ </target>
+
+ <!-- Module: Normalizer -->
+ <target name="normalizerData" depends="initBase">
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/ucase.icu" />
+ <include name="**/unorm.icu" />
+ <include name="**/uprops.icu" />
+ <include name="**/pnames.icu" />
+ <include name="**/unames.icu" />
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="normalizer" depends="init,normalizerData" description="modular build of normalizer services">
+ <javac
+ excludes="**/CVS/**/*,com/ibm/icu/lang/UScriptRun.java, com/ibm/icu/impl/ICUListResourceBundle.java, com/ibm/icu/impl/UtilityExtensions.java,com/ibm/icu/impl/TrieBuilder.java,com/ibm/icu/impl/IntTrieBuilder.java,com/ibm/icu/impl/BOCU.java,com/ibm/icu/impl/UnicodeCharacterIterator.java"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off"> <!-- the sources to compile are listed once, in the nested include elements; the excludes attribute still applies to them -->
+ <include name="com/ibm/icu/lang/**/*"/>
+ <include name="com/ibm/icu/text/CanonicalIterator.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ </javac>
+ </target>
+
+ <target name="normalizerTests" depends="normalizer">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/normalizer/**/*.java"/>
+ <exclude name="com/ibm/icu/dev/test/normalizer/TestDeprecatedNormalizerAPI.java"/>
+ </javac>
+ </target>
+
+ <!-- Module: Basic Properties / Full Properties -->
+ <target name="copyBasicPropsData" depends="initBase">
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/unorm.icu" />
+ <include name="**/uprops.icu" />
+ <include name="**/ubidi.icu" />
+ <include name="**/ucase.icu" />
+ <include name="**/pnames.icu" />
+ <include name="**/unames.icu" />
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="copyFullPropsData" depends="initBase">
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/unorm.icu" />
+ <include name="**/uprops.icu" />
+ <include name="**/ubidi.icu" />
+ <include name="**/ucase.icu" />
+ <include name="**/unames.icu" />
+ <include name="**/pnames.icu" />
+ <include name="**/*.res" />
+ <include name="**/*.brk" />
+ <exclude name="**/coll/*.res" />
+ <exclude name="**/translit/*.res" />
+ <exclude name="**/rbnf/*.res" />
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="propertiesBasic" depends="init,propertiesClasses,copyBasicPropsData" description="modular build of basic character properties">
+ </target>
+
+ <target name="propertiesFull" depends="init,propertiesClasses,copyFullPropsData" description="modular build of full character properties">
+ </target>
+
+ <target name="propertiesBasicTests" depends="propertiesBasic,propertiesTests" description="test propertiesBasic modular build target"> <!-- aggregate target: all work is done by its dependencies -->
+ </target>
+
+ <target name="propertiesFullTests" depends="propertiesFull,propertiesTests" description="test propertiesFull modular build target"> <!-- aggregate target: all work is done by its dependencies -->
+ </target>
+
+ <target name="propertiesClasses">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/lang/**/*"/>
+ <include name="com/ibm/icu/text/*BreakDictionary*.java"/>
+ <include name="com/ibm/icu/text/*BreakIterator*.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ </javac>
+ </target>
+
+ <target name="propertiesTests">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/lang/**/*.java"/>
+ <exclude name="com/ibm/icu/dev/test/lang/TestUScriptRun.java"/>
+ </javac>
+ <copy file="${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing.txt"
+ todir="${build.dir}/com/ibm/icu/dev/data/unicode/" />
+ </target>
+
+ <!-- Module: Transliterator -->
+ <target name="copyTranslitData" depends="initBase">
+ <!-- Extracts only the listed .icu data files, the BreakIterator (*.brk) data and the translit resources -->
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/unorm.icu" />
+ <include name="**/uprops.icu" />
+ <include name="**/ubidi.icu" />
+ <include name="**/ucase.icu" />
+ <include name="**/unames.icu" />
+ <include name="**/pnames.icu" />
+ <include name="**/*.brk" />
+ <include name="**/translit/*.res" />
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu" />
+ </target>
+
+ <target name="transliterator" depends="init,propertiesClasses,copyTranslitData" description="modular build of unicode transform services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/impl/*Iterator*.java"/>
+ <include name="com/ibm/icu/impl/*Property*.java"/>
+ <include name="com/ibm/icu/impl/data/ResourceReader.java"/>
+ <include name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <include name="com/ibm/icu/text/*Transliterator*.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ </javac>
+ </target>
+
+ <target name="transliteratorTests" depends="transliterator" description = "test transliterator modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/translit/**/*.java"/>
+ <exclude name="com/ibm/icu/dev/test/translit/UnicodeFilterLogic*.java"/>
+ </javac>
+ </target>
+
+ <!-- Module: StringPrep, IDNA -->
+ <target name="copyStringPrepData" depends="initBase">
+ <unjar src="${icudatajar.file}" dest="${build.dir}">
+ <patternset>
+ <include name="**/*.spp" />
+ <include name="**/ubidi.icu" />
+ <include name="**/ucase.icu" />
+ <include name="**/unorm.icu" />
+ <include name="**/uprops.icu" />
+ <include name="**/pnames.icu" />
+ <include name="**/unames.icu" />
+ </patternset>
+ </unjar>
+ </target>
+
+ <target name="stringPrep" depends="init,normalizer,copyStringPrepData" description="modular build of stringprep services">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off">
+ <include name="com/ibm/icu/impl/*StringPrep*.java"/>
+ <include name="com/ibm/icu/text/*IDNA*.java"/>
+ <include name="com/ibm/icu/text/*StringPrep*.java"/>
+ </javac>
+ </target>
+
+ <target name="stringPrepTests" depends="stringPrep" description="test stringPrep modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on"
+ deprecation="off">
+ <patternset refid="common.test.sources" />
+ <include name="com/ibm/icu/dev/test/stringprep/**/*.java"/>
+ </javac>
+ <unjar src="${src.dir}/com/ibm/icu/dev/data/testdata.jar" dest="${build.dir}">
+ <patternset>
+ <include name="**/*.spp"/>
+ <include name="**/idna_rules.res"/>
+ </patternset>
+ </unjar>
+ </target>
+
+
+
+ <target name="excludeCharset" depends="core" description="excludes charset module from ICU4J"> <!-- runs core first, then deletes the charset pieces from ${build.dir} -->
+ <delete failonerror="yes"> <!-- a failed delete aborts the build -->
+ <fileset dir="${build.dir}/com/ibm/icu/impl/data/icudt${icu4j.data.version.number}b/"> <!-- charset data files in the versioned data directory -->
+ <include name="*.cnv" />
+ <include name="cnvalias.icu" />
+ </fileset>
+ <fileset dir="${build.dir}/com/ibm/icu/charset" />
+ <fileset dir="${build.dir}/META-INF/" /> <!-- NOTE(review): removes every file under META-INF, presumably the charset provider registration; confirm nothing else is expected there -->
+ </delete>
+ </target>
+ <target name="excludeCharsetTests" depends="tests" description="excludes charset test from ICU4J">
+ <delete failonerror="yes">
+ <fileset dir="${build.dir}/com/ibm/icu/dev/test/charset" />
+ </delete>
+ </target>
+
+ <target name="moduleCheck" depends="initBase" description="run tests for module jar">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args}"/>
+ <arg value="-nothrow" />
+ <arg value="-nodata" />
+ <classpath>
+ <pathelement location="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <!-- build the extra Transliterator data pertaining to Han-Latin transliteration -->
+ <target name="extraTransliteratorData" depends="initBase" description="build han-to-latin transliterator data (normally omitted)">
+ <copy todir="${build.dir}/com/ibm/icu/impl/data">
+ <fileset dir="${src.dir}/com/ibm/icu/impl/data" includes="Transliterator_Han_Latin_*.txt" />
+ </copy>
+ </target>
+
+ <!-- Target for building XLIFF2ICUConverter Jar -->
+
+ <target name="xliff" depends="init" description="build xliff converter tool">
+ <javac sourcepath="" srcdir="${src.dir}" destdir="${build.dir}" classpathref="build.classpath" source="${icu4j.javac.source}" target="${icu4j.javac.target}" debug="on" deprecation="off">
+
+ <include name="com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.java" />
+ <include name="com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter.java" />
+ <include name="com/ibm/icu/dev/tool/UOption.java" />
+ <include name="com/ibm/icu/dev/tool/xmlcomparator/XMLValidator.java" />
+ </javac>
+ <jar jarfile="xliff.jar" compress="true" includes="com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter*.class,com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.class,com/ibm/icu/dev/tool/UOption.class" basedir="${build.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="XLIFF To ICU Converter" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value="XLIFF2ICUConverter" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+
+ <jar jarfile="xliff-src.jar" compress="true" includes="com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter.java,com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.java,com/ibm/icu/dev/tool/UOption.java" basedir="${src.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="XLIFF To ICU Converter Sources" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value="XLIFF2ICUConverter Sources" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Target for building Utilities Jar for CLDR -->
+
+ <target name="cldrUtil" depends="init" description="build Utilities for CLDR">
+ <javac srcdir="${src.dir}" destdir="${build.dir}" source="${icu4j.javac.source}" target="${icu4j.javac.target}" classpathref="build.classpath" debug="on" deprecation="off">
+
+ <include name="com/ibm/icu/dev/test/TestFmwk.java" />
+ <include name="com/ibm/icu/dev/test/util/*.java" />
+ <include name="com/ibm/icu/dev/tool/UOption.java" />
+ </javac>
+ <jar jarfile="utilities.jar" compress="true" includes="com/ibm/icu/dev/test/util/*.class,com/ibm/icu/dev/test/TestFmwk*.class,com/ibm/icu/dev/test/AbstractTest*.class,com/ibm/icu/dev/test/TestLog*.class,com/ibm/icu/dev/tool/UOption*.class" basedir="${build.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="Utilities for CLDR Tools" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value="CLDR Utilities" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!--Target for building jars -->
+ <target name="distJars" depends="jarSrc, jar, jarDocs" />
+
+ <!-- Target for checking the loading of bundles from the default package.
+ This is a hack to get around Eclipse's build problems.
+ -->
+ <target name="defaultPackage" depends="tests" description="Tests for loading resources in the default package">
+ <copy file="${src.dir}/com/ibm/icu/dev/test/util/TestDefaultPackageLoading.jpp" tofile="${src.dir}/TestDefaultPackageLoading.java" /> <!-- the .jpp files are copied to the source root as .java, i.e. into the default package -->
+ <copy file="${src.dir}/com/ibm/icu/dev/test/util/TestData_en.jpp" tofile="${src.dir}/TestData_en.java" /> <!-- NOTE(review): both generated .java files land in ${src.dir} and are not removed by this target -->
+ <javac srcdir="${src.dir}" destdir="${build.dir}" classpathref="build.classpath" source="${icu4j.javac.source}" target="${icu4j.javac.target}" debug="on" deprecation="off"> <!-- only the two copied files are selected for compilation -->
+
+ <exclude name="**/CVS/**/*" />
+ <include name="TestData_en.java" />
+ <include name="TestDefaultPackageLoading.java" />
+ </javac>
+ <!-- copy the file to default package and test if we can load it -->
+ <copy file="${build.dir}/com/ibm/icu/dev/data/testdata/te.res" todir="${build.dir}/" />
+ <!-- copy the file up one level and test if java style base name loading works -->
+ <copy file="${build.dir}/com/ibm/icu/dev/data/testdata/te.res" tofile="${build.dir}/com/ibm/icu/dev/data/TestData_bge.res" />
+ </target>
+
+ <target name="defaultPackageCheck" depends="tests, defaultPackage" description="Run Tests for loading resources in the default package">
+ <!--run the tests -->
+ <java classname="TestDefaultPackageLoading" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args}"/>
+ <arg value="-n" />
+ <classpath>
+ <pathelement path="${java.class.path}/" />
+ <pathelement location="clover.jar" />
+ <pathelement path="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <!-- convert @deprecated @draft tags to @provisional -->
+ <target name="swatDeprecated" depends="initBase, tools" unless="before.java14">
+ <java classname="com.ibm.icu.dev.tool.docs.SwatDeprecated" classpath="${build.dir}" failonerror="true">
+ <arg value="-src" />
+ <arg value="${src.dir}" />
+ <arg value="-dst" />
+ <arg value="${src.dir}" />
+ <arg value="-overwrite" />
+ <arg value="-verbose" />
+ </java>
+ </target>
+
+ <!-- convert @provisional tags to @deprecated -->
+ <target name="restoreDeprecated" depends="initBase, tools" unless="before.java14">
+ <java classname="com.ibm.icu.dev.tool.docs.SwatDeprecated" classpath="${build.dir}" failonerror="true">
+ <arg value="-prov" />
+ <arg value="-src" />
+ <arg value="${src.dir}" />
+ <arg value="-dst" />
+ <arg value="${src.dir}" />
+ <arg value="-overwrite" />
+ <arg value="-verbose" />
+ </java>
+ </target>
+
+<!-- Eclipse plug-in build targets -->
+ <!-- for building eclipse distribution -->
+ <target name="initIcuEclipseVersion" depends="initBase">
+ <tstamp>
+ <format property="build.date" pattern="yyyyMMdd"/>
+ </tstamp>
+ <property name="icu4j.eclipse.build.version.string" value="${icu4j.plugin.impl.version.string}.v${build.date}"/>
+ </target>
+
+ <target name="eclipseProjects"
+ depends="eclipseCoreProject,eclipseTestProject,eclipseWrapperProject,eclipseFragmentProject"
+ description="create all eclipse icu projects"/>
+
+ <target name="eclipsePDEBuild"
+ depends="initEclipsePDEBuildEnv, eclipseProjects"
+ description="build icu4j plugin files">
+
+ <!-- copy OSGi jar file to baseLocation -->
+ <mkdir dir="${eclipse.projects.dir}/baseLocation/features"/>
+ <mkdir dir="${eclipse.projects.dir}/baseLocation/plugins"/>
+ <copy toDir="${eclipse.projects.dir}/baseLocation/plugins" file="${eclipse.osgi.jar}"/>
+
+ <!-- copy PDE build script files and run the build -->
+ <pathconvert property="eclipse.projects.dir.full" dirsep="/">
+ <path location="${basedir}/${eclipse.projects.dir}"/>
+ </pathconvert>
+ <antcall target="runEclipsePDEBuild">
+ <param name="icu.plugin.id" value="com.ibm.icu"/>
+ </antcall>
+ <antcall target="runEclipsePDEBuild">
+ <param name="icu.plugin.id" value="com.ibm.icu.base"/>
+ </antcall>
+ <antcall target="runEclipsePDEBuild">
+ <param name="icu.plugin.id" value="com.ibm.icu.jse4"/>
+ </antcall>
+ </target>
+
+ <target name="initEclipsePDEBuildEnv"
+ depends="initBase,initEclipseHome,initEclipseLauncher,initEclipseOSGiJar,initEclipsePDE"
+ description="Initialize eclipse PDE build environment">
+ <echo message="[PDE build configuration properties]"/>
+ <echo message=" Eclipse home: ${eclipse.home}"/>
+ <echo message=" Launcher jar: ${eclipse.launcher}"/>
+ <echo message=" OSGi bundle jar: ${eclipse.osgi.jar}"/>
+ <echo message=" Base OS: ${eclipse.baseos}"/>
+ <echo message=" Base WS: ${eclipse.basews}"/>
+ <echo message=" Base ARCH: ${eclipse.basearch}"/>
+ </target>
+
+ <target name="initEclipseHome"
+ if="env.ECLIPSE_HOME"
+ unless="eclipse.home"
+ description="Initialize the property eclipse.home from the environment variable ECLIPSE_HOME">
+ <property name="eclipse.home" value="${env.ECLIPSE_HOME}"/>
+ </target>
+
+ <target name="initEclipseLauncher"
+ if="eclipse.home"
+ description="Locate org.eclipse.equinox.launcher jar file for eclipse 3.3 and beyond">
+ <first id="equinox.launcher">
+ <fileset dir="${eclipse.home}/plugins">
+ <include name="org.eclipse.equinox.launcher_*.jar"/>
+ </fileset>
+ </first>
+ <pathconvert property="eclipse.launcher" dirsep="/" refid="equinox.launcher"/>
+ </target>
+
+ <target name="initEclipseOSGiJar"
+ if="eclipse.home"
+ description="Locate org.eclipse.osgi plugin jar file">
+ <first id="osgi.bundle">
+ <fileset dir="${eclipse.home}/plugins">
+ <include name="org.eclipse.osgi_*.jar"/>
+ </fileset>
+ </first>
+ <pathconvert property="eclipse.osgi.jar" dirsep="/" refid="osgi.bundle"/>
+ </target>
+
+ <target name="initEclipsePDE"
+ depends="locateEclipsePDE"
+ if="eclipse.pde.dir"
+ description="Set up PDE runtime arguments">
+ <property file="${eclipse.pde.dir}/templates/headless-build/build.properties" prefix="pde.template"/>
+ <property name="eclipse.baseos" value="${pde.template.baseos}"/>
+ <property name="eclipse.basews" value="${pde.template.basews}"/>
+ <property name="eclipse.basearch" value="${pde.template.basearch}"/>
+ </target>
+
+ <target name="locateEclipsePDE"
+ if="eclipse.home"
+ description="Locate org.eclipse.pde.build plug-in and set the property 'eclipse.pde.dir'">
+ <first id="eclipse.pde.plugin.dir">
+ <dirset dir="${eclipse.home}/plugins">
+ <include name="org.eclipse.pde.build_*"/>
+ </dirset>
+ </first>
+ <pathconvert property="eclipse.pde.dir" dirsep="/" refid="eclipse.pde.plugin.dir"/>
+ </target>
+
+ <target name="runEclipsePDEBuild">
+ <mkdir dir="${eclipse.projects.dir}/buildScripts/${icu.plugin.id}"/>
+ <copy toDir="${eclipse.projects.dir}/buildScripts/${icu.plugin.id}">
+ <fileset dir="${eclipse.dir}/pdebuild" includes="**/*"/>
+ <filterset>
+ <filter token="PLUGIN_ID" value="${icu.plugin.id}"/>
+ <filter token="BUILD_DIR" value="${eclipse.projects.dir.full}"/>
+ <filter token="BUILD_TYPE" value="ICU4J"/>
+ <filter token="BUILD_ID" value="${icu.plugin.id}"/>
+ <filter token="BASE_LOCATION" value="${eclipse.projects.dir.full}/baseLocation"/>
+ <filter token="BASE_OS" value="${eclipse.baseos}"/>
+ <filter token="BASE_WS" value="${eclipse.basews}"/>
+ <filter token="BASE_ARCH" value="${eclipse.basearch}"/>
+ </filterset>
+ </copy>
+
+ <java jar="${eclipse.launcher}" fork="true" failonerror="true">
+ <arg value="-application"/>
+ <arg value="org.eclipse.ant.core.antRunner"/>
+ <arg value="-buildfile"/>
+ <arg value="${eclipse.pde.dir}/scripts/build.xml"/>
+ <arg value="-Dbuilder=${eclipse.projects.dir.full}/buildScripts/${icu.plugin.id}"/>
+ </java>
+ </target>
+
+ <target name="eclipseMangle" depends="buildMangle">
+ <antcall target="doMangle">
+ <param name="target.defs" value="FOUNDATION10" />
+ <param name="input.file" value="@preprocessor.txt" />
+ </antcall>
+ </target>
+
+ <target name="eclipseCore" depends="coreData,icudata,durationdata,eclipseMangle">
+ <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java,com/ibm/icu/math/**/*.java,com/ibm/icu/impl/**/*.java,com/ibm/icu/lang/*.java"
+ compiler="javac1.3"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ bootclasspath="ee.foundation.jar"
+ source="1.3"
+ target="1.1"
+ debug="on"
+ deprecation="off"
+ encoding="ascii" />
+ <antcall target="indices" />
+ <jar jarfile="${jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html, unicode-license.txt" />
+ <fileset dir="${build.dir}">
+ <exclude name="META-INF/services/**/*" />
+ <exclude name="com/ibm/icu/charset/**/*" />
+ <exclude name="${icu4j.data.path}/*.cnv" />
+ <exclude name="${icu4j.data.path}/cnvalias.icu" />
+ <include name="com/ibm/icu/util/**/*" />
+ <include name="com/ibm/icu/text/**/*" />
+ <include name="com/ibm/icu/math/**/*" />
+ <include name="com/ibm/icu/impl/**/*" />
+ <include name="com/ibm/icu/lang/**/*" />
+ </fileset>
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Eclipse Build" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value="ICU for Java Eclipse plug-in" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="eclipseCoreProject"
+ depends="eclipseCore,initIcuEclipseVersion"
+ description="gather eclipse icu core project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/plugins/com.ibm.icu" />
+ <fileset dir="${eclipse.projects.dir}/features/com.ibm.icu" />
+ </delete>
+
+ <!-- PLUGIN FILES -->
+ <!-- icu source -->
+ <copy toDir="${eclipse.projects.dir}/plugins/com.ibm.icu/src/com/ibm/icu">
+ <fileset dir="src/com/ibm/icu"
+ includes="impl/**/*,lang/**/*,math/**/*,text/**/*,util/**/*"
+ excludes="**/.svn/**/*,**/*.jar,**/Transliterator_Han_Latin*.txt" />
+ </copy>
+ <!-- icu data -->
+ <copy todir="${eclipse.projects.dir}/plugins/com.ibm.icu/src/com/ibm/icu/impl/data">
+ <fileset dir="${build.dir}/com/ibm/icu/impl/data"
+ includes="**/*.brk,**/*.icu,**/*.res,**/*.spp,**/*.txt"
+ excludes="**/Transliterator_Han_Latin*.txt" />
+ </copy>
+ <!-- plugin project -->
+ <copy todir="${eclipse.projects.dir}/plugins/com.ibm.icu">
+ <fileset dir="${eclipse.dir}/plugins/com.ibm.icu" excludes="**/.svn/**/*" />
+ <filterset>
+ <filter token="BUILD_VERSION" value="${icu4j.eclipse.build.version.string}" />
+ <filter token="COPYRIGHT" value="${copyright.eclipse}" />
+ <filter token="IMPL_VERSION" value="${icu4j.impl.version.string}" />
+ <filter token="DATA_VERSION_NUMBER" value="${icu4j.data.version.number}" />
+ </filterset>
+ </copy>
+ <!-- license -->
+ <copy file="license.html"
+ todir="${eclipse.projects.dir}/plugins/com.ibm.icu/about_files" />
+ <!-- ucd terms -->
+ <copy file="src/com/ibm/icu/dev/data/unicode/ucdterms.txt"
+ todir="${eclipse.projects.dir}/plugins/com.ibm.icu/about_files" />
+ <!-- about -->
+ <copy file="${eclipse.dir}/misc/about_icu.html"
+ tofile="${eclipse.projects.dir}/plugins/com.ibm.icu/about.html" />
+
+ <!-- FEATURE FILES -->
+ <copy todir="${eclipse.projects.dir}/features/com.ibm.icu">
+ <fileset dir="${eclipse.dir}/features/com.ibm.icu" excludes="**/.svn/**/*" />
+ <filterset>
+ <filter token="BUILD_VERSION" value="${icu4j.eclipse.build.version.string}" />
+ <filter token="COPYRIGHT" value="${copyright.eclipse}" />
+ <filter token="DATA_VERSION_NUMBER" value="${icu4j.data.version.number}" />
+ </filterset>
+ </copy>
+ <!-- common eclipse about file -->
+ <copy file="${eclipse.dir}/misc/about.html"
+ todir="${eclipse.projects.dir}/features/com.ibm.icu/sourceTemplatePlugin" />
+ </target>
+
+ <!-- ensure eclipse wrapper code builds with foundation1.0/java1.3 -->
+ <target name="eclipseWrapper" depends="initBase">
+ <mkdir dir="${wrapper.build.dir}" />
+ <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java"
+ compiler="javac1.3"
+ srcdir="${eclipse.dir}/plugins/com.ibm.icu.base/src"
+ destdir="${wrapper.build.dir}"
+ classpathref="build.classpath"
+ bootclasspath="ee.foundation.jar"
+ source="1.3"
+ target="1.1"
+ debug="on"
+ deprecation="off"
+ encoding="ascii" />
+ <jar jarfile="${jdk.wrapper.jar.file}"
+ compress="true"
+ basedir="${wrapper.build.dir}" />
+ </target>
+
+ <target name="eclipseWrapperTest" depends="eclipseWrapper">
+ <!-- TODO: build the wrapper test code in this ant target -->
+ </target>
+
+ <target name="eclipseWrapperProject"
+ depends="eclipseWrapper,eclipseWrapperTest,initIcuEclipseVersion"
+ description="gather eclipse icu wrapper project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/plugins/com.ibm.icu.base" />
+ <fileset dir="${eclipse.projects.dir}/plugins/com.ibm.icu.base.tests" />
+ <fileset dir="${eclipse.projects.dir}/features/com.ibm.icu.base" />
+ </delete>
+ <!-- PLUGIN FILES -->
+ <!-- source and project files (wrapper plug-in and its tests), with version/copyright tokens substituted -->
+ <copy todir="${eclipse.projects.dir}/plugins">
+ <fileset dir="${eclipse.dir}/plugins"
+ includes="com.ibm.icu.base/**/*,com.ibm.icu.base.tests/**/*"
+ excludes="**/.svn/**/*" />
+ <filterset>
+ <filter token="BUILD_VERSION" value="${icu4j.eclipse.build.version.string}" />
+ <filter token="COPYRIGHT" value="${copyright.eclipse}" />
+ <filter token="IMPL_VERSION" value="${icu4j.impl.version.string}" />
+ <filter token="DATA_VERSION_NUMBER" value="${icu4j.data.version.number}" />
+ </filterset>
+ </copy>
+ <!-- license -->
+ <copy file="license.html"
+ todir="${eclipse.projects.dir}/plugins/com.ibm.icu.base/about_files" />
+ <!-- about -->
+ <copy file="${eclipse.dir}/misc/about_icu.html"
+ tofile="${eclipse.projects.dir}/plugins/com.ibm.icu.base/about.html" />
+
+ <!-- FEATURE FILES -->
+ <!-- project files -->
+ <copy todir="${eclipse.projects.dir}/features/com.ibm.icu.base">
+ <fileset dir="${eclipse.dir}/features/com.ibm.icu.base"
+ excludes="**/.svn/**/*"/>
+ <filterset>
+ <filter token="BUILD_VERSION" value="${icu4j.eclipse.build.version.string}" />
+ <filter token="COPYRIGHT" value="${copyright.eclipse}" />
+ <filter token="DATA_VERSION_NUMBER" value="${icu4j.data.version.number}" />
+ </filterset>
+ </copy>
+ <!-- common eclipse about file -->
+ <copy file="${eclipse.dir}/misc/about.html"
+ todir="${eclipse.projects.dir}/features/com.ibm.icu.base/sourceTemplatePlugin" />
+ </target>
+
+ <target name="eclipseTests"
+ depends="eclipseCore,testdata,eclipseMangle"
+ description="build tests trimmed for eclipse API">
+ <!-- TODO: we should actually use ee.foundation.jar for eclipse env -->
+ <javac includes="com/ibm/icu/dev/test/**/*.java"
+ excludes="com/ibm/icu/dev/test/charset/**/*.*,com/ibm/icu/dev/test/perf/**/*.*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="1.3"
+ target="1.3"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off" />
+ <antcall target="indices"/>
+ <jar jarfile="${testjar.file}" compress="true">
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/test/**/*" />
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/data/**/*" />
+ <manifest>
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Eclipse Tests"/>
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value="ICU for Java Eclipse plug-in Tests"/>
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="eclipseTestProject" depends="eclipseTests,eclipseZipTestSrc" description="gather eclipse icu test project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/plugins/com.ibm.icu.tests"/>
+ </delete>
+ <!-- copy eclipse test source and project files (same .svn exclude pattern as the other project targets) -->
+ <copy toDir="${eclipse.projects.dir}/plugins/com.ibm.icu.tests">
+ <fileset dir="${eclipse.dir}/plugins/com.ibm.icu.tests"
+ excludes="**/.svn/**/*"/>
+ <filterset>
+ <filter token="COPYRIGHT" value="${copyright.eclipse}"/>
+ </filterset>
+ </copy>
+ <!-- copy icu test jar -->
+ <copy toDir="${eclipse.projects.dir}/plugins/com.ibm.icu.tests" file="${testjar.file}"/>
+ <!-- copy icu test source zip -->
+ <copy toDir="${eclipse.projects.dir}/plugins/com.ibm.icu.tests" file="${zipTestSrc.file}"/>
+ </target>
+
+ <target name="eclipseFragmentMangle" depends="buildMangle">
+ <antcall target="initSrc"/>
+ <antcall target="doMangle">
+ <param name="target.defs" value="ECLIPSE_FRAGMENT" />
+ <param name="input.file" value="@eclipseFragment.txt" />
+ </antcall>
+ </target>
+
+ <target name="eclipseFragment" depends="eclipseCore,eclipseFragmentMangle">
+ <javac compiler="javac1.4"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="1.3"
+ target="1.2"
+ debug="on"
+ deprecation="off"
+ encoding="ascii"
+ listfiles="true">
+ <include name="com/ibm/icu/impl/DateNumberFormat.java" />
+ <include name="com/ibm/icu/impl/duration/BasicDurationFormat.java" />
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java" />
+ <include name="com/ibm/icu/impl/PatternTokenizer.java" />
+ <include name="com/ibm/icu/impl/Utility.java" />
+ <include name="com/ibm/icu/lang/UCharacter.java" />
+ <include name="com/ibm/icu/math/BigDecimal.java" />
+ <include name="com/ibm/icu/text/Bidi.java" />
+ <include name="com/ibm/icu/text/ChineseDateFormat.java" />
+ <include name="com/ibm/icu/text/DateFormat.java" />
+ <include name="com/ibm/icu/text/DateTimePatternGenerator.java" />
+ <include name="com/ibm/icu/text/DecimalFormat.java" />
+ <include name="com/ibm/icu/text/DigitList.java" />
+ <include name="com/ibm/icu/text/MessageFormat.java" />
+ <include name="com/ibm/icu/text/NumberFormat.java" />
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator.java" />
+ <include name="com/ibm/icu/text/RuleBasedNumberFormat.java" />
+ <include name="com/ibm/icu/text/SimpleDateFormat.java" />
+ <include name="com/ibm/icu/text/UnicodeSet.java" />
+ <include name="com/ibm/icu/text/UTF16.java" />
+ </javac>
+
+ <!-- collect fragment specific files -->
+ <jar jarfile="${fragment.jar.file}" compress="true">
+ <fileset dir="${build.dir}">
+ <include name="com/ibm/icu/impl/DateNumberFormat.class" />
+ <include name="com/ibm/icu/impl/duration/BasicDurationFormat.class" />
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.class" />
+ <include name="com/ibm/icu/impl/PatternTokenizer.class" />
+ <include name="com/ibm/icu/impl/Utility.class" />
+ <include name="com/ibm/icu/lang/UCharacter.class" />
+ <include name="com/ibm/icu/math/BigDecimal.class" />
+ <include name="com/ibm/icu/text/Bidi.class" />
+ <include name="com/ibm/icu/text/ChineseDateFormat.class" />
+ <include name="com/ibm/icu/text/DateFormat.class" />
+ <include name="com/ibm/icu/text/DateTimePatternGenerator.class" />
+ <include name="com/ibm/icu/text/DecimalFormat.class" />
+ <include name="com/ibm/icu/text/DigitList.class" />
+ <include name="com/ibm/icu/text/MessageFormat.class" />
+ <include name="com/ibm/icu/text/NumberFormat.class" />
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator.class" />
+ <include name="com/ibm/icu/text/RuleBasedNumberFormat.class" />
+ <include name="com/ibm/icu/text/SimpleDateFormat.class" />
+ <include name="com/ibm/icu/text/UnicodeSet.class" />
+ <include name="com/ibm/icu/text/UTF16.class" />
+
+ <include name="com/ibm/icu/lang/UCharacter$*.class" />
+ <include name="com/ibm/icu/text/Bidi$*.class" />
+ <include name="com/ibm/icu/text/ChineseDateFormat$*.class" />
+ <include name="com/ibm/icu/text/DateFormat$*.class" />
+ <include name="com/ibm/icu/text/DateTimePatternGenerator$*.class" />
+ <include name="com/ibm/icu/text/MessageFormat$*.class" />
+ <include name="com/ibm/icu/text/NumberFormat$*.class" />
+ <include name="com/ibm/icu/text/SimpleDateFormat$*.class" />
+ <include name="com/ibm/icu/text/UnicodeSet$*.class" />
+ <include name="com/ibm/icu/text/UTF16$*.class" />
+ </fileset>
+ </jar>
+ </target>
+
+ <target name="eclipseFragmentProject"
+ depends="eclipseFragment,initIcuEclipseVersion"
+ description="gather eclipse icu fragment project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/plugins/com.ibm.icu.jse4" />
+ <fileset dir="${eclipse.projects.dir}/features/com.ibm.icu.jse4" />
+ </delete>
+
+ <!-- PLUGIN FILES -->
+ <!-- source -->
+ <copy toDir="${eclipse.projects.dir}/plugins/com.ibm.icu.jse4/src">
+ <!-- TODO: Better way to collect files? -->
+ <fileset dir="${src.dir}">
+ <include name="com/ibm/icu/impl/DateNumberFormat.java" />
+ <include name="com/ibm/icu/impl/duration/BasicDurationFormat.java" />
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java" />
+ <include name="com/ibm/icu/impl/PatternTokenizer.java" />
+ <include name="com/ibm/icu/impl/Utility.java" />
+ <include name="com/ibm/icu/lang/UCharacter.java" />
+ <include name="com/ibm/icu/math/BigDecimal.java" />
+ <include name="com/ibm/icu/text/Bidi.java" />
+ <include name="com/ibm/icu/text/ChineseDateFormat.java" />
+ <include name="com/ibm/icu/text/DateFormat.java" />
+ <include name="com/ibm/icu/text/DateTimePatternGenerator.java" />
+ <include name="com/ibm/icu/text/DecimalFormat.java" />
+ <include name="com/ibm/icu/text/DigitList.java" />
+ <include name="com/ibm/icu/text/MessageFormat.java" />
+ <include name="com/ibm/icu/text/NumberFormat.java" />
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator.java" />
+ <include name="com/ibm/icu/text/RuleBasedNumberFormat.java" />
+ <include name="com/ibm/icu/text/SimpleDateFormat.java" />
+ <include name="com/ibm/icu/text/UnicodeSet.java" />
+ <include name="com/ibm/icu/text/UTF16.java" />
+ </fileset>
+ </copy>
+ <!-- project files -->
+ <copy todir="${eclipse.projects.dir}/plugins/com.ibm.icu.jse4">
+ <fileset dir="${eclipse.dir}/plugins/com.ibm.icu.jse4"
+ excludes="**/.svn/**/*" />
+ <filterset>
+ <filter token="BUILD_VERSION" value="${icu4j.eclipse.build.version.string}" />
+ <filter token="COPYRIGHT" value="${copyright.eclipse}" />
+ <filter token="IMPL_VERSION" value="${icu4j.impl.version.string}" />
+ <filter token="DATA_VERSION_NUMBER" value="${icu4j.data.version.number}" />
+ </filterset>
+ </copy>
+ <!-- license -->
+ <copy file="license.html"
+ todir="${eclipse.projects.dir}/plugins/com.ibm.icu.jse4/about_files" />
+ <!-- ucd terms -->
+ <copy file="src/com/ibm/icu/dev/data/unicode/ucdterms.txt"
+ todir="${eclipse.projects.dir}/plugins/com.ibm.icu.jse4/about_files" />
+
+ <!-- FEATURE FILES -->
+ <!-- project files -->
+ <copy todir="${eclipse.projects.dir}/features/com.ibm.icu.jse4">
+ <fileset dir="${eclipse.dir}/features/com.ibm.icu.jse4"
+ excludes="**/.svn/**/*" />
+ <filterset>
+ <filter token="BUILD_VERSION" value="${icu4j.eclipse.build.version.string}" />
+ <filter token="COPYRIGHT" value="${copyright.eclipse}" />
+ <filter token="DATA_VERSION_NUMBER" value="${icu4j.data.version.number}" />
+ </filterset>
+ </copy>
+ <!-- common eclipse about -->
+ <copy file="${eclipse.dir}/misc/about.html"
+ todir="${eclipse.projects.dir}/features/com.ibm.icu.jse4/sourceTemplatePlugin" />
+ </target>
+
+ <target name="eclipseZipTestSrc" depends="eclipseMangle" description="build zip of test sources for debugging">
+ <!-- for eclipse -->
+ <zip destfile="${zipTestSrc.file}" compress="true" basedir="${src.dir}" includes="com/ibm/icu/dev/test/**/*" excludes="com/ibm/icu/dev/test/cldr/**/*, com/ibm/icu/dev/test/perf/**/*" />
+ </target>
+
+ <target name="gatherICU4JWrapperAPI" depends="tools" unless="before.java14">
+ <javadoc classpath="${build.dir}" sourcepath="${src.dir}" packagenames="com.ibm.icu.text,com.ibm.icu.util" source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData" path="${build.dir}">
+ <param name="-name" value="ICU4J 3.6" />
+ <param name="-base" value="com.ibm.icu" />
+ <param name="-output" value="${api.dir}/icu4j36w_i.api" />
+ <param name="-filter" value="BreakIterator|CollationKey|Collator|DateFormat|DecimalFormat|MessageFormat|NumberFormat|SimpleDateFormat|StringTokenizer|Calendar|TimeZone|ULocale|DateFormatSymbols|DecimalFormatSymbols" />
+ <!-- param name="-gzip"/ -->
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="gatherEclipseWrapperAPI" depends="tools" unless="before.java14">
+ <javadoc classpath="${build.dir}" sourcepath="${src.dir}/com/ibm/icu/dev/eclipse/com.ibm.icu.base/src" packagenames="com.ibm.icu.text,com.ibm.icu.util" source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData" path="${build.dir}">
+ <param name="-name" value="ICU4J 3.6 Wrapper" />
+ <param name="-base" value="com.ibm.icu" />
+ <param name="-output" value="${api.dir}/icu4j36w_e.api" />
+ <!-- param name="-gzip"/ -->
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="reportWrapperAPI" depends="tools" unless="before.java14">
+ <java classname="com.ibm.icu.dev.tool.docs.ReportAPI" classpath="${build.dir}" failonerror="true">
+ <arg value="-old:" />
+ <arg value="${api.dir}/icu4j${icu4j.previous.version.number}w_i.api" />
+ <arg value="-new:" />
+ <arg value="${api.dir}/icu4j${icu4j.version.number}w_e.api" />
+ <arg value="-html" />
+ <arg value="-out:" />
+ <arg value="${api.dir}/icu4j_compare_${icu4j.previous.version.number}_wrapper_java.html" />
+ </java>
+ </target>
+
+<!-- Test suite launcher targets -->
+ <target name="runCheck" depends="tests">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args}"/>
+ <arg value="-n" />
+ <classpath>
+ <pathelement path="${java.class.path}/" />
+ <pathelement location="${jar.file}" />
+ <pathelement location="${charsets.jar.file}" />
+ <pathelement location="clover.jar" />
+ <pathelement path="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="eclipseRunCheck" depends="eclipseTests">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg line="${test.jvm.args}"/>
+ <arg value="-n" />
+ <classpath>
+ <pathelement path="${java.class.path}/" />
+ <pathelement location="${jar.file}" /> <!-- the jar written by eclipseCore -->
+ <pathelement location="clover.jar" />
+ <pathelement path="${build.dir}" />
+ </classpath>
+ </java>
+ </target>
+
+ <target name="eclipseCompat" depends="initBase, eclipseCore" />
+ <target name="eclipseCompatTests" depends="initBase, eclipseTests" />
+ <target name="eclipseCompatCheck" depends="initBase, eclipseCompat, eclipseCompatTests, deleteCore, eclipseRunCheck" />
+
+<!-- ICU TimeZone Update Utility targets -->
+ <target name="icutzu" depends="init" description="build ICU4J TimeZone Update Utility Classes">
+ <javac includes="${tzu.src.path}/**/*.java" excludes="**/.svn/**/*" srcdir="${src.dir}" destdir="${build.dir}" classpathref="build.classpath" source="${icu4j.javac.source}" target="${icu4j.javac.target}" debug="on" deprecation="off" encoding="ascii" />
+ </target>
+
+ <target name="icutzudata" depends="initBase, jar">
+ <mkdir dir="${tzu.bin.dir}" />
+ <copy todir="${tzu.bin.dir}">
+ <fileset dir="${src.dir}/${tzu.src.path}">
+ <include name="*.cmd" />
+ <include name="*.bat" />
+ <include name="*.sh" />
+ <include name="*.gif" />
+ <include name="*.txt" />
+ <include name="*.html" />
+ <include name="*.css" />
+ </fileset>
+ <fileset file="${basedir}/${jar.file}" />
+ <fileset file="${build.dir}/${icu4j.data.path}/zoneinfo.res" />
+ </copy>
+ </target>
+
+ <target name="icutzujar" depends="initBase, icutzu, icutzudata" description="build ICU4J TimeZone Update Utility classes">
+ <jar jarfile="${tzu.bin.dir}/${tzu.jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html" />
+ <fileset dir="${build.dir}" includes="${tzu.src.path}/**/*.class" />
+ <manifest>
+ <attribute name="Main-Class" value="com.ibm.icu.dev.tool.tzu.ICUTZUMain" />
+ <attribute name="Built-By" value="${corp}" />
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J TimeZone Update Utility" />
+ <attribute name="Specification-Version" value="${icu4j.spec.version.string}" />
+ <attribute name="Specification-Vendor" value="ICU" />
+ <attribute name="Implementation-Title" value="ICUTZU" />
+ <attribute name="Implementation-Version" value="${icu4j.impl.version.string}" />
+ <attribute name="Implementation-Vendor" value="${corp}" />
+ <attribute name="Implementation-Vendor-Id" value="com.ibm" />
+ <attribute name="Copyright-Info" value="${copyright}" />
+ <attribute name="Sealed" value="false" />
+ </section>
+ </manifest>
+ </jar>
+ <echo message="Please see ${tzu.bin.dir} directory for the files." />
+ </target>
+
+ <target name="icutzucheck" depends="icutzujar" description="check ICU4J TimeZone Update Utility">
+ <echo>Testing ICUTZU ...</echo>
+
+ <mkdir dir="${tzu.temp.dir}" />
+ <move todir="${tzu.temp.dir}">
+ <fileset file="${tzu.bin.dir}/DirectorySearch.txt" />
+ <fileset file="${tzu.bin.dir}/zoneinfo.res" />
+ </move>
+
+ <echo file="${tzu.bin.dir}/DirectorySearch.txt">+${tzu.test.dir}</echo>
+ <get dest="${tzu.bin.dir}/zoneinfo.res" src="http://icu-project.org/tzdata/2006a/be/zoneinfo.res" />
+ <copy todir="${tzu.test.dir}" file="${tzu.bin.dir}/${jar.file}" /> <!-- must match the jar name unjarred below -->
+
+ <echo>Running ICUTZU in Discovery Mode ...</echo>
+ <exec dir="${tzu.bin.dir}" executable="${java.home}/bin/java">
+ <arg value="-cp" />
+ <arg path="${tzu.bin.dir}/${jar.file};${tzu.bin.dir}/${tzu.jar.file}" />
+ <arg value="-Dnogui=true" />
+ <arg value="-Ddiscoveronly=true" />
+ <arg value="-Dsilentpatch=true" />
+ <arg value="-Doffline=true" />
+ <arg value="com.ibm.icu.dev.tool.tzu.ICUTZUMain" />
+ <arg file="${tzu.bin.dir}" />
+ <arg value="DirectorySearch.txt" />
+ <arg value="ICUList.txt" />
+ <arg value="zoneinfo.res" />
+ <arg value="Temp" />
+ <arg value="icu.gif" />
+ </exec>
+
+ <echo>Running ICUTZU in Patch Mode ...</echo>
+ <exec dir="${tzu.bin.dir}" executable="${java.home}/bin/java">
+ <arg value="-cp" />
+ <arg path="${tzu.bin.dir}/${jar.file};${tzu.bin.dir}/${tzu.jar.file}" />
+ <arg value="-Dnogui=true" />
+ <arg value="-Ddiscoveronly=false" />
+ <arg value="-Dsilentpatch=true" />
+ <arg value="-Doffline=true" />
+ <arg value="com.ibm.icu.dev.tool.tzu.ICUTZUMain" />
+ <arg file="${tzu.bin.dir}" />
+ <arg value="DirectorySearch.txt" />
+ <arg value="ICUList.txt" />
+ <arg value="zoneinfo.res" />
+ <arg value="Temp" />
+ <arg value="icu.gif" />
+ </exec>
+
+ <echo>Comparing results ...</echo>
+ <unjar src="${tzu.test.dir}/${jar.file}" dest="${tzu.test.dir}">
+ <patternset>
+ <include name="**/zoneinfo.res" />
+ </patternset>
+ </unjar>
+ <copy todir="${tzu.test.dir}" file="${tzu.test.dir}/${icu4j.data.path}/zoneinfo.res" />
+ <condition property="tzu.zoneinfo.match">
+ <filesmatch file1="${tzu.bin.dir}/zoneinfo.res" file2="${tzu.test.dir}/zoneinfo.res" />
+ </condition>
+
+ <fail unless="tzu.zoneinfo.match">ICUTZU test failed. ${tzu.bin.dir}/zoneinfo.res does not match ${tzu.test.dir}/zoneinfo.res</fail>
+
+ <echo>Cleaning up ...</echo>
+ <move todir="${tzu.bin.dir}">
+ <fileset file="${tzu.temp.dir}/DirectorySearch.txt" />
+ <fileset file="${tzu.temp.dir}/zoneinfo.res" />
+ </move>
+ <delete includeEmptyDirs="true">
+ <fileset file="${tzu.temp.dir}" />
+ <fileset dir="${tzu.test.dir}" />
+ </delete>
+
+ <echo>ICUTZU test successful.</echo>
+ </target>
+
+ <target name="icutzusrc-dist" depends="init" description="ICUTZU source distributable zip">
+ <zip zipfile="${basedir}/${tzu.src.zip.file}" basedir="${basedir}" includes="${src.dir}/${tzu.src.path}/**/*" />
+ </target>
+
+ <target name="icutzubin-dist" depends="icutzujar" description="ICUTZU binaries distributable zip">
+ <zip zipfile="${basedir}/${tzu.bin.zip.file}" basedir="${tzu.bin.dir}" includes="*" />
+ </target>
+</project>
diff --git a/eclipseFragment.txt b/eclipseFragment.txt
new file mode 100644
index 0000000..fd80d93
--- /dev/null
+++ b/eclipseFragment.txt
@@ -0,0 +1,10 @@
+# Copyright (C) 2006-2007, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+# These files contain special code blocks for the ICU Eclipse fragment
+src/com/ibm/icu/impl/ByteBuffer.java
+src/com/ibm/icu/impl/ICUResourceBundleImpl.java
+src/com/ibm/icu/lang/UCharacter.java
+src/com/ibm/icu/text/DecimalFormat.java
+src/com/ibm/icu/text/RuleBasedCollator.java
+src/com/ibm/icu/util/UResourceBundle.java
diff --git a/eclipseProjectMisc/initSrc.launch b/eclipseProjectMisc/initSrc.launch
new file mode 100644
index 0000000..9a1b84e
--- /dev/null
+++ b/eclipseProjectMisc/initSrc.launch
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.ant.AntLaunchConfigurationType">
+<stringAttribute key="org.eclipse.ant.ui.ATTR_BUILD_SCOPE" value="${none}"/>
+<booleanAttribute key="org.eclipse.ant.ui.DEFAULT_VM_INSTALL" value="true"/>
+<stringAttribute key="org.eclipse.debug.core.ATTR_REFRESH_SCOPE" value="${working_set:<?xml version="1.0" encoding="UTF-8"?> <launchConfigurationWorkingSet editPageId="org.eclipse.ui.resourceWorkingSetPage" factoryID="org.eclipse.ui.internal.WorkingSetFactory" label="working set" name="working set"> <item factoryID="org.eclipse.ui.internal.model.ResourceFactory" path="/icu4j/src" type="2"/> </launchConfigurationWorkingSet>}"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/icu4j/build.xml"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
+<listEntry value="org.eclipse.ui.externaltools.launchGroup"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.eclipse.ant.internal.ui.antsupport.InternalAntRunner"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="icu4j"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_INSTALL_NAME" value="jre1.6.0"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_INSTALL_TYPE_ID" value="org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_ANT_TARGETS" value="initSrc,"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/icu4j/build.xml}"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/icu4j}"/>
+<stringAttribute key="process_factory_id" value="org.eclipse.ant.ui.remoteAntProcessFactory"/>
+</launchConfiguration>
diff --git a/eclipseProjectMisc/normSrc.launch b/eclipseProjectMisc/normSrc.launch
new file mode 100644
index 0000000..179d952
--- /dev/null
+++ b/eclipseProjectMisc/normSrc.launch
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.ant.AntLaunchConfigurationType">
+<stringAttribute key="org.eclipse.ant.ui.ATTR_BUILD_SCOPE" value="${none}"/>
+<booleanAttribute key="org.eclipse.ant.ui.DEFAULT_VM_INSTALL" value="true"/>
+<stringAttribute key="org.eclipse.debug.core.ATTR_REFRESH_SCOPE" value="${working_set:<?xml version="1.0" encoding="UTF-8"?> <launchConfigurationWorkingSet editPageId="org.eclipse.ui.resourceWorkingSetPage" factoryID="org.eclipse.ui.internal.WorkingSetFactory" label="working set" name="working set"> <item factoryID="org.eclipse.ui.internal.model.ResourceFactory" path="/icu4j/src" type="2"/> </launchConfigurationWorkingSet>}"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/icu4j/build.xml"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.core.appendEnvironmentVariables" value="true"/>
+<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
+<listEntry value="org.eclipse.ui.externaltools.launchGroup"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.eclipse.ant.internal.ui.antsupport.InternalAntRunner"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="icu4j"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_INSTALL_NAME" value="jre1.6.0"/>
+<stringAttribute key="org.eclipse.jdt.launching.VM_INSTALL_TYPE_ID" value="org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_ANT_TARGETS" value="normSrc,"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/icu4j/build.xml}"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/icu4j}"/>
+<stringAttribute key="process_factory_id" value="org.eclipse.ant.ui.remoteAntProcessFactory"/>
+</launchConfiguration>
diff --git a/ee.foundation.jar b/ee.foundation.jar
new file mode 100644
index 0000000..60168e8
--- /dev/null
+++ b/ee.foundation.jar
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f730fd4a8ca71fcddc61a4f62490cea7805efffc7eb2b8669942f6d123f7249
+size 1059077
diff --git a/license.html b/license.html
new file mode 100644
index 0000000..7c52568
--- /dev/null
+++ b/license.html
@@ -0,0 +1,51 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
+<title>ICU License - ICU 1.8.1 and later</title>
+</head>
+
+<body BGCOLOR="#ffffff">
+<h2>ICU License - ICU 1.8.1 and later</h2>
+
+<p>COPYRIGHT AND PERMISSION NOTICE</p>
+
+<p>
+Copyright (c) 1995-2007 International Business Machines Corporation and others
+</p>
+<p>
+All rights reserved.
+</p>
+<p>
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies
+of the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+</p>
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
+OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+</p>
+<p>
+Except as contained in this notice, the name of a copyright holder shall not be
+used in advertising or otherwise to promote the sale, use or other dealings in
+this Software without prior written authorization of the copyright holder.
+</p>
+
+<hr>
+<p><small>
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
+</small></p>
+</body>
+</html>
diff --git a/preprocessor.txt b/preprocessor.txt
new file mode 100644
index 0000000..4a0bc91
--- /dev/null
+++ b/preprocessor.txt
@@ -0,0 +1,81 @@
+# Copyright (C) 2007-2008, International Business Machines Corporation and
+# others. All Rights Reserved.
+
+## core sources
+src/com/ibm/icu/impl/ByteBuffer.java
+src/com/ibm/icu/impl/DateNumberFormat.java
+src/com/ibm/icu/impl/duration/BasicDurationFormat.java
+src/com/ibm/icu/impl/ICUResourceBundleImpl.java
+src/com/ibm/icu/impl/ICUResourceBundleReader.java
+src/com/ibm/icu/impl/JavaTimeZone.java
+src/com/ibm/icu/impl/PatternTokenizer.java
+src/com/ibm/icu/impl/Utility.java
+src/com/ibm/icu/lang/UCharacter.java
+src/com/ibm/icu/math/BigDecimal.java
+src/com/ibm/icu/text/Bidi.java
+src/com/ibm/icu/text/ChineseDateFormat.java
+src/com/ibm/icu/text/DateFormat.java
+src/com/ibm/icu/text/DateTimePatternGenerator.java
+src/com/ibm/icu/text/DecimalFormat.java
+src/com/ibm/icu/text/DigitList.java
+src/com/ibm/icu/text/MessageFormat.java
+src/com/ibm/icu/text/NumberFormat.java
+src/com/ibm/icu/text/RuleBasedBreakIterator.java
+src/com/ibm/icu/text/RuleBasedCollator.java
+src/com/ibm/icu/text/RuleBasedNumberFormat.java
+src/com/ibm/icu/text/SimpleDateFormat.java
+src/com/ibm/icu/text/UnicodeSet.java
+src/com/ibm/icu/text/UTF16.java
+src/com/ibm/icu/util/ByteArrayWrapper.java
+src/com/ibm/icu/util/UResourceBundle.java
+
+## test sources
+src/com/ibm/icu/dev/test/bidi/TestAll.java
+src/com/ibm/icu/dev/test/bidi/TestCompatibility.java
+src/com/ibm/icu/dev/test/bigdec/DiagBigDecimal.java
+src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
+src/com/ibm/icu/dev/test/cldr/TestCLDRVsICU.java
+src/com/ibm/icu/dev/test/collator/RandomCollator.java
+src/com/ibm/icu/dev/test/collator/TestAll.java
+src/com/ibm/icu/dev/test/duration/ICUDurationTest.java
+src/com/ibm/icu/dev/test/format/BigNumberFormatTest.java
+src/com/ibm/icu/dev/test/format/DateFormatTest.java
+src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java
+src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatAPIC.java
+src/com/ibm/icu/dev/test/format/IntlTestNumberFormatAPI.java
+src/com/ibm/icu/dev/test/format/NumberFormatRegressionTest.java
+src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+src/com/ibm/icu/dev/test/format/NumberRegression.java
+src/com/ibm/icu/dev/test/format/RbnfTest.java
+src/com/ibm/icu/dev/test/format/TestAll.java
+src/com/ibm/icu/dev/test/format/TestMessageFormat.java
+src/com/ibm/icu/dev/test/ResourceModule.java
+src/com/ibm/icu/dev/test/serializable/FormatTests.java
+src/com/ibm/icu/dev/test/serializable/SerializableTest.java
+src/com/ibm/icu/dev/test/TestAll.java
+src/com/ibm/icu/dev/test/TestDataModule.java
+src/com/ibm/icu/dev/test/TestFmwk.java
+src/com/ibm/icu/dev/test/TestUtil.java
+src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java
+src/com/ibm/icu/dev/test/timezone/TimeZoneRegression.java
+src/com/ibm/icu/dev/test/translit/UnicodeMapTest.java
+src/com/ibm/icu/dev/test/util/BagFormatter.java
+src/com/ibm/icu/dev/test/util/BNF.java
+src/com/ibm/icu/dev/test/util/CollectionUtilities.java
+src/com/ibm/icu/dev/test/util/DataInputCompressor.java
+src/com/ibm/icu/dev/test/util/DataOutputCompressor.java
+src/com/ibm/icu/dev/test/util/FileUtilities.java
+src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java
+src/com/ibm/icu/dev/test/util/ICUResourceBundleTest.java
+src/com/ibm/icu/dev/test/util/TestBagFormatter.java
+src/com/ibm/icu/dev/test/util/TestBNF.java
+src/com/ibm/icu/dev/test/util/TestUtilities.java
+src/com/ibm/icu/dev/test/util/Tokenizer.java
+src/com/ibm/icu/dev/test/util/TransliteratorUtilities.java
+src/com/ibm/icu/dev/test/util/UnicodeMap.java
+src/com/ibm/icu/dev/test/util/UnicodeProperty.java
+src/com/ibm/icu/dev/test/util/UtilityTest.java
+src/com/ibm/icu/dev/test/perf/NormalizerPerformanceTest.java
+
+## tool sources
+src/com/ibm/icu/dev/tool/docs/ICUTaglet.java
\ No newline at end of file
diff --git a/readme.html b/readme.html
new file mode 100644
index 0000000..87540d5
--- /dev/null
+++ b/readme.html
@@ -0,0 +1,1821 @@
+<!DOCTYPE html PUBLIC "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+ <meta http-equiv="Content-Style-Type" content="text/css2">
+ <title>ReadMe for ICU4J</title>
+ <meta name="COPYRIGHT" content="Copyright 2000-2007, International Business Machines Corporation and others. All Rights Reserved.">
+ <style type="text/css">
+h3.doc { background: #CCCCFF }
+ </style>
+</head>
+<body style="background-color: rgb(255, 255, 255);" lang="EN-US"
+ link="#0000ff" vlink="#800080">
+<h2>International Components for Unicode for Java (ICU4J)</h2>
+<h3>Read Me for ICU4J 3.8</h3>
+<hr size="2" width="100%">
+<p><b>Release Date</b><br>
+September 14, 2007<br>
+</p>
+<p><b>Note:</b> This is a major release of ICU4J. It contains bug fixes
+and adds implementations of inherited API and introduces new API
+or functionality.
+</p>
+<p>For the most recent release, see the <a
+ href="http://www.icu-project.org/download/"> ICU4J
+download site</a>. </p>
+<h3 class="doc">Contents</h3>
+<ul type="disc">
+ <li><a href="#introduction">Introduction to ICU4J</a></li>
+ <li><a href="#news">What Is New In This Release?</a></li>
+ <li><a href="#license">License Information</a></li>
+ <li><a href="#PlatformDependencies">Platform Dependencies</a></li>
+ <li><a href="#download">How to Download ICU4J</a></li>
+ <li><a href="#WhatContain">The Structure and Contents of ICU4J</a></li>
+ <li><a href="#API">Where to Get Documentation</a></li>
+ <li><a href="#HowToInstallJavac">How to Install and Build</a></li>
+ <li><a href="#HowToModularize">How to modularize ICU4J</a></li>
+ <li><a href="#tryingout">Trying Out ICU4J</a></li>
+ <li><a href="#resources">ICU4J Resource Information</a></li>
+ <li><a href="#WhereToFindMore">Where to Find More Information</a></li>
+ <li><a href="#SubmittingComments">Submitting Comments, Requesting
+Features and Reporting Bugs</a></li>
+</ul>
+<h3 class="doc"><a name="introduction"></a>Introduction to ICU4J</h3>
+<p>The International Components for Unicode (ICU) library provides
+robust and
+full-featured Unicode services on a wide variety of platforms. ICU
+supports the
+most current version of the Unicode standard, including support for
+supplementary characters (needed for GB 18030 repertoire support).</p>
+<p>Java provides a strong foundation for global programs, and IBM and
+the
+ICU team played a key role in providing globalization technology to
+Java. But because of its long release schedule, Java cannot always keep
+up with evolving standards. The ICU team continues to extend Java's
+Unicode and internationalization support, focusing on improving
+performance,
+keeping current with the Unicode standard, and providing richer APIs,
+while
+remaining as compatible as possible with the original Java text and
+internationalization API design.</p>
+<p>ICU4J is an add-on to the regular JRE that provides:
+</p>
+<ul>
+ <li><a
+ href="http://www.icu-project.org/userguide/Collate_Intro.html"><b>Collation</b></a>
+– rule-based, up-to-date Unicode Collation Algorithm (UCA) sorting order<br>
+ For fast multilingual string comparison; faster
+and more complete than
+the J2SE implementation</li>
+ <li><a href="http://www.icu-project.org/userguide/strings.html"><b>Supplementary
+Characters</b></a> – String manipulation and character properties<br>
+ Required for proper GB 18030 and JIS X 0213
+repertoire support</li>
+ <li><a href="http://www.icu-project.org/userguide/charsetDetection.html"><b>Charset
+Detection</b></a> – Recognition of various single and multibyte charsets<br>
+ Useful for recognizing untagged text data</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/unicodeSet.html"><b>UnicodeSet</b></a>
+– standard set operations optimized for sets of Unicode characters<br>
+ UnicodeSets can be built from string patterns
+using any Unicode properties.</li>
+ <li><a href="http://www.icu-project.org/userguide/Transform.html"><b>Transforms</b></a>
+– a flexible mechanism for Unicode text conversions<br>
+ Including Full/Halfwidth conversions,
+Normalization, Case conversions, Hex
+conversions, and transliterations between scripts (50+ pairs)</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/normalization.html"><b>Unicode
+Normalization</b></a> – NFC, NFD, NFKD, NFKC<br>
+ For canonical text representations, needed for
+XML and the net</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/dateCalendar.html"><b>International
+Calendars</b></a> – Arabic, Buddhist, Chinese, Hebrew, Japanese, Ethiopic, Islamic, Coptic and other calendars<br>
+ Required for correct presentation of dates in
+certain countries</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/formatNumbers.html"><b>Number
+Format
+Enhancements</b></a> – Scientific Notation, Spelled-out, etc.<br>
+ Enhancements to the normal Java number
+formatting. The spell-out format is
+used for checks and similar documents</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/boundaryAnalysis.html"><b>Enhanced
+Word-Break Detection</b></a> – Rule-based, supports Thai<br>
+ Required for correct support of Thai</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/compression.html"><b>Unicode
+Text
+Compression</b></a> – Standard compression of Unicode text<br>
+ Suitable for large numbers of small fields,
+where LZW and similar schemes
+do not apply</li>
+ <li><a
+ href="http://www.icu-project.org/userguide/conversion.html"><b>Charset Conversion</b></a> – Conversion to and from different charsets.<br>
+ Plugs into Java CharsetProvider Service Provider Interface (SPI)</li>
+
+</ul>
+<blockquote>
+ <p><b>Note:</b> We continue to provide assistance to Sun, and in some
+cases, ICU4J support has been rolled into a later release of Java. For
+example, the Thai word-break is now in Java 1.4. However, the most
+current and complete version is always found in ICU4J.</p>
+</blockquote>
+
+<h3 class="doc"><a name="news"></a>What Is New In This Release?</h3>
+
+<p><b>Changes to J2SE version requirement for building ICU4J</b>
+<p>Previous versions of ICU4J were successfully built with J2SE SDK 1.4 or later
+versions. This release includes a new feature that uses a type introduced
+in J2SE 5.0. Although the new feature does not need that type to be available at run time,
+it requires the J2SE 5.0 or newer Java class library at build time to enable
+the feature. The binary distribution version of ICU4J available at the ICU download
+page was built with J2SE SDK 5.0 and this version should work well on JRE 1.4 or later
+versions. If you want to build your own copy of ICU4J binaries with J2SE SDK 1.4,
+you can still run all of the Ant standard build targets in build.xml and make ICU4J
+binaries. The build script detects the current Java version and comments out code
+blocks including references to J2SE 5.0 only types.</p>
+<p><b>Changes to timezone formatting and parsing</b>
+<p>In ICU 3.8, the behavior of date formatting and parsing has changed
+significantly, perhaps requiring recoding on your part depending on your
+usage. For more information, see <a href="http://icu-project.org/userguide/formatDateTime.html">
+Formatting Dates and Times</a> in the User Guide.
+</p>
+<p><b>Status of ICU4J charset converter</b>
+<p>The ICU4J implementation of java.nio.charset.Charset is included as a Technology
+Preview. Not all functionality from the java.nio.Charset interfaces is operational,
+and some converters are known to mis-handle Unicode supplementary characters. Use with caution.
+</p>
+<p><b>New features</b></p>
+<p>
+See the <a href="http://www.icu-project.org/download/">ICU 3.8 download page</a> for information about new features in this release.
+</p>
+<h3 class="doc"><a name="license"></a>License Information</h3>
+<p>
+The ICU projects (ICU4C and ICU4J) use the X license. The X
+license is <b>suitable for commercial use</b> and is a recommended free software license
+that is compatible with the GNU GPL license. This became
+effective with release 1.8.1 of ICU4C and release 1.3.1 of ICU4J in
+mid-2001. All new ICU releases will adopt the X license; previous ICU
+releases continue to utilize the IPL (IBM Public License). Users
+of previous releases of ICU who want to adopt new ICU releases will
+need to accept the terms and conditions of the X license.
+</p>
+<p>
+The main effect of the change is to provide GPL compatibility.
+The X license is listed as GPL compatible, see the GNU page at
+<a href="http://www.gnu.org/philosophy/license-list.html#GPLCompatibleLicenses">
+http://www.gnu.org/philosophy/license-list.html#GPLCompatibleLicenses</a>.
+This means that GPL projects can now use ICU code; it does <b>not</b>
+mean that projects using ICU become subject to GPL.
+</p>
+<p>
+ The IBM version contains the essential text of the license, omitting the
+X-specific trademarks and copyright notices. The full copy of <a
+ href="license.html">ICU's license</a> is included in the download
+package.
+</p>
+<h3 class="doc"><a name="PlatformDependencies"></a>Platform Dependencies</h3>
+<p> By default ICU4J depends on functionality that is only available
+in J2SE 1.4 or later releases. Although some new ICU4J features support types
+introduced in J2SE 5, you can still use the same ICU4J binaries on JRE
+1.4. We provide the ability to build a variant of ICU4J that will run
+on JRE 1.3, but not all build targets work on that platform. Currently
+1.1.x and 1.2.x JREs are unsupported and untested, and you use the
+components on these JREs at your own risk.
+<p>The table below shows operating systems and JRE/JDK versions currently
+used by the ICU development team.
+</p>
+<table bgcolor="#CCCCFF">
+<tr>
+ <th rowspan="2" bgcolor="#FFFFFF">Operating System</th>
+ <th colspan="5" bgcolor="#FFFFFF">Sun Java SE</th>
+ <th colspan="3" bgcolor="#FFFFFF">IBM Java SE</th>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">1.6.0</th>
+ <th bgcolor="#FFFFFF">1.5.0</th>
+ <th bgcolor="#FFFFFF">1.4.2</th>
+ <th bgcolor="#FFFFFF">1.4.1</th>
+ <th bgcolor="#FFFFFF">1.4.0</th>
+ <th bgcolor="#FFFFFF">1.5.0</th>
+ <th bgcolor="#FFFFFF">1.4.2</th>
+ <th bgcolor="#FFFFFF">1.4.1</th>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">AIX 5.2</th>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">AIX 5.3</th>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#CCCCFF"><em><b>Reference platform</b></em></td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">HP-UX 11 (PA-RISC)</th>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">HP-UX 11 (IA64)</th>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">Redhat Enterprise Linux 4 (x86)</th>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+</tr>
+<tr bgcolor="#FFFFFF">
+ <th>Redhat Enterprise Linux 5 (x86)</th>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+</tr>
+<tr bgcolor="#FFFFFF">
+ <th>Solaris 9 (SPARC)</th>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+</tr>
+<tr bgcolor="#FFFFFF">
+ <th>Solaris 10 (SPARC)</th>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#CCCCFF"><em><b>Reference platform</b></em></td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">Windows XP</th>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#CCCCFF"><em><b>Reference platform</b></em></td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+</tr>
+<tr>
+ <th bgcolor="#FFFFFF">Windows Vista</th>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#FFFFFF">-</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#DDDDFF">Regularly tested</td>
+ <td align="center" bgcolor="#EEEEFF">Rarely tested</td>
+</tr>
+</table>
+
+<h3 class="doc"><a name="download"></a>How to Download ICU4J</h3>
+<p>There are two ways to download the ICU4J releases.
+</p>
+<ul type="disc">
+ <li><b>Official Release Snapshot:</b><br>
+If you want to use ICU4J (as opposed to developing it), your best bet
+is to download an official, packaged version of the ICU4J source
+code. These versions are tested more thoroughly than day-to-day
+development builds, and they are packaged in jar files for convenient
+download. These packaged files can be found at the <a
+ href="http://www.icu-project.org/download/">ICU Downloads page</a>.
+A packaged snapshot is named <b>icu4j-XXX-src.jar</b>, where XXX
+is the release version number. Please unjar this file. It
+will reconstruct the source directory.</li>
+</ul>
+<ul type="disc">
+ <li><b>Subversion Source Repository:</b><br>
+If you are interested in developing features, patches, or bug fixes for
+ICU4J, you should probably be working with the latest version of the
+ICU4J source code. You will need to check the code out of our Subversion
+repository to ensure that you have the most recent version of all of
+the files. There are several ways to do this. Please follow the
+directions that are contained on the <a
+ href="http://www.icu-project.org/repository/">Source
+ Repository page</a> for details.
+ </li>
+</ul>
+<p>For more details on how to download ICU4J directly from the web
+site, please see the ICU downloads page at <a
+ href="http://www.icu-project.org/download/">http://www.icu-project.org/download/</a>
+</p>
+<h3 class="doc"><a name="WhatContain"></a>The Structure and Contents of
+ICU4J</h3>
+<p>Below, <b>$icu4j_root</b> is the location of the icu4j directory in your
+file system, like
+"drive:\...\icu4j" in your environment. "drive:\..." stands for any
+drive and any directory on that drive that you chose to install icu4j
+into. </p>
+<p><b>Information and build files:</b></p>
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="623">
+ <tbody>
+ <tr>
+ <td align="right" bgcolor="#ffffff" valign="baseline"><b>readme.html</b><br>
+(this file)</td>
+ <td bgcolor="#ffffff" valign="baseline">A description of ICU4J
+(International Components for Unicode for Java)</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">license.html</th>
+ <td bgcolor="#ffffff" valign="baseline">The X license, used by
+ICU4J</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">build.xml</th>
+ <td bgcolor="#ffffff" valign="baseline">Ant build file. See <a
+ href="#HowToInstallJavac">How to Install and Build</a> for more
+information</td>
+ </tr>
+ </tbody>
+</table>
+<p><b>The source directories mirror the package structure of the code.</b><br>
+<font color="red">Core</font> packages become part of the ICU4J jar
+file.<br>
+<font color="red">Charset</font> packages become part of the ICU4J charset jar
+file.<br>
+<font color="red">API</font> packages contain classes with supported
+API. <br>
+<font color="red">RichText</font> classes are Core and API, but can be
+removed from icu4j.jar, and can be built into their own jar.</p>
+<table bgcolor="#ccccff" border="0" cellpadding="3" frame="void"
+ width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/charset<br>
+ <font color="red">Charset, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Packages that provide Charset conversion
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/dev<br>
+ <font color="red">Non-Core, Non-API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Packages used for
+internal development:
+ <ul>
+ <li>Data: data used by tests and in building ICU</li>
+ <li>Demos: Calendar, Holiday, Break Iterator, Rule-based Number
+Format, Transformations<br>
+(See <a href="#tryingout">below</a> for more information about the
+demos.)</li>
+ <li>Tests: API and coverage tests of all functionality.<br>
+For information about running the tests, see
+$icu4j_root/src/com/ibm/icu/dev/test/TestAll.java.</li>
+ <li>Tools: tools used to build data tables, etc.</li>
+ </ul>
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/impl<br>
+ <font color="red">Core, Non-API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">These are utility classes
+used from different ICU4J core packages.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/lang<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Character properties
+package.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/math<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Additional math classes.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/text<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Additional text classes.
+These add to, and in some cases replace, related core Java classes:
+ <ul>
+ <li>Arabic shaping </li>
+ <li>Break iteration </li>
+ <li>Date formatting </li>
+ <li>Number formatting </li>
+ <li>Transliteration </li>
+ <li>Normalization </li>
+ <li>String manipulation </li>
+ <li>Collation </li>
+ <li>String search </li>
+ <li>Unicode compression </li>
+ <li>Unicode sets </li>
+ </ul>
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/icu/util<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Additional utility
+classes:
+ <ul>
+ <li>Calendars - Gregorian, Buddhist, Coptic, Ethiopic, Hebrew, Islamic, Japanese, Chinese and others</li>
+ <li>Holiday</li>
+ <li>TimeZone</li>
+ <li>VersionInfo</li>
+ <li>Iteration</li>
+ </ul>
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/src/com/ibm/richtext<br>
+ <font color="red">RichText</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Styled text editing
+package. This includes demos, tests, and GUIs for editing and
+displaying styled text. The richtext package provides a scrollable
+display, typing, arrow-key support, tabs, alignment and justification,
+word- and sentence-selection (by double-clicking and triple-clicking,
+respectively), text styles, clipboard operations (cut, copy and paste)
+and a log of changes for undo-redo. Richtext uses Java's TextLayout and
+complex text support (provided to Sun by the ICU4J team).</td>
+ </tr>
+ </tbody>
+</table>
+<p><b>Building ICU4J creates and populates the following directories:</b></p>
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/classes</th>
+ <td bgcolor="#ffffff" valign="baseline">contains all class files</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$icu4j_root/doc</th>
+ <td bgcolor="#ffffff" valign="baseline">contains JavaDoc for all
+packages</td>
+ </tr>
+ </tbody>
+</table>
+<br>
+<p><b>ICU4J data is stored in the following locations:</b></p>
+<table bgcolor="#ccccff" border="0" cellpadding="3" frame="void"
+ width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline"><code>com.ibm.icu.impl.data</code></th>
+ <td bgcolor="#ffffff" valign="baseline">Holds data used by the
+ICU4J core packages (<code>com.ibm.icu.lang</code>, <code>com.ibm.icu.math</code>,
+ <code>com.ibm.icu.text</code> and <code>com.ibm.icu.util</code>).
+ In particular, all resource
+information is stored here.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline"><code>com.ibm.icu.dev.data</code></th>
+ <td bgcolor="#ffffff" valign="baseline">Holds data that is not
+part of ICU4J core, but rather part of a test, sample, or demo.</td>
+ </tr>
+ </tbody>
+</table>
+<br>
+<h3 class="doc"><a name="API"></a>Where to get Documentation</h3>
+<p>The <a href="http://www.icu-project.org/userguide/">ICU user's
+guide</a> contains lots of general information about ICU, in its C,
+C++, and Java incarnations.</p>
+<p>The complete API documentation for ICU4J (javadoc) is available on
+the ICU4J web site, and can be built from the sources:
+</p>
+<ul>
+ <li><a href="http://www.icu-project.org/apiref/icu4j/">Index
+to all ICU4J API</a></li>
+ <li><a href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/CharsetDetector.html">Charset Detector</a> – Detection of charset from a byte stream</li>
+ <li>International Calendars –
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/BuddhistCalendar.html">Buddhist</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/ChineseCalendar.html">Chinese</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/CopticCalendar.html">Coptic</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/EthiopicCalendar.html">Ethiopic</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/GregorianCalendar.html">Gregorian</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/HebrewCalendar.html">Hebrew</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/IslamicCalendar.html">Islamic</a>,
+ <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/util/JapaneseCalendar.html">Japanese</a>.</li>
+ <li><a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/Normalizer.html">Unicode
+Normalization</a> – Canonical text representation for W3C.</li>
+ <li><a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/NumberFormat.html">Number
+Format Enhancements</a> – Scientific Notation, Spelled out.</li>
+ <li><a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/BreakIterator.html">Enhanced
+word-break detection</a> – Rule-based, supports Thai</li>
+ <li><a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/Transliterator.html">Transliteration</a>
+– A general framework for converting text from one format to another,
+e.g. Cyrillic to Latin, or Hex to Unicode. </li>
+ <li>Unicode Text <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeCompressor.html">Compression</a>
+&amp; <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeDecompressor.html">Decompression</a>
+– 2:1 compression on English Unicode text.</li>
+ <li>Collation - <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/RuleBasedCollator.html">Rule-based
+sorting</a>, <a
+ href="http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/StringSearch.html">Efficient
+multi-lingual searching</a> </li>
+</ul>
+<h3 class="doc"><a name="HowToInstallJavac"></a>How to Install and Build</h3>
+<p>To install ICU4J, simply place the prebuilt jar file <strong>icu4j.jar</strong>
+on your Java CLASSPATH. If you need Charset API support please place
+<strong>icu4j-charsets.jar</strong> on your class path. No other files are needed.</p>
+<p><b>Eclipse users:</b> See the ICU4J site for information on <a
+ href="http://www.icu-project.org/docs/eclipse_howto/eclipse_howto.html">
+how to configure Eclipse</a> to build ICU4J.</p>
+<p>To build ICU4J, you will need a J2SE SDK and the Ant build system.
+We strongly recommend using the Ant build system to build ICU4J.
+It's recommended to install both the J2SE SDK and Ant somewhere <em>outside</em>
+the ICU4J directory. For example, on Linux you might install these in
+/usr/local.</p>
+<ul>
+ <li>Install J2SE SDK 5.0. (You can use any version of J2SE SDK 1.4 or newer,
+ but J2SE SDK 5.0 is required to enable all available ICU4J features.)</li>
+ <li>Install the <a href="http://ant.apache.org/"><strong>Ant</strong></a>
+build system. Ant is a portable, Java-based build system similar to
+make. ICU4J uses Ant because it introduces no other dependencies, it's
+portable, and it's easier to manage than a collection of makefiles. We
+currently build ICU4J on all platforms using a single Ant build file.
+The build system requires Ant 1.6 or later.
+ <p>Installing Ant is straightforward. Download it (see <a
+ href="http://ant.apache.org/bindownload.cgi">http://ant.apache.org/bindownload.cgi</a>),
+extract it onto your system, set some environment variables, and add
+its bin directory to your path. For example: </p>
+ <pre>
+ set JAVA_HOME=C:\jdk1.5.0
+ set ANT_HOME=C:\ant
+ set PATH=%PATH%;%ANT_HOME%\bin</pre>
+ <p>See the current Ant documentation for details.</p>
+ </li>
+</ul>
+<p>Once the J2SE SDK and Ant are installed, building is just a matter of
+typing <strong>ant</strong> in the ICU4J root directory. This causes
+the Ant build system to perform a build as specified by the file
+<strong>build.xml</strong>, located in the ICU4J root directory. You
+can give Ant options like -verbose, and you can specify targets. Ant
+will only build what's been changed and will resolve dependencies
+properly. For example:</p>
+<blockquote>
+<pre>C:\icu4j>ant
+Buildfile: build.xml
+
+checkAntVersion:
+
+warnAntVersion:
+
+initBase:
+ [mkdir] Created dir: C:\icu4j\classes
+ [echo] java home: C:\jdk1.5.0
+ [echo] java version: 1.5.0
+ [echo] ant java version: 1.5
+ [echo] Apache Ant version 1.7.0 compiled on December 13 2006
+ [echo] ICU4JDEV with Windows XP 5.1 build 2600 Service Pack 2 on x86
+ [echo] clover initstring = '${clover.initstring}'
+ [echo] target runtime environment: J2SE15
+ [echo] Initialized at 2007-08-30 at 04:14:09 EDT
+
+buildMangle:
+ [javac] Compiling 1 source file to C:\icu4j\classes
+
+initSrc:
+
+displayBuildEnvWarning:
+
+doMangle:
+ [echo] Running source code preprocessor for [J2SE15]
+
+init:
+
+coreData:
+ [copy] Copying 1 file to C:\icu4j\classes\com\ibm\icu\impl\data
+
+icudata:
+ [unjar] Expanding: C:\icu4j\src\com\ibm\icu\impl\data\icudata.jar into C:\ic
+u4j\classes
+ [copy] Copying 1 file to C:\icu4j\classes\META-INF
+
+durationdata:
+ [copy] Copying 16 files to C:\icu4j\classes\com\ibm\icu\impl\duration\impl\
+data
+
+core:
+ [javac] Compiling 317 source files to C:\icu4j\classes
+ [javac] Note: * uses or overrides a deprecated API.
+ [javac] Note: Recompile with -Xlint:deprecation for details.
+
+BUILD SUCCESSFUL
+Total time: 10 seconds</pre>
+</blockquote>
+<I>Note: The above output is an example. The numbers are likely to be different with the current version of ICU4J.</I>
+<p>The following are some targets that you can provide to <b>ant</b>.
+For more targets run <code>ant -projecthelp</code> or see the build.xml file.</p>
+<table bgcolor="#ccccff" border="0" cellpadding="3" frame="void"
+ width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">all</th>
+ <td bgcolor="#ffffff" valign="baseline">Build all targets.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">core</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the main class
+files in the subdirectory <strong>classes</strong>. If no target is
+specified, core is assumed.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">tests</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the test class
+files.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">demos</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the demos.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">tools</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the tools.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">docs</th>
+ <td bgcolor="#ffffff" valign="baseline">Run javadoc over the main
+class files, generating an HTML documentation tree in the subdirectory <strong>doc</strong>.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">jar</th>
+ <td bgcolor="#ffffff" valign="baseline">Create a jar archive <strong>icu4j.jar</strong>
+in the root ICU4J directory containing the main class files.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">jarSrc</th>
+ <td bgcolor="#ffffff" valign="baseline">Like the <strong>jar</strong>
+target, but containing only the source files. </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">jarDocs</th>
+ <td bgcolor="#ffffff" valign="baseline">Like the <strong>jar</strong>
+target, but containing only the docs. </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">richedit</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the richedit core
+class files and tests. </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">richeditJar</th>
+ <td bgcolor="#ffffff" valign="baseline">Create the richedit jar
+file (which contains only the richedit core class files). The file <strong>richedit.jar</strong>
+will be created in the <strong>./richedit</strong> subdirectory. Any
+existing file of that name will be overwritten.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">richeditZip</th>
+ <td bgcolor="#ffffff" valign="baseline">Create a zip archive of
+the richedit docs and jar file for distribution. The zip file <strong>richedit.zip</strong>
+will be created in the <strong>./richedit</strong> subdirectory. Any
+existing file of that name will be overwritten.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">clean</th>
+ <td bgcolor="#ffffff" valign="baseline">Remove all built targets,
+leaving the source.</td>
+ </tr>
+ </tbody>
+</table>
+<p>For more information, read the Ant documentation and the <strong>build.xml</strong>
+file.</p>
+<p>After doing a build it is a good idea to run all the icu4j tests by
+typing<br>
+<tt>"ant check"</tt> or
+<tt>"java -classpath classes com.ibm.icu.dev.test.TestAll -nothrow"</tt>.</p>
+<h3 class="doc"><a name="HowToModularize"></a>How to modularize ICU4J</h3>
+<p>Some clients may not wish to ship all of ICU4J with their
+application, since the application might only use a small part of
+ICU4J.
+ICU4J release 2.6 and later provide build options to build individual
+ICU4J 'modules' for a more compact distribution.
+The modules are based on a service and the APIs that define it, e.g.,
+the normalizer module supports all the APIs of the Normalizer class
+(and some others). Tests can be run to verify that the APIs supported
+by the module function correctly.
+Because of internal code dependencies, a module contains extra classes
+that are not part of the module's core service API. Some or most of the
+APIs of these extra classes will not work. <b>Only the module's core
+service API is guaranteed.</b> Other APIs may work partially or not at
+all, so client code should avoid them.</p>
+<p>
+Individual modules are not built directly into their own separate jar
+files. Since their dependencies
+often overlap, using separate modules to 'add on' ICU4J functionality
+would result in
+unwanted duplication of class files. Instead, building a module causes
+a subset of ICU4J's
+classes to be built and put into ICU4J's standard build directory.
+After one or more module targets are built, the 'moduleJar' target can
+then be
+built, which packages the class files into a 'module jar.' Other than
+the fact that it
+contains fewer class files, little distinguishes this jar file from a
+full ICU4J jar file,
+and in fact they share the same name.</p>
+<p>
+Currently ICU4J can be divided into the following modules:
+</p>
+<p><b>Key:</b></p>
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="750">
+ <tbody>
+ <tr>
+
+ <th align="left" valign="baseline"><b>Module Name</b></th>
+ <th align="left" valign="baseline"><b>Ant Targets</b></th>
+ <th align="left" valign="baseline"><b>Test Package Supported</b></th>
+ <th align="right" valign="baseline"><b>Size‡</b></th>
+ </tr>
+ <tr bgcolor="#ffffff">
+
+ <td colspan="4">
+ <table>
+ <tbody>
+ <tr>
+ <td valign="baseline">Package*</td>
+ <td valign="baseline">Main Classes†</td>
+ </tr>
+ </tbody>
+
+ </table>
+ </td>
+ </tr>
+ </tbody>
+</table>
+<b><font size="2">* com.ibm. should be prepended to the package names
+listed.
+<br>
+† Class name in bold indicates core service API. Only APIs in these classes are
+fully supported.
+<br>
+‡ Sizes are of the compressed jar file containing only this module.
+These sizes are approximate for release 3.6.
+</font></b>
+<p><b>Modules:</b></p>
+
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="750">
+
+ <tbody>
+ <tr>
+ <th align="left" valign="baseline">Normalizer</th>
+ <td align="left" valign="baseline">normalizer, normalizerTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.normalizer</td>
+ <td align="right" valign="baseline">465 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakIterator,
+ CanonicalIterator,
+ <b>Normalizer</b>,
+ Replaceable,
+ ReplaceableString,
+ SymbolTable,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Collator</th>
+ <td align="left" valign="baseline">collator, collatorTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.collator</td>
+ <td align="right" valign="baseline">1,911 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ CanonicalIterator,
+ <b>CollationElementIterator</b>,
+ <b>CollationKey</b>,
+ <b>Collator</b>,
+ DictionaryBasedBreakIterator,
+ Normalizer,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedBreakIterator,
+ <b>RuleBasedCollator</b>,
+ SymbolTable,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CompactByteArray,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Calendar</th>
+ <td align="left" valign="baseline">calendar, calendarTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.calendar</td>
+ <td align="right" valign="baseline">2,176 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.math:</td>
+ <td valign="baseline">
+ BigDecimal,
+ MathContext
+ </td>
+ </tr>
+
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakIterator,
+ CanonicalIterator,
+ <b>ChineseDateFormat</b>,
+ <b>ChineseDateFormatSymbols</b>,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>DateFormat</b>,
+ <b>DateFormatSymbols</b>,
+ DecimalFormat,
+ DecimalFormatSymbols,
+ MessageFormat,
+ Normalizer,
+ NumberFormat,
+ PluralFormat,
+ PluralRules,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedCollator,
+ RuleBasedNumberFormat,
+ RuleBasedTransliterator,
+ <b>SimpleDateFormat</b>,
+ SymbolTable,
+ UCharacterIterator,
+ UFormat,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ AnnualTimeZoneRule,
+ <b>BasicTimeZone</b>,
+ <b>BuddhistCalendar</b>,
+ ByteArrayWrapper,
+ <b>Calendar</b>,
+ <b>ChineseCalendar</b>,
+ <b>CopticCalendar</b>,
+ Currency,
+ CurrencyAmount,
+ <b>DateRule</b>,
+ DateTimeRule,
+ <b>EasterHoliday</b>,
+ <b>EthiopicCalendar</b>,
+ Freezable,
+ <b>GregorianCalendar</b>,
+ <b>HebrewCalendar</b>,
+ <b>HebrewHoliday</b>,
+ <b>Holiday</b>,
+ <b>IndianCalendar</b>,
+ InitialTimeZoneRule,
+ <b>IslamicCalendar</b>,
+ <b>JapaneseCalendar</b>,
+ Measure,
+ MeasureUnit,
+ <b>RangeDateRule</b>,
+ RangeValueIterator,
+ <b>SimpleDateRule</b>,
+ <b>SimpleHoliday</b>,
+ <b>SimpleTimeZone</b>,
+ StringTokenizer,
+ <b>TaiwanCalendar</b>,
+ <b>TimeZone</b>,
+ TimeZoneRule,
+ TimeZoneTransition,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">BreakIterator</th>
+ <td align="left" valign="baseline">breakIterator,
+breakIteratorTests</td>
+
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.breakiterator</td>
+ <td align="right" valign="baseline">1,889 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ <b>BreakDictionary</b>,
+ <b>BreakIterator</b>,
+ CanonicalIterator,
+ <b>DictionaryBasedBreakIterator</b>,
+ Normalizer,
+ Replaceable,
+ ReplaceableString,
+ <b>RuleBasedBreakIterator</b>,
+ SymbolTable,
+ Transliterator,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ CompactByteArray,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+
+ <th align="left" valign="baseline">Basic Properties</th>
+ <td align="left" valign="baseline">propertiesBasic,
+propertiesBasicTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.lang</td>
+ <td align="right" valign="baseline">554 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ <b>UCharacter</b>,
+ <b>UCharacterCategory</b>,
+ <b>UCharacterDirection</b>,
+ <b>UCharacterEnums</b>,
+ <b>UProperty</b>,
+ <b>UScript</b>,
+ <b>UScriptRun</b>
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ DictionaryBasedBreakIterator,
+ Normalizer,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedBreakIterator,
+ SymbolTable,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ <b>UTF16</b>
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ CompactByteArray,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Full Properties</th>
+ <td align="left" valign="baseline">propertiesFull,
+propertiesFullTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.lang</td>
+ <td align="right" valign="baseline">1,829 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ <b>UCharacter</b>,
+ <b>UCharacterCategory</b>,
+ <b>UCharacterDirection</b>,
+ <b>UCharacterEnums</b>,
+ <b>UProperty</b>,
+ <b>UScript</b>,
+ <b>UScriptRun</b>
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ DictionaryBasedBreakIterator,
+ <b>Normalizer</b>,
+ <b>Replaceable</b>,
+ <b>ReplaceableString</b>,
+ RuleBasedBreakIterator,
+ SymbolTable,
+ <b>UCharacterIterator</b>,
+ <b>UForwardCharacterIterator</b>,
+ <b>UnicodeFilter</b>,
+ <b>UnicodeMatcher</b>,
+ <b>UnicodeSet</b>,
+ <b>UnicodeSetIterator</b>,
+ <b>UTF16</b>
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ CompactByteArray,
+ Freezable,
+ <b>RangeValueIterator</b>,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ <b>ValueIterator</b>,
+ <b>VersionInfo</b>
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Formatting</th>
+
+ <td align="left" valign="baseline">format, formatTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.format</td>
+ <td align="right" valign="baseline">3,443 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.math:</td>
+ <td valign="baseline">
+ <b>BigDecimal</b>,
+ MathContext
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakIterator,
+ CanonicalIterator,
+ <b>ChineseDateFormat</b>,
+ <b>ChineseDateFormatSymbols</b>,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>DateFormat</b>,
+ <b>DateFormatSymbols</b>,
+ <b>DecimalFormat</b>,
+ <b>DecimalFormatSymbols</b>,
+ <b>DurationFormat</b>,
+ MeasureFormat,
+ <b>MessageFormat</b>,
+ Normalizer,
+ <b>NumberFormat</b>,
+ <b>PluralFormat</b>,
+ <b>PluralRules</b>,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedCollator,
+ <b>RuleBasedNumberFormat</b>,
+ <b>SimpleDateFormat</b>,
+ SymbolTable,
+ <b>UCharacterIterator</b>,
+ UFormat,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ AnnualTimeZoneRule,
+ BasicTimeZone,
+ <b>BuddhistCalendar</b>,
+ ByteArrayWrapper,
+ <b>Calendar</b>,
+ <b>ChineseCalendar</b>,
+ <b>CopticCalendar</b>,
+ <b>Currency</b>,
+ CurrencyAmount,
+ DateTimeRule,
+ <b>EthiopicCalendar</b>,
+ Freezable,
+ <b>GregorianCalendar</b>,
+ <b>HebrewCalendar</b>,
+ <b>IndianCalendar</b>,
+ InitialTimeZoneRule,
+ <b>IslamicCalendar</b>,
+ <b>JapaneseCalendar</b>,
+ Measure,
+ MeasureUnit,
+ RangeValueIterator,
+ <b>SimpleTimeZone</b>,
+ StringTokenizer,
+ <b>TaiwanCalendar</b>,
+ TimeArrayTimeZoneRule,
+ <b>TimeZone</b>,
+ TimeZoneRule,
+ TimeZoneTransition,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">StringPrep, IDNA</th>
+ <td align="left" valign="baseline">stringPrep, stringPrepTests</td>
+
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.stringprep</td>
+ <td align="right" valign="baseline">488 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ <b>StringPrep</b>,
+ <b>StringPrepParseException</b>,
+ SymbolTable,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+
+ <th align="left" valign="baseline">Transforms</th>
+ <td align="left" valign="baseline">transliterator, transliteratorTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.translit</td>
+ <td align="right" valign="baseline">890 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ DictionaryBasedBreakIterator,
+ Normalizer,
+ <b>Replaceable</b>,
+ <b>ReplaceableString</b>,
+ RuleBasedBreakIterator,
+ RuleBasedCollator,
+ <b>RuleBasedTransliterator</b>,
+ StringTransform,
+ SymbolTable,
+ <b>Transliterator</b>,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ <b>UnicodeSet</b>,
+ <b>UnicodeSetIterator</b>,
+ <b>UTF16</b>
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ CaseInsensitiveString,
+ CompactByteArray,
+ Freezable,
+ <b>RangeValueIterator</b>,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceBundleIterator,
+ UResourceTypeMismatchException,
+ <b>ValueIterator</b>,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+<!-- tr>
+
+ <th align="left" valign="baseline"><b>Module Name</b></th>
+
+ <th align="left" valign="baseline"><b>Ant Targets</b></th>
+
+ <th align="left" valign="baseline"><b>Test Package Supported</b></th>
+
+ <th align="right" valign="baseline"><b>Size</b></th>
+
+</tr -->
+ </tbody>
+</table>
+<p>Building any of these modules is as easy as specifying a build
+target to the Ant build system, e.g.:
+<br>
+To build a module that contains only the Normalizer API: </p>
+<ol>
+ <li> Build the module. <br>
+ <code> ant normalizer </code> </li>
+ <li> Build the jar containing the module. <br>
+ <code>ant moduleJar </code> </li>
+ <li> Build the tests for the module. <br>
+ <code> ant normalizerTests </code> </li>
+ <li> Run the tests and verify that the self tests pass. <br>
+ <code> java -classpath classes com.ibm.icu.dev.test.TestAll -nothrow -w </code> </li>
+</ol>
+If more than one module is required, the module build targets can be
+concatenated, e.g.:
+<ol>
+ <li> Build the modules. <br>
+ <code> ant normalizer collator </code> </li>
+ <li> Build the jar containing the modules. <br>
+ <code>ant moduleJar </code> </li>
+ <li> Build the tests for the module. <br>
+ <code> ant normalizerTests collatorTests </code> </li>
+ <li> Run the tests and verify that they pass. <br>
+ <code> java -classpath classes com.ibm.icu.dev.test.TestAll -nothrow -w </code> </li>
+</ol>
+The jar should be built before the tests, since for some targets
+building the tests will cause additional classes to be compiled that
+are not strictly necessary for the module itself.
+<h5> Notes: </h5>
+<ul>
+ <li>Regardless of whether ICU4J is built as a whole or as modules,
+the jar file produced is named <em>icu4j.jar</em>.</li>
+ <li>To ascertain if an icu4j.jar contains all of ICU4J or not, please
+see the manifest file in the jar</li>
+ <li>The target moduleJar does not depend on any other target. It just
+creates a jar of all class files under
+$icu4j_root/classes/com/ibm/icu/, excluding the class files in the
+$icu4j_root/classes/com/ibm/icu/dev folder.</li>
+ <li>The list of module build targets can be obtained by running the
+command: <code>ant -projecthelp</code></li>
+</ul>
+<h3 class="doc"><a name="tryingout"></a>Trying Out ICU4J</h3>
+<p><strong>Note:</strong> the demos provided with ICU4J are for the
+most part undocumented. This list can show you where to look, but
+you'll
+have to experiment a bit. The demos (with the
+exception of richedit) are <strong>unsupported</strong> and may change
+or disappear without notice.</p>
+<p>The icu4j.jar file contains only the core ICU4J classes, not the
+demo classes, so unless you build ICU4J there is little to try out.
+</p>
+<h4>Charset</h4>
+To try out the <strong>Charset</strong> package, build <strong>icu4j.jar</strong> and <strong>icu4j-charsets.jar</strong> using the 'jar' target.
+You can use the charsets by placing these files on your classpath.
+<blockquote><tt>java -cp $icu4j_root/icu4j.jar:$icu4j_root/icu4j-charsets.jar &lt;your program&gt;</tt></blockquote>
+<h4>Rich Edit</h4>
+To try out the <strong>richedit</strong> package, first build the
+richeditJar target.
+This is a 'runnable' jar file. To run the richedit demo, type:
+<blockquote><tt>java -jar $icu4j_root/richedit/richedit.jar</tt></blockquote>
+This will present an empty edit pane with an awt interface.
+<p>With a fuller command line you can try out other options, for
+example:</p>
+<blockquote><tt>java -classpath $icu4j_root/richedit/richedit.jar
+com.ibm.richtext.demo.EditDemo [-swing][file]</tt></blockquote>
+<p>This will use an awt GUI, or a swing GUI if
+<tt>-swing</tt> is passed on the command line. It will open a text
+file if one is provided, otherwise it will open a blank page. Click
+to type.</p>
+<p>
+You can add tabs to the tab ruler by clicking in the ruler while
+holding down the control key.
+Clicking on an existing tab changes between left, right, center, and
+decimal tabs. Dragging
+a tab moves it, dragging it off the ruler removes it.</p>
+<p>
+You can experiment with complex text by using the keymap functions.
+Please note that these are mainly for demo purposes, for real work
+with Arabic or Hebrew you will want to use an input method. You will
+need to use a font that supports Arabic or Hebrew, 'Lucida Sans'
+(provided
+with Java) supports these languages.</p>
+<h4>Other demos</h4>
+<p>The other demo programs are <strong>not supported</strong> and
+exist only to let you
+experiment with the ICU4J classes. First, build ICU4J using <tt>ant all</tt>.
+Then try
+one of the following:
+</p>
+<ul>
+ <li><tt>java -classpath classes com.ibm.icu.dev.demo.calendar.CalendarApp</tt></li>
+ <li><tt>java -classpath classes com.ibm.icu.dev.demo.holiday.HolidayCalendarDemo</tt></li>
+ <li><tt>java -classpath classes com.ibm.icu.dev.demo.rbnf.RbnfDemo</tt></li>
+ <li><tt>java -classpath classes com.ibm.icu.dev.demo.translit.Demo</tt></li>
+</ul>
+<h3 class="doc"><a name="resources">ICU4J Resource Information</a></h3>
+Starting with release 2.1, ICU4J includes its own
+resource information
+which is completely independent of the JRE resource information. (Note:
+in ICU4J 2.8 to 3.4, time zone information depends on the underlying JRE.)
+The new ICU4J information is equivalent to the information in ICU4C and
+many resources are, in fact, the same binary files that ICU4C uses.
+<p>
+By default the ICU4J distribution includes all of the standard resource
+information. It is located under the directory com/ibm/icu/impl/data.
+Depending on the service, the data is in different locations and in
+different formats. <strong>Note:</strong> This will continue to change
+from release to release, so clients should not depend on the exact
+organization
+of the data in ICU4J.</p>
+<ul>
+ <li>The primary <b>locale data</b> is under the directory <tt>icudt38b</tt>,
+as a set of <tt>".res"</tt> files whose names are the locale identifiers.
+Locale naming is documented in the <code>com.ibm.icu.util.ULocale</code>
+class, and the use of these names in searching for resources is documented
+in <code>com.ibm.icu.util.UResourceBundle</code>.
+ </li>
+ <li>The <b>collation data</b> is under the directory <tt>icudt38b/coll</tt>,
+as a set of <tt>".res"</tt> files.</li>
+ <li>The <b>rule-based transliterator data</b> is under the directory
+<tt>icudt38b/translit</tt> as a set of <tt>".res"</tt> files. (<b>Note:</b> the
+Han transliterator test data is no longer included in the core icu4j.jar
+file by default.)</li>
+ <li>The <b>rule-based number format data</b> is under the directory
+<tt>icudt38b/rbnf</tt> as a set of <tt>".res"</tt> files.</li>
+ <li>The <b>break iterator data</b> is directly under the data
+directory, as a set of <tt>".brk"</tt> files, named according to the
+type of break and the locale where there are locale-specific versions.</li>
+ <li>The <b>holiday data</b> is under the <tt>data</tt> directory,
+as a set of <tt>".class"</tt> files, named <tt>"HolidayBundle_"</tt>
+followed by the locale ID.</li>
+ <li>The <b>character property data</b> as well as assorted <b>normalization
+data</b> and default <b>unicode collation algorithm (UCA) data</b>
+is found under the <tt>data</tt> directory as a set of <tt>".icu"</tt>
+files. </li>
+ <li>The <b>character set converter data</b> is under the directory
+ <tt>icudt38b</tt>, as a set of <tt>".cnv"</tt> files. These files are
+ currently included only in icu4j-charsets.jar.</li>
+ <li>The <b>time zone data</b> is named <tt>zoneinfo.res</tt> under
+ the directory <tt>icudt38b</tt>.</li>
+</ul>
+<p>
+Some of the data files alias or otherwise reference data from other
+data files. One reason for this is because some locale names have
+changed. For example, <tt>he_IL</tt> used to be <tt>iw_IL</tt>. In
+order to support both names but not duplicate the data, one of the
+resource files refers to the other file's data. In other cases, a
+file may alias a portion of another file's data in order to save
+space. Currently ICU4J provides no tool for revealing these
+dependencies.</p>
+<blockquote><strong>Note:</strong> Java's <code>Locale</code> class
+silently converts the language code <tt>"he"</tt> to <tt>"iw"</tt>
+when you construct the Locale (for versions of Java through Java 5). Thus
+Java cannot be used to locate resources that use the <tt>"he"</tt>
+language code. ICU, on the other hand, does not perform this
+conversion in ULocale, and instead uses aliasing in the locale data to
+represent the same set of data under different locale
+ids.</blockquote>
+<p>
+Resource files that use locale ids form a hierarchy, with up to four
+levels: a root, language, region (country), and variant. Searches for
+locale data attempt to match as far down the hierarchy as possible,
+for example, <tt>"he_IL"</tt> will match <tt>he_IL</tt>, but
+<tt>"he_US"</tt> will match <tt>he</tt> (since there is no <tt>US</tt>
+variant for he), and <tt>"xx_YY"</tt> will match root (the
+default fallback locale) since there is no <tt>xx</tt> language code
+in the locale hierarchy. Again, see
+<code>java.util.ResourceBundle</code> for more information.
+</p>
+<p>
+<strong>Currently ICU4J provides no tool for revealing these
+dependencies</strong> between data files, so trimming the data
+directly in the ICU4J project is a hit-or-miss affair. The key point
+when you remove data is to make sure to remove all dependencies on
+that data as well. For example, if you remove <tt>he.res</tt>, you
+need to remove <tt>he_IL.res</tt>, since it is lower in the hierarchy,
+and you must remove iw.res, since it references <tt>he.res</tt>, and
+<tt>iw_IL.res</tt>, since it depends on it (and also references
+<tt>he_IL.res</tt>).
+</p>
+<p>
+Unfortunately, the jar tool in the JDK provides no way to remove items
+from a jar file. Thus you have to extract the resources, remove the
+ones you don't want, and then create a new jar file with the remaining
+resources. See the jar tool information for how to do this. Before
+'rejaring' the files, be sure to thoroughly test your application with
+the remaining resources, making sure each required resource is
+present.
+</p>
+<h4>Using additional resource files with ICU4J</h4>
+<blockquote>
+ <table cellpadding="3" frame="border" rules="none" width="50%">
+ <tbody>
+ <tr>
+ <td><b><font color="red" size="+1">Warning:</font> Resource
+file formats can change across releases of ICU4J!</b></td>
+ </tr>
+ <tr>
+ <td>The format of ICU4J resources is not part of the API.
+Clients who develop their own resources for use with ICU4J should be
+prepared to
+regenerate them when they move to new releases of ICU4J.</td>
+ </tr>
+ </tbody>
+ </table>
+</blockquote>
+<p>
+We are still developing ICU4J's resource mechanism. Currently it
+is not possible to mix icu's new binary <tt>.res</tt>
+resources
+with traditional java-style <tt>.class</tt> or <tt>.txt</tt>
+resources. We might
+allow for this in a future release, but since the resource data and
+format is not formally
+supported, you run the risk of incompatibilities with future releases
+of ICU4J.
+</p>
+<p>
+Resource data in ICU4J is checked in to the repository as a jar file
+containing the resource binaries, <tt>icudata.jar</tt>. This
+means that inspecting the contents of these resources is difficult.
+They currently are compiled from ICU4C <tt>.txt</tt> file data. You
+can view the contents of the ICU4C text resource files to understand
+the contents of the ICU4J resources.
+</p>
+<p>
+The files in <tt>icudata.jar</tt> get extracted to <tt>com/ibm/icu/impl/data</tt>
+in
+the build directory when the 'core' target is built.
+Building the <tt>'resources'</tt> target will force the
+resources to once again be extracted. Extraction will
+overwrite any corresponding resource files already in that directory.
+</p>
+<h4><a name="resourcesICU4C">Building ICU4J Resources from ICU4C</a></h4>
+<h5>Requirements</h5>
+<ul>
+ <li><a
+ href="http://www.icu-project.org/download/">ICU4C</a></li>
+ <li>Compilers and tools required for <a
+ href="http://source.icu-project.org/repos/icu/icu/tags/release-3-8/readme.html#HowToBuild">building ICU4C</a>.</li>
+ <li>J2SE SDK version 1.4 or above (5.0 is recommended)</li>
+ <li>Perl version 5 or above.</li>
+</ul>
+<h5> Procedure</h5>
+<ol>
+ <li> Download and build ICU4C on a Windows machine. For instructions on
+downloading and building ICU4C, please click <a
+ href="http://source.icu-project.org/repos/icu/icu/tags/release-3-8/readme.html#HowToBuild">here</a>.</li>
+ <li> Change directory to <I>$icu4c_root</I>/source/tools/genrb. <I>$icu4c_root</I>
+ is the root directory of ICU4C source package.</li>
+ <li> Launch gendtjar.pl from that directory itself with the command <br>
+gendtjar.pl --icu-root=<I>$icu4c_root</I> --jar=<I>$jdk_home/bin</I>
+--icu4j-root=<I>$icu4j_root</I> --version=<I>$icu_version</I> <br>
+e.g: gendtjar.pl --icu-root=\work\icu --jar=\jdk1.5.0\bin
+--icu4j-root=\work\icu4j --version=3.8<br>
+Execution of gendtjar.pl script will create the required jar files in
+the $icu4c_root\source\tools\genrb\temp directory.</li>
+ <li> Move icudata.jar to <I>$icu4j_root</I>/src/com/ibm/icu/impl/data
+directory.</li>
+ <li> Move testdata.jar to <I>$icu4j_root</I>/src/com/ibm/icu/dev/data
+directory.</li>
+ <li> Build resources target of ant to unpack the jar files with the
+following command. <br>
+ <I>$ant_home</I>/bin/ant resources</li>
+</ol>
+<h5> Generating Data from CLDR </h5>
+<I> Note: This procedure assumes that all 3 sources are in sibling directories</I>
+<ol>
+ <li>Checkout CLDR. $cldr_root in the following steps is the root directory where
+ the CLDR source files are checked out.</li>
+ <li>Update <I>$cldr_root</I>/common to 'release-1-5-0-1' tag</li>
+ <li>Update <I>$cldr_root</I>/tools to 'release-1-5-0-1' tag</li>
+ <li>Checkout ICU with tag 'release-3-8'</li>
+ <li>Checkout ICU4J with tag 'release-3-8'</li>
+ <li>Build ICU4J</li>
+ <li>Build ICU4C</li>
+ <li>Change to <I>$cldr_root</I>/tools/java directory</li>
+ <li>Build CLDR using ant after pointing ICU4J_CLASSES env var to the newly built ICU4J</li>
+ <li>cd to <I>$icu4c_root</I>/source/data directory</li>
+ <li>Follow the instructions in the cldr-icu-readme.txt</li>
+ <li>Build ICU data from CLDR</li>
+ <li>cd to <I>$icu4c_root/source/tools/genrb</I></li>
+ <li>run gendtjar.pl as explained in the previous section.</li>
+ <li>cd to <I>$icu4j_root</I> dir</li>
+ <li>Build and test icu4j</li>
+</ol>
+
+<h3 class="doc"><a name="WhereToFindMore"></a>Where to Find More
+Information</h3>
+<p><a href="http://www.ibm.com/software/globalization/icu/">http://www.ibm.com/software/globalization/icu/</a>
+is a
+pointer to general information about the International Components for
+Unicode in Java </p>
+<p><a href="http://www.ibm.com/software/globalization/">http://www.ibm.com/software/globalization/</a>
+is a pointer to
+information on how to make applications global. </p>
+<h3 class="doc"><a name="SubmittingComments"></a>Submitting Comments,
+Requesting Features and
+Reporting Bugs</h3>
+<p>Your comments are important to making ICU4J successful. We are
+committed
+to fixing any bugs, and will use your feedback to help plan future
+releases.</p>
+<p>To submit comments, request features and report bugs, contact us
+through the <a
+ href="http://www.icu-project.org/contacts.html">ICU Support
+mailing list</a>.<br>
+While we are not able to respond individually to each comment, we do
+review all comments.</p>
+<br>
+<br>
+<h2>Thank you for your interest in ICU4J!</h2>
+<br>
+<hr align="center" size="2" width="100%">
+<p><I><font size="-1">Copyright © 2002-2007 International Business
+Machines Corporation and others. All Rights
+Reserved.<br>
+4400 North First Street, San José, CA 95193, USA
+</font></I></p>
+</body>
+</html>
diff --git a/src/META-INF/services/java.nio.charset.spi.CharsetProvider b/src/META-INF/services/java.nio.charset.spi.CharsetProvider
new file mode 100644
index 0000000..ca798e7
--- /dev/null
+++ b/src/META-INF/services/java.nio.charset.spi.CharsetProvider
@@ -0,0 +1,3 @@
+# Copyright (C) 2006, International Business Machines Corporation and others. All Rights Reserved.
+# icu4j converters
+com.ibm.icu.charset.CharsetProviderICU
diff --git a/src/com/ibm/icu/ICUConfig.properties b/src/com/ibm/icu/ICUConfig.properties
new file mode 100644
index 0000000..d9100a4
--- /dev/null
+++ b/src/com/ibm/icu/ICUConfig.properties
@@ -0,0 +1,13 @@
+#*
+#*******************************************************************************
+#* Copyright (C) 2008, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+#* This properties file contains ICU runtime configuration
+#*
+
+#
+# The default TimeZone implementation type used by the ICU TimeZone
+# factory method. [ ICU | JDK ]
+#
+com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
diff --git a/src/com/ibm/icu/charset/Charset88591.java b/src/com/ibm/icu/charset/Charset88591.java
new file mode 100644
index 0000000..01dda42
--- /dev/null
+++ b/src/com/ibm/icu/charset/Charset88591.java
@@ -0,0 +1,111 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+class Charset88591 extends CharsetASCII {
+    public Charset88591(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+        super(icuCanonicalName, javaCanonicalName, aliases);
+    }
+
+    class CharsetDecoder88591 extends CharsetDecoderASCII {
+        public CharsetDecoder88591(CharsetICU cs) {
+            super(cs);
+        }
+
+        protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
+                byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
+
+            /*
+             * perform 88591 conversion from the source array to the target array: every byte value
+             * 0x00..0xff becomes the char with the same value, so no range check is necessary.
+             */
+            for (int i = oldSource; i < limit; i++)
+                targetArray[i + offset] = (char) (sourceArray[i] & 0xff);
+
+            return null; /* null: the calling decodeLoop settles underflow/overflow and positions */
+        }
+
+        protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target)
+                throws BufferUnderflowException, BufferOverflowException {
+
+            /*
+             * perform 88591 conversion from the source buffer to the target buffer. no range check
+             * is necessary; the loop only ends when get() or put() throws (underflow or overflow).
+             */
+            while (true)
+                target.put((char) (source.get() & 0xff));
+        }
+    }
+
+    class CharsetEncoder88591 extends CharsetEncoderASCII {
+        public CharsetEncoder88591(CharsetICU cs) {
+            super(cs);
+        }
+
+        protected final CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
+                char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
+                boolean flush) {
+            int i, ch = 0;
+
+            /*
+             * perform 88591 conversion from the source array to the target array, stopping at the
+             * first char in the source that is above 0xff (i.e. outside the 88591 range)
+             */
+            for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff00) == 0); i++)
+                targetArray[i + offset] = (byte) ch;
+
+            /*
+             * if some char was out of range, hand it to encodeMalformedOrUnmappable. i and
+             * i + offset index the backing arrays, so subtract each buffer's arrayOffset to turn
+             * them back into buffer positions (source just past the bad char, target unchanged)
+             */
+            if ((ch & 0xff00) != 0) {
+                source.position(i + 1 - source.arrayOffset());
+                target.position(i + offset - target.arrayOffset());
+                return encodeMalformedOrUnmappable(source, ch, flush);
+            } else
+                return null;
+        }
+
+        protected final CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,
+                boolean flush) throws BufferUnderflowException, BufferOverflowException {
+            int ch;
+
+            /*
+             * perform 88591 conversion from the source buffer to the target buffer, making sure
+             * each char in the source is within the correct range (no bits set above 0xff)
+             */
+            while (((ch = (int) source.get()) & 0xff00) == 0)
+                target.put((byte) ch);
+
+            /*
+             * if we reach here, it's because a character was not in the correct range, and we need
+             * to deal with this by calling encodeMalformedOrUnmappable.
+             */
+            return encodeMalformedOrUnmappable(source, ch, flush);
+        }
+
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoder88591(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoder88591(this);
+    }
+
+}
diff --git a/src/com/ibm/icu/charset/CharsetASCII.java b/src/com/ibm/icu/charset/CharsetASCII.java
new file mode 100644
index 0000000..10884d8
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetASCII.java
@@ -0,0 +1,354 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2007, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+
+class CharsetASCII extends CharsetICU {
+    protected byte[] fromUSubstitution = new byte[] { (byte) 0x1a }; /* handed to the encoder's super constructor */
+
+    public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        maxBytesPerChar = 1;
+        minBytesPerChar = 1;
+        maxCharsPerByte = 1;
+    }
+
+    class CharsetDecoderASCII extends CharsetDecoderICU {
+
+        public CharsetDecoderASCII(CharsetICU cs) {
+            super(cs);
+        }
+
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                boolean flush) {
+            if (!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if (!target.hasRemaining()) {
+                /* no output available, can't do anything */
+                return CoderResult.OVERFLOW;
+            }
+
+            CoderResult cr;
+            int oldSource = source.position();
+            int oldTarget = target.position();
+
+            if (source.hasArray() && target.hasArray()) {
+                /* optimized loop */
+
+                /*
+                 * extract arrays from the buffers and obtain various constant values that will be
+                 * necessary in the core loop; the *Index values are indices into the backing arrays
+                 */
+                byte[] sourceArray = source.array();
+                int sourceOffset = source.arrayOffset();
+                int sourceIndex = oldSource + sourceOffset;
+                int sourceLength = source.limit() - oldSource;
+
+                char[] targetArray = target.array();
+                int targetOffset = target.arrayOffset();
+                int targetIndex = oldTarget + targetOffset;
+                int targetLength = target.limit() - oldTarget;
+
+                int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
+                        + sourceIndex;
+                int offset = targetIndex - sourceIndex;
+
+                /*
+                 * perform the core loop... if it returns null, it must be due to an overflow or
+                 * underflow, and the buffer positions still have to be advanced here
+                 */
+                if ((cr = decodeLoopCoreOptimized(source, target, sourceArray, targetArray,
+                        sourceIndex, offset, limit)) == null) {
+                    if (sourceLength <= targetLength) {
+                        source.position(oldSource + sourceLength);
+                        target.position(oldTarget + sourceLength);
+                        cr = CoderResult.UNDERFLOW;
+                    } else {
+                        source.position(oldSource + targetLength);
+                        target.position(oldTarget + targetLength);
+                        cr = CoderResult.OVERFLOW;
+                    }
+                }
+            } else {
+                /* unoptimized loop */
+
+                try {
+                    /*
+                     * perform the core loop... if it throws an exception, it must be due to an
+                     * overflow or underflow
+                     */
+                    cr = decodeLoopCoreUnoptimized(source, target);
+
+                } catch (BufferUnderflowException ex) {
+                    /* all of the source has been read */
+                    cr = CoderResult.UNDERFLOW;
+                } catch (BufferOverflowException ex) {
+                    /* the target is full; the byte just read could not be written */
+                    source.position(source.position() - 1); /* rewind by 1 */
+                    cr = CoderResult.OVERFLOW;
+                }
+            }
+
+            /* set offsets since the start: one entry per char written, numbered 0, 1, 2, ... */
+            if (offsets != null) {
+                int count = target.position() - oldTarget;
+                int sourceIndex = -1;
+                while (--count >= 0)
+                    offsets.put(++sourceIndex);
+            }
+
+            return cr;
+        }
+
+        protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
+                byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
+            int i, ch = 0;
+
+            /*
+             * perform ascii conversion from the source array to the target array, stopping at the
+             * first byte in the source that has its high bit set (i.e. is not ascii)
+             */
+            for (i = oldSource; i < limit && (((ch = (sourceArray[i] & 0xff)) & 0x80) == 0); i++)
+                targetArray[i + offset] = (char) ch;
+
+            /*
+             * if some byte was out of range, hand it to decodeMalformedOrUnmappable. i and
+             * i + offset index the backing arrays, so subtract each buffer's arrayOffset to turn
+             * them back into buffer positions (source just past the bad byte, target unchanged)
+             */
+            if ((ch & 0x80) != 0) {
+                source.position(i + 1 - source.arrayOffset());
+                target.position(i + offset - target.arrayOffset());
+                return decodeMalformedOrUnmappable(ch);
+            } else
+                return null;
+        }
+
+        protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target)
+                throws BufferUnderflowException, BufferOverflowException {
+            int ch = 0;
+
+            /*
+             * perform ascii conversion from the source buffer to the target buffer, making sure
+             * each byte in the source is within the correct range (high bit clear)
+             */
+            while (((ch = (source.get() & 0xff)) & 0x80) == 0)
+                target.put((char) ch);
+
+            /*
+             * if we reach here, it's because a character was not in the correct range, and we need
+             * to deal with this by calling decodeMalformedOrUnmappable
+             */
+            return decodeMalformedOrUnmappable(ch);
+        }
+
+        protected CoderResult decodeMalformedOrUnmappable(int ch) {
+            /*
+             * put the guilty byte into toUBytesArray and return a result saying that the
+             * input was malformed and of length 1.
+             */
+            toUBytesArray[0] = (byte) ch;
+            toULength = 1;
+            return CoderResult.malformedForLength(1);
+        }
+    }
+
+    class CharsetEncoderASCII extends CharsetEncoderICU {
+
+        public CharsetEncoderASCII(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        private final static int NEED_TO_WRITE_BOM = 1;
+
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
+                boolean flush) {
+            if (!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if (!target.hasRemaining()) {
+                /* no output available, can't do anything */
+                return CoderResult.OVERFLOW;
+            }
+
+            CoderResult cr;
+            int oldSource = source.position();
+            int oldTarget = target.position();
+
+            if (fromUChar32 != 0) {
+                /*
+                 * if we have a leading character in fromUChar32 that needs to be dealt with, we
+                 * need to check for a matching trail character and take the appropriate action as
+                 * dictated by encodeTrail.
+                 */
+                cr = encodeTrail(source, (char) fromUChar32, flush);
+            } else {
+                if (source.hasArray() && target.hasArray()) {
+                    /* optimized loop */
+
+                    /*
+                     * extract arrays from the buffers and obtain various constant values that will
+                     * be necessary in the core loop; the *Index values index the backing arrays
+                     */
+                    char[] sourceArray = source.array();
+                    int sourceOffset = source.arrayOffset();
+                    int sourceIndex = oldSource + sourceOffset;
+                    int sourceLength = source.limit() - oldSource;
+
+                    byte[] targetArray = target.array();
+                    int targetOffset = target.arrayOffset();
+                    int targetIndex = oldTarget + targetOffset;
+                    int targetLength = target.limit() - oldTarget;
+
+                    int limit = ((sourceLength < targetLength) ? sourceLength : targetLength)
+                            + sourceIndex;
+                    int offset = targetIndex - sourceIndex;
+
+                    /*
+                     * perform the core loop... if it returns null, it must be due to an overflow or
+                     * underflow, and the buffer positions still have to be advanced here
+                     */
+                    if ((cr = encodeLoopCoreOptimized(source, target, sourceArray, targetArray,
+                            sourceIndex, offset, limit, flush)) == null) {
+                        if (sourceLength <= targetLength) {
+                            source.position(oldSource + sourceLength);
+                            target.position(oldTarget + sourceLength);
+                            cr = CoderResult.UNDERFLOW;
+                        } else {
+                            source.position(oldSource + targetLength);
+                            target.position(oldTarget + targetLength);
+                            cr = CoderResult.OVERFLOW;
+                        }
+                    }
+                } else {
+                    /* unoptimized loop */
+
+                    try {
+                        /*
+                         * perform the core loop... if it throws an exception, it must be due to an
+                         * overflow or underflow
+                         */
+                        cr = encodeLoopCoreUnoptimized(source, target, flush);
+
+                    } catch (BufferUnderflowException ex) {
+                        cr = CoderResult.UNDERFLOW;
+                    } catch (BufferOverflowException ex) {
+                        source.position(source.position() - 1); /* rewind by 1 */
+                        cr = CoderResult.OVERFLOW;
+                    }
+                }
+            }
+
+            /* set offsets since the start: one entry per byte written, numbered 0, 1, 2, ... */
+            if (offsets != null) {
+                int count = target.position() - oldTarget;
+                int sourceIndex = -1;
+                while (--count >= 0)
+                    offsets.put(++sourceIndex);
+            }
+
+            return cr;
+        }
+
+        protected CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
+                char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
+                boolean flush) {
+            int i, ch = 0;
+
+            /*
+             * perform ascii conversion from the source array to the target array, stopping at the
+             * first char in the source that is above 0x7f (i.e. is not ascii)
+             */
+            for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++)
+                targetArray[i + offset] = (byte) ch;
+
+            /*
+             * if some char was out of range, hand it to encodeMalformedOrUnmappable. i and
+             * i + offset index the backing arrays, so subtract each buffer's arrayOffset to turn
+             * them back into buffer positions (source just past the bad char, target unchanged)
+             */
+            if ((ch & 0xff80) != 0) {
+                source.position(i + 1 - source.arrayOffset());
+                target.position(i + offset - target.arrayOffset());
+                return encodeMalformedOrUnmappable(source, ch, flush);
+            } else
+                return null;
+        }
+
+        protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target,
+                boolean flush) throws BufferUnderflowException, BufferOverflowException {
+            int ch;
+
+            /*
+             * perform ascii conversion from the source buffer to the target buffer, making sure
+             * each char in the source is within the correct range (no bits set above 0x7f)
+             */
+            while (((ch = (int) source.get()) & 0xff80) == 0)
+                target.put((byte) ch);
+
+            /*
+             * if we reach here, it's because a character was not in the correct range, and we need
+             * to deal with this by calling encodeMalformedOrUnmappable.
+             */
+            return encodeMalformedOrUnmappable(source, ch, flush);
+        }
+
+        protected final CoderResult encodeMalformedOrUnmappable(CharBuffer source, int ch, boolean flush) {
+            /*
+             * if the character is a surrogate code unit, we need to call encodeTrail to attempt to
+             * match it up with a trail surrogate. if not, the character is simply unmappable.
+             */
+            return (UTF16.isSurrogate((char) ch))
+                    ? encodeTrail(source, (char) ch, flush)
+                    : CoderResult.unmappableForLength(1);
+        }
+
+        private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
+            /*
+             * ASCII doesn't support characters outside the BMP, so if handleSurrogates returns null,
+             * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable.
+             */
+            CoderResult cr = handleSurrogates(source, lead);
+            if (cr != null) {
+                return cr;
+            } else {
+                //source.position(source.position() - 2);
+                return CoderResult.unmappableForLength(2);
+            }
+        }
+
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoderASCII(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoderASCII(this);
+    }
+}
diff --git a/src/com/ibm/icu/charset/CharsetCESU8.java b/src/com/ibm/icu/charset/CharsetCESU8.java
new file mode 100644
index 0000000..7880830
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetCESU8.java
@@ -0,0 +1,18 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+/**
+ * Charset for the CESU-8 variant of UTF-8; it lets the Charset framework open the variant converter without extra
+ * setup work. The constructor only forwards its arguments to CharsetUTF8 (NOTE(review): presumably the superclass
+ * derives its isCESU8 flag from this subclass -- confirm). CESU-8 uses 6 bytes for supplementary characters, not 4.
+ */
+class CharsetCESU8 extends CharsetUTF8 {
+ public CharsetCESU8(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ }
+}
diff --git a/src/com/ibm/icu/charset/CharsetCallback.java b/src/com/ibm/icu/charset/CharsetCallback.java
new file mode 100644
index 0000000..6f52c63
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetCallback.java
@@ -0,0 +1,430 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CoderResult;
+
+/**
+ * <h2> Callback API for CharsetICU API </h2>
+ *
+ * CharsetCallback class defines some error behaviour functions called
+ * by CharsetDecoderICU and CharsetEncoderICU. The class also provides
+ * the facility by which clients can write their own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the onUnmappableCharacter() and
+ * onMalformedInput() methods, to set the behaviour of a converter
+ * when it encounters UNMAPPED/INVALID sequences.
+ * Currently the only way to set callbacks is by using CodingErrorAction.
+ * In the future we will provide set methods on CharsetEncoder and CharsetDecoder
+ * that will accept CharsetCallback fields.
+ *
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+
+public class CharsetCallback {
+ /**
+ * FROM_U, TO_U context option for the substitute callback: substitute only unmappable input, return other errors
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String SUB_STOP_ON_ILLEGAL = "i";
+
+ /**
+ * FROM_U, TO_U context option for the skip callback (same value as SUB_STOP_ON_ILLEGAL)
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String SKIP_STOP_ON_ILLEGAL = "i";
+
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @draft ICU 3.6
+ */
+ /*public*/ static final String ESCAPE_ICU = null;
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @draft ICU 3.6
+ */
+ /*public*/ static final String ESCAPE_JAVA = "J";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_C = "C";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_XML_DEC = "D";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_XML_HEX = "X";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode ({U+XXXX})
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_UNICODE = "U";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code point according to CSS2 (backslash, hex digits, then a space)
+ * @draft ICU 4.0
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_CSS2 = "S";
+
+ /**
+ * Decoder Callback interface
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public interface Decoder {
+ /**
+ * This function is called when the bytes in the source cannot be handled,
+ * and this function is meant to handle or fix the error if possible.
+ * The callbacks in this class inspect cr to tell unmappable input from other errors.
+ * @return Result of decoding action. This returned object is set to an error
+ * if this function could not handle the conversion.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr);
+ }
+ /**
+ * Encoder Callback interface
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public interface Encoder {
+ /**
+ * This function is called when the Unicode characters in the source cannot be handled,
+ * and this function is meant to handle or fix the error if possible.
+ * @return Result of encoding action. This returned object is set to an error
+ * if this function could not handle the conversion.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr);
+ }
+ /**
+ * Skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ if(context==null){
+ return CoderResult.UNDERFLOW;
+ }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
+ if(!cr.isUnmappable()){
+ return cr;
+ }else{
+ return CoderResult.UNDERFLOW;
+ }
+ }
+ return cr;
+ }
+ };
+ /**
+ * Skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr){
+ if(context==null){
+ return CoderResult.UNDERFLOW;
+ }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
+ if(!cr.isUnmappable()){
+ return cr;
+ }else{
+ return CoderResult.UNDERFLOW;
+ }
+ }
+ return cr;
+ }
+ };
+ /**
+ * Skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ if(context==null){
+ return encoder.cbFromUWriteSub(encoder, source, target, offsets);
+ }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
+ if(!cr.isUnmappable()){
+ return cr;
+ }else{
+ return encoder.cbFromUWriteSub(encoder, source, target, offsets);
+ }
+ }
+ return cr;
+ }
+ };
+    private static final char[] kSubstituteChar1 = new char[]{0x1A};
+    private static final char[] kSubstituteChar = new char[] {0xFFFD};
+    /**
+     * Substitute callback: writes one substitution character to the target.
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context,
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr){
+            /*
+             * U+001A is written when invalidCharLength is 1 and the charset has a
+             * non-zero subChar1; U+FFFD is written in every other case.
+             */
+            final CharsetICU charset = (CharsetICU) decoder.charset();
+            final boolean useSub1 = decoder.invalidCharLength == 1 && charset.subChar1 != 0;
+            final char[] replacement = useSub1 ? kSubstituteChar1 : kSubstituteChar;
+            return CharsetDecoderICU.toUWriteUChars(decoder, replacement, 0, 1, target, offsets, source.position());
+        }
+    };
+ /**
+ * Stop callback: returns the incoming error result unchanged
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ return cr;
+ }
+ };
+ /**
+ * Stop callback: returns the incoming error result unchanged
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr){
+ return cr;
+ }
+ };
+    private static final int VALUE_STRING_LENGTH = 32; /* capacity of the escape scratch arrays */
+    private static final char UNICODE_PERCENT_SIGN_CODEPOINT = '%';
+    private static final char UNICODE_U_CODEPOINT = 'U';
+    private static final char UNICODE_X_CODEPOINT = 'X';
+    private static final char UNICODE_RS_CODEPOINT = '\\';
+    private static final char UNICODE_U_LOW_CODEPOINT = 'u';
+    private static final char UNICODE_X_LOW_CODEPOINT = 'x';
+    private static final char UNICODE_AMP_CODEPOINT = '&';
+    private static final char UNICODE_HASH_CODEPOINT = '#';
+    private static final char UNICODE_SEMICOLON_CODEPOINT = ';';
+    private static final char UNICODE_PLUS_CODEPOINT = '+';
+    private static final char UNICODE_LEFT_CURLY_CODEPOINT = '{';
+    private static final char UNICODE_RIGHT_CURLY_CODEPOINT = '}';
+    private static final char UNICODE_SPACE_CODEPOINT = ' ';
+ /**
+ * Write escape callback
+ * @draft ICU 4.0
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ char[] valueString = new char[VALUE_STRING_LENGTH];
+ int valueStringLength = 0;
+ int i = 0;
+
+ cr = CoderResult.UNDERFLOW;
+
+ if (context == null || !(context instanceof String)) {
+ while (i < length) {
+ valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+ valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ }
+ } else {
+ if (((String)context).equals(ESCAPE_JAVA)) {
+ while (i < length) {
+ valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
+ valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ }
+ } else if (((String)context).equals(ESCAPE_C)) {
+ valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
+
+ if (length == 2) {
+ valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+ valueStringLength = itou(valueString, valueStringLength, cp, 16, 8);
+ } else {
+ valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ }
+ } else if (((String)context).equals(ESCAPE_XML_DEC)) {
+ valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
+ valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
+ if (length == 2) {
+ valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
+ } else {
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
+ }
+ valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+ } else if (((String)context).equals(ESCAPE_XML_HEX)) {
+ valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
+ valueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
+ valueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
+ if (length == 2) {
+ valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
+ } else {
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
+ }
+ valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+ } else if (((String)context).equals(ESCAPE_UNICODE)) {
+ valueString[valueStringLength++] = UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
+ valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+ valueString[valueStringLength++] = UNICODE_PLUS_CODEPOINT; /* adding + */
+ if (length == 2) {
+ valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
+ } else {
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ }
+ valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
+ } else if (((String)context).equals(ESCAPE_CSS2)) {
+ valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
+ valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
+ /* Always add space character, because the next character might be whitespace,
+ which would erroneously be considered the termination of the escape sequence. */
+ valueString[valueStringLength++] = UNICODE_SPACE_CODEPOINT;
+ } else {
+ while (i < length) {
+ valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+ valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
+ valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ }
+ }
+ }
+
+ /* reset the error */
+ cr = CoderResult.UNDERFLOW;
+
+ cr = encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
+ return cr;
+ }
+ };
+    /**
+     * Escape callback: writes an escaped form of each offending byte to the target.
+     * @draft ICU 4.0
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context,
+                ByteBuffer source, CharBuffer target, IntBuffer offsets,
+                char[] buffer, int length, CoderResult cr){
+            char[] uniValueString = new char[VALUE_STRING_LENGTH];
+            int valueStringLength = 0;
+            int i = 0;
+
+            if (context == null || !(context instanceof String)) {
+                while (i < length) {
+                    uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                    uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
+                    valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                }
+            } else {
+                if (((String)context).equals(ESCAPE_XML_DEC)) {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
+                        uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 10, 0);
+                        uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+                    }
+                } else if (((String)context).equals(ESCAPE_XML_HEX)) {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
+                        uniValueString[valueStringLength++] = UNICODE_HASH_CODEPOINT; /* adding # */
+                        uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 0);
+                        uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
+                    }
+                } else if (((String)context).equals(ESCAPE_C)) {
+                    while (i < length) { /* C form: backslash, x, two hex digits - for every byte, not only the first */
+                        uniValueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
+                        uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT; /* adding x */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                    }
+                } else {
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
+                        uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT; /* adding X */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                    }
+                }
+            }
+
+            /* The incoming error is dropped: whatever toUWriteUChars reports for writing */
+            /* the escape text becomes the result, as in the other writing callbacks here. */
+            cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
+            return cr;
+        }
+    };
+    /**
+     * Java port of uprv_itou() in ICU4C used by TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE.
+     * Writes the digits of i in the given radix into buffer, starting at sourceIndex and
+     * padded on the left with zeroes to at least minwidth characters.
+     * @return the number of characters written
+     * @provisional This API might change or be removed in a future release.
+     */
+    private static final int itou(char[] buffer, int sourceIndex, int i, int radix, int minwidth) {
+        int written = 0;
+        int remaining = i;
+
+        /* emit the digits least significant first; digits above 9 become 'A', 'B', ... */
+        do {
+            final int digit = remaining % radix;
+            buffer[sourceIndex + written++] = (char)(digit <= 9 ? ('0' + digit) : ('A' - 10 + digit));
+            remaining /= radix;
+        } while (remaining != 0 && (sourceIndex + written) < buffer.length);
+
+        /* zero padding */
+        while (written < minwidth) {
+            buffer[sourceIndex + written++] = '0';
+        }
+
+        /* the digits were produced backwards: swap them into reading order in place */
+        for (int lo = sourceIndex, hi = sourceIndex + written - 1; lo < hi; lo++, hi--) {
+            final char swap = buffer[hi];
+            buffer[hi] = buffer[lo];
+            buffer[lo] = swap;
+        }
+        return written;
+    }
+}
diff --git a/src/com/ibm/icu/charset/CharsetDecoderICU.java b/src/com/ibm/icu/charset/CharsetDecoderICU.java
new file mode 100644
index 0000000..d2f7997
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetDecoderICU.java
@@ -0,0 +1,733 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import com.ibm.icu.impl.Assert;
+
+/**
+ * An abstract class that provides framework methods of decoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter semantics of ICU4C.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CharsetDecoderICU extends CharsetDecoder{
+
+ int toUnicodeStatus; // cleared by implReset()
+ byte[] toUBytesArray = new byte[128];
+ int toUBytesBegin = 0;
+ int toULength; // must be 0 for toUnicodeWithCallback() to consider the end of the input reached
+ char[] charErrorBufferArray = new char[128]; // output still owed to the target; decode() drains it first
+ int charErrorBufferLength; // number of pending chars in charErrorBufferArray
+ int charErrorBufferBegin;
+ char[] invalidCharBuffer = new char[128];
+ int invalidCharLength; // read by CharsetCallback.TO_U_CALLBACK_SUBSTITUTE to pick the substitution character
+
+ /* maximum number of indexed bytes */
+ private static final int EXT_MAX_BYTES = 0x1f;
+
+ /* store previous UChars/chars to continue partial matches */
+ byte[] preToUArray = new byte[EXT_MAX_BYTES];
+ int preToUBegin;
+ int preToULength; /* negative: replay */
+ int preToUFirstLength; /* length of first character */
+ int mode;
+
+ Object toUContext = null; // callback context; replaced through setToUCallback()
+ private CharsetCallback.Decoder onUnmappableCharacter = CharsetCallback.TO_U_CALLBACK_STOP; // default: error returned unchanged
+ private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP; // default: error returned unchanged
+    CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
+        public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
+                CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
+            // Route the error to the callback registered for its kind; a result that is
+            // neither unmappable nor malformed goes to the stop callback.
+            final CharsetCallback.Decoder handler;
+            if (cr.isUnmappable()) {
+                handler = onUnmappableCharacter;
+            } else {
+                handler = cr.isMalformed() ? onMalformedInput : CharsetCallback.TO_U_CALLBACK_STOP;
+            }
+            return handler.call(decoder, context, source, target, offsets, buffer, length, cr);
+        }
+    };
+
+ // guards that keep implOnMalformedInput and implOnUnmappableCharacter from re-running while they call super
+ private boolean malformedInputCalled = false;
+ private boolean unmappableCharacterCalled = false;
+
+    /**
+     * Constructs a CharsetDecoderICU from the information held by a CharsetICU object.
+     * The average chars-per-byte passed to the superclass is the reciprocal of cs.maxCharsPerByte.
+     * @param cs The CharsetICU object describing the charset to decode.
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    CharsetDecoderICU(CharsetICU cs) {
+        super(cs, 1.0f / (float) cs.maxCharsPerByte, cs.maxCharsPerByte);
+    }
+
+ /**
+ * Is this Decoder allowed to use fallbacks? A fallback mapping is a mapping
+ * that will convert a byte sequence to a Unicode codepoint sequence, but
+ * the encoded Unicode codepoint sequence will round trip convert to a different
+ * byte sequence. In ICU, this can be called a reverse fallback.
+ * @return A boolean; this implementation always returns true
+ * @draft ICU 3.8
+ * @provisional This API might change or be removed in a future release.
+ */
+ final boolean isFallbackUsed() {
+ return true;
+ }
+
+ /**
+ * Fallback is currently always used by icu4j decoders, so this always returns true.
+ */
+ static final boolean isToUUseFallback() {
+ return true;
+ }
+
+ /**
+ * Fallback is currently always used by icu4j decoders: iUseFallback is ignored and true is returned.
+ */
+ static final boolean isToUUseFallback(boolean iUseFallback) {
+ return true;
+ }
+
+    /**
+     * Installs the ICU callback that matches the action chosen for malformed (illegal) input.
+     * For REPLACE the java.nio-level action is switched to IGNORE so that nio does not replace too.
+     * @param newAction action to be taken
+     * @exception IllegalArgumentException
+     * @stable ICU 3.6
+     */
+    protected final void implOnMalformedInput(CodingErrorAction newAction) {
+        if (!malformedInputCalled) {
+            // super.onMalformedInput() calls back into this method; while the flag is set
+            // that nested call does nothing, so we don't run infinitely.
+            if (CodingErrorAction.REPLACE == newAction) {
+                // if we get a replace, do not let the nio replace
+                malformedInputCalled = true;
+                super.onMalformedInput(CodingErrorAction.IGNORE);
+                malformedInputCalled = false;
+            }
+
+            onMalformedInput = getCallback(newAction);
+        }
+    }
+
+    /**
+     * Installs the ICU callback that matches the action chosen for unmappable input.
+     * For REPLACE the java.nio-level action is switched to IGNORE so that nio does not replace too.
+     * @param newAction action to be taken
+     * @exception IllegalArgumentException
+     * @stable ICU 3.6
+     */
+    protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {
+        if (!unmappableCharacterCalled) {
+            // super.onUnmappableCharacter() calls back into this method; while the flag is
+            // set that nested call does nothing, so we don't run infinitely.
+            if (CodingErrorAction.REPLACE == newAction) {
+                // if we get a replace, do not let the nio replace
+                unmappableCharacterCalled = true;
+                super.onUnmappableCharacter(CodingErrorAction.IGNORE);
+                unmappableCharacterCalled = false;
+            }
+
+            onUnmappableCharacter = getCallback(newAction);
+        }
+    }
+
+    /**
+     * Sets the decoder callback and the context to be used if an illegal sequence is encountered.
+     * You would normally call this twice, once for the malformed and once for the unmappable error. In
+     * that case newContext should remain the same, since there is a single stored context and passing a
+     * different one replaces it.
+     * @param err CoderResult selecting the error kind (malformed or unmappable) the callback is set for
+     * @param newCallback CharsetCallback.Decoder
+     * @param newContext Object
+     */
+    public final void setToUCallback(CoderResult err, CharsetCallback.Decoder newCallback, Object newContext) {
+        /* Only malformed and unmappable are handled; any other result changes neither callback. */
+        if (err.isMalformed()) {
+            onMalformedInput = newCallback;
+        } else if (err.isUnmappable()) {
+            onUnmappableCharacter = newCallback;
+        }
+
+        final boolean sameContext = toUContext != null && toUContext.equals(newContext);
+        if (!sameContext) {
+            toUContext = newContext;
+        }
+    }
+
+    private static CharsetCallback.Decoder getCallback(CodingErrorAction action){
+        // REPORT, like any action not matched below, maps to the stop callback.
+        if (CodingErrorAction.REPLACE == action) {
+            return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE;
+        }
+        if (CodingErrorAction.IGNORE == action) {
+            return CharsetCallback.TO_U_CALLBACK_SKIP;
+        }
+        return CharsetCallback.TO_U_CALLBACK_STOP;
+    }
+ private final ByteBuffer EMPTY = ByteBuffer.allocate(0); // zero-length source handed to decode() by implFlush()
+ /**
+ * Flushes any characters saved in the converter's internal buffer by
+ * running decode() on an empty source with flush set to true.
+ * @param out buffer that receives the flushed characters
+ * @return result of flushing action and completes the decoding all input.
+ * Returns CoderResult.UNDERFLOW if the action succeeds.
+ * @stable ICU 3.6
+ */
+ protected final CoderResult implFlush(CharBuffer out) {
+ return decode(EMPTY, out, null, true);
+ }
+
+    /**
+     * Puts the to-Unicode state of this converter back to its initial values.
+     * @stable ICU 3.6
+     */
+    protected void implReset() {
+        /* conversion state and count of pending input */
+        mode = 0;
+        toUnicodeStatus = 0;
+        toULength = 0;
+        /* pending overflow output */
+        charErrorBufferBegin = 0;
+        charErrorBufferLength = 0;
+        /* stored input for partial matches; a negative preToULength would mean replay */
+        preToUFirstLength = 0;
+        preToULength = 0;
+        preToUBegin = 0;
+    }
+
+ /**
+ * Decodes one or more bytes. The default behaviour of the converter
+ * is stop and report if an error in input stream is encountered.
+ * To set different behaviour use {@link CharsetDecoder#onMalformedInput}.
+ * This method allows a buffer by buffer conversion of a data stream.
+ * The state of the conversion is saved between calls to convert.
+ * Among other things, this means multibyte input sequences can be
+ * split between calls. If a call to convert results in an Error, the
+ * conversion may be continued by calling convert again with suitably
+ * modified parameters. All conversions should be finished with a call to
+ * the flush method.
+ * @param in buffer to decode
+ * @param out buffer to populate with decoded result
+ * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+ * action succeeds or more input is needed for completing the decoding action.
+ * @stable ICU 3.6
+ */
+ protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
+ if(in.remaining() < toUCountPending()){
+ return CoderResult.UNDERFLOW;
+ }
+// NOTE(review): toUCountPending() is defined outside this view; presumably it counts input bytes
+// that an earlier call already took in without advancing the caller's buffer - confirm.
+// That many bytes are skipped before converting, and afterwards the position is moved back
+// by however many are pending once decode() has returned.
+
+ in.position(in.position() + toUCountPending());
+
+ /* do the conversion */
+ CoderResult ret = decode(in, out, null, false);
+
+ // ok was there input held in the previous invocation of decodeLoop
+ // that resulted in output in this invocation?
+ in.position(in.position() - toUCountPending());
+
+ return ret;
+ }
+
+ /** Implements the ICU semantic for decode operation
+ * @param in The input byte buffer
+ * @param out The output character buffer
+ * @param offsets Optional buffer for source offsets; decode() is called with null from implFlush and decodeLoop
+ * @param flush true if, and only if, the invoker can provide no additional input bytes beyond those given
+ * @return Result of decoding action. UNDERFLOW if the action succeeds or more input is needed to complete it.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush);
+
+    /**
+     * Implements the ICU semantic for decode operation
+     * @param source The input byte buffer
+     * @param target The output character buffer
+     * @param offsets optional buffer for source offsets, may be null; replayed overflow output is recorded as -1
+     * @param flush true if, and only if, the invoker can provide no
+     *  additional input bytes beyond those in the given buffer.
+     * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+     *  action succeeds or more input is needed for completing the decoding action.
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+
+        /* check parameters */
+        if (target == null || source == null) {
+            throw new IllegalArgumentException();
+        }
+
+        /*
+         * Make sure that the buffer sizes do not exceed the number range for
+         * int32_t because some functions use the size (in units or bytes)
+         * rather than comparing pointers, and because offsets are int32_t values.
+         *
+         * size_t is guaranteed to be unsigned and large enough for the job.
+         *
+         * Return with an error instead of adjusting the limits because we would
+         * not be able to maintain the semantics that either the source must be
+         * consumed or the target filled (unless an error occurs).
+         * An adjustment would be sourceLimit=t+0x7fffffff; for example.
+         */
+        /*agljport:fix
+        if(
+            ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
+            ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)
+        ) {
+            *err=U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        */
+
+        /* flush the target overflow buffer */
+        if (charErrorBufferLength > 0) {
+            int i = 0;
+            do {
+                if (!target.hasRemaining()) {
+                    /*
+                     * the overflow buffer contains too much, keep the rest at the front
+                     * of the array. The count stays an int: the array holds 128 chars
+                     * and a byte cast would turn a full buffer (128) into -128.
+                     */
+                    int pending = charErrorBufferLength - i;
+                    System.arraycopy(charErrorBufferArray, i, charErrorBufferArray, 0, pending);
+                    charErrorBufferLength = pending;
+                    return CoderResult.OVERFLOW;
+                }
+
+                /* copy the overflow contents to the target */
+                target.put(charErrorBufferArray[i++]);
+                if (offsets != null) {
+                    offsets.put(-1); /* no source index available for old output */
+                }
+            } while (i < charErrorBufferLength);
+
+            /* the overflow buffer is completely copied to the target */
+            charErrorBufferLength = 0;
+        }
+
+        if (!flush && !source.hasRemaining() && preToULength >= 0) {
+            /* the overflow buffer is emptied and there is no new input: we are done */
+            return CoderResult.UNDERFLOW;
+        }
+
+        /*
+         * Do not simply return with a buffer overflow error if
+         * !flush && t==targetLimit
+         * because it is possible that the source will not generate any output.
+         * For example, the skip callback may be called;
+         * it does not output anything.
+         */
+
+        return toUnicodeWithCallback(source, target, offsets, flush);
+    }
+
+ private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
+ int limit;
+ int delta, offset;
+
+ if(sourceIndex>=0) {
+ /*
+ * adjust each offset by adding the previous sourceIndex
+ * minus the length of the input sequence that caused an
+ * error, if any
+ */
+ delta=sourceIndex-errorInputLength;
+ } else {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ */
+ delta=-1;
+ }
+ limit=offsets.position()+length;
+ if(delta==0) {
+ /* most common case, nothing to do */
+ } else if(delta>0) {
+ /* add the delta to each offset (but not if the offset is <0) */
+ while(offsets.position()<limit) {
+ offset=offsets.get(offsets.position());
+ if(offset>=0) {
+ offsets.put(offset+delta);
+ }
+ //FIXME: ++offsets;
+ }
+ } else /* delta<0 */ {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ * or the error input sequence started in a previous buffer
+ */
+ while(offsets.position()<limit) {
+ offsets.put(-1);
+ }
+ }
+ }
+    /**
+     * Runs decodeLoop over the source and passes every error result to toCharErrorBehaviour
+     * until the input is consumed, the target overflows, or a callback leaves an error
+     * unresolved. Input units stored by an earlier m:n partial match (negative preToULength)
+     * are replayed from a temporary buffer before the real source is read.
+     *
+     * @param source  bytes to decode
+     * @param target  buffer that receives the decoded chars
+     * @param offsets buffer whose entries are adjusted for the chars written, or null
+     * @param flush   passed on to decodeLoop; presumably true when no more input will follow
+     * @return UNDERFLOW once the input is consumed, otherwise the unresolved error or overflow
+     */
+    final CoderResult toUnicodeWithCallback(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+        int sourceIndex;
+        int errorInputLength;
+        boolean converterSawEndOfInput, calledCallback;
+        int t=target.position();
+        int s=source.position();
+        /* variables for m:n conversion */
+        ByteBuffer replayArray = ByteBuffer.allocate(EXT_MAX_BYTES);
+        int replayArrayIndex = 0;
+        ByteBuffer realSource=null;
+        boolean realFlush=false;
+        int realSourceIndex=0;
+        CoderResult cr = CoderResult.UNDERFLOW;
+
+        /* get the converter implementation function */
+        sourceIndex=0;
+
+        if(preToULength>=0) {
+            /* normal mode */
+        } else {
+            /*
+             * Previous m:n conversion stored source units from a partial match
+             * and failed to consume all of them.
+             * We need to "replay" them from a temporary buffer and convert them first.
+             */
+            realSource=source;
+            realFlush=flush;
+            realSourceIndex=sourceIndex;
+            replayArray.put(preToUArray,0, -preToULength);
+            source=replayArray;
+            source.position(0);
+            source.limit(replayArrayIndex-preToULength);
+            flush=false;
+            sourceIndex=-1;
+            preToULength=0;
+        }
+
+        /*
+         * loop for conversion and error handling
+         *
+         * loop {
+         *   convert
+         *   loop {
+         *     update offsets
+         *     handle end of input
+         *     handle errors/call callback
+         *   }
+         * }
+         */
+        for(;;) {
+            if(cr.isUnderflow()) {
+                /* convert */
+                cr = decodeLoop(source, target, offsets, flush);
+
+                /*
+                 * set a flag for whether the converter
+                 * successfully processed the end of the input
+                 *
+                 * need not check cnv->preToULength==0 because a replay (<0) will cause
+                 * s<sourceLimit before converterSawEndOfInput is checked
+                 */
+                converterSawEndOfInput= (cr.isUnderflow() && flush && source.remaining()==0 && toULength == 0);
+            } else {
+                /* handle error from getNextUChar() */
+                converterSawEndOfInput=false;
+            }
+
+            /* no callback called yet for this iteration */
+            calledCallback=false;
+
+            /* no sourceIndex adjustment for conversion, only for callback output */
+            errorInputLength=0;
+
+            /*
+             * loop for offsets and error handling
+             *
+             * iterates at most 3 times:
+             * 1. to clean up after the conversion function
+             * 2. after the callback
+             * 3. after the callback again if there was truncated input
+             */
+            for(;;) {
+                /* update offsets if we write any */
+                if(offsets!=null) {
+                    int length=(target.position()-t);
+                    if(length>0) {
+                        updateOffsets(offsets, length, sourceIndex, errorInputLength);
+                        /*
+                         * if a converter handles offsets and updates the offsets
+                         * pointer at the end, then pArgs->offset should not change
+                         * here;
+                         * however, some converters do not handle offsets at all
+                         * (sourceIndex<0) or may not update the offsets pointer
+                         */
+                        //TODO: pArgs->offsets=offsets+=length;
+                    }
+
+                    if(sourceIndex>=0) {
+                        sourceIndex+=(source.position()-s);
+                    }
+                }
+
+                if(preToULength<0) {
+                    /*
+                     * switch the source to new replay units (cannot occur while replaying)
+                     * after offset handling and before end-of-input and callback handling
+                     */
+                    if(realSource==null)
+                    {
+                        realSource=source;
+                        realFlush=flush;
+                        realSourceIndex=sourceIndex;
+
+                        replayArray.clear(); /* may still be positioned at the end of an earlier replay */
+                        replayArray.put(preToUArray,0, -preToULength);
+                        replayArray.position(replayArrayIndex); /* put() left the position behind the copied units */
+                        source=replayArray;
+                        source.limit(replayArrayIndex-preToULength);
+                        flush=false;
+                        if((sourceIndex+=preToULength)<0) {
+                            sourceIndex=-1;
+                        }
+
+                        preToULength=0;
+                    } else {
+                        /* see implementation note before _fromUnicodeWithCallback() */
+                        Assert.assrt(realSource==null);
+                    }
+                }
+
+                /* update pointers */
+                s=source.position();
+                t=target.position();
+
+                if(cr.isUnderflow()) {
+                    if(s<source.limit())
+                    {
+                        /*
+                         * continue with the conversion loop while there is still input left
+                         * (continue converting by breaking out of only the inner loop)
+                         */
+                        break;
+                    } else if(realSource!=null) {
+                        /* switch back from replaying to the real source and continue */
+                        source = realSource;
+                        flush=realFlush;
+                        sourceIndex=realSourceIndex;
+                        realSource=null;
+                        break;
+                    } else if(flush && toULength>0) {
+                        /*
+                         * the entire input stream is consumed
+                         * and there is a partial, truncated input sequence left
+                         */
+
+                        /* inject an error and continue with callback handling */
+                        cr = CoderResult.malformedForLength(toULength);
+                        calledCallback=false; /* new error condition */
+                    } else {
+                        /* input consumed */
+                        if(flush) {
+                            /*
+                             * return to the conversion loop once more if the flush
+                             * flag is set and the conversion function has not
+                             * successfully processed the end of the input yet
+                             *
+                             * (continue converting by breaking out of only the inner loop)
+                             */
+                            if(!converterSawEndOfInput) {
+                                break;
+                            }
+
+                            /* reset the converter without calling the callback function */
+                            implReset();
+                        }
+
+                        /* done successfully */
+                        return cr;
+                    }
+                }
+
+                /* U_FAILURE(*err) */
+                {
+                    if( calledCallback || cr.isOverflow() ||
+                        (!cr.isMalformed() && !cr.isUnmappable()) /* no callback exists for other results */
+                      ) {
+                        /*
+                         * the callback did not or cannot resolve the error:
+                         * set output pointers and return
+                         *
+                         * the check for buffer overflow is redundant but it is
+                         * a high-runner case and hopefully documents the intent
+                         * well
+                         *
+                         * if we were replaying, then the replay buffer must be
+                         * copied back into the UConverter
+                         * and the real arguments must be restored
+                         */
+                        if(realSource!=null) {
+                            int length;
+                            Assert.assrt(preToULength==0);
+                            length=(int)(source.limit()-source.position());
+                            if(length>0) {
+                                source.get(preToUArray, preToUBegin, length);
+                                preToULength=(byte)-length;
+                            }
+
+                            source=realSource;
+                            flush=realFlush;
+                        }
+                        return cr;
+                    }
+                }
+
+                /* copy toUBytes[] to invalidCharBuffer[] */
+                errorInputLength=invalidCharLength=toULength;
+                if(errorInputLength>0) {
+                    copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength);
+                }
+
+                /* set the converter state to deal with the next character */
+                toULength=0;
+
+                /* call the callback function */
+                cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr);
+                /*
+                 * loop back to the offset handling
+                 *
+                 * this flag will indicate after offset handling
+                 * that a callback was called;
+                 * if the callback did not resolve the error, then we return
+                 */
+                calledCallback=true;
+            }
+        }
+    }
+
+ /**
+ * Returns the number of input bytes held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @return The magnitude of preToULength if it is non-zero, else toULength if it is positive, else 0 (never -1).
+ * @draft ICU 3.6
+ */
+ /*public*/ int toUCountPending() {
+ if(preToULength > 0){
+ return preToULength ;
+ } else if(preToULength < 0){
+ return -preToULength;
+ } else if(toULength > 0){
+ return toULength;
+ } else {
+ return 0;
+ }
+ }
+
+
+    /** Copies length bytes from src[srcOffset..] to dst[dstOffset..], widening each byte with a (char) cast. */
+    private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
+        /* count the elements copied: starting the counter at srcOffset copied too few for srcOffset>0 */
+        for(int i=0; i<length; i++) { dst[dstOffset+i]=(char)src[srcOffset+i]; }
+    }
+ /**
+ * ONLY used by ToU callback functions.
+ * This function will write out the specified characters to the target
+ * character buffer.
+ * @return A CoderResult object that contains the error result when an error occurs.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ static final CoderResult toUWriteUChars( CharsetDecoderICU cnv,
+ char[] ucharsArray, int ucharsBegin, int length,
+ CharBuffer target, IntBuffer offsets, int sourceIndex) {
+
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* write UChars */
+ if(offsets==null) {
+ while(length>0 && target.hasRemaining()) {
+ target.put(ucharsArray[ucharsBegin++]);
+ --length;
+ }
+
+ } else {
+ /* output with offsets */
+ while(length>0 && target.hasRemaining()) {
+ target.put(ucharsArray[ucharsBegin++]);
+ offsets.put(sourceIndex);
+ --length;
+ }
+ }
+ /* write overflow */
+ if(length>0) {
+ cnv.charErrorBufferLength= 0;
+ cr = CoderResult.OVERFLOW;
+ do {
+ cnv.charErrorBufferArray[cnv.charErrorBufferLength++]=ucharsArray[ucharsBegin++];
+ } while(--length>0);
+ }
+ return cr;
+ }
+    /**
+     * Writes a substitution for an undecodable input sequence to the target character buffer:
+     * U+001A if the sequence is one byte long and the charset has a subChar1, else the replacement string.
+     * Subclasses may override this method if required.
+     * @param decoder decoder whose replacement string, charset and invalidCharLength are consulted
+     * @param source input buffer; its current position is recorded as the offset of the output
+     * @param target buffer that receives the substitution characters
+     * @param offsets buffer that receives one source offset per written char, or null
+     * @return A CoderResult object that contains the error result when an error occurs.
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    CoderResult cbToUWriteSub(CharsetDecoderICU decoder,
+            ByteBuffer source, CharBuffer target,
+            IntBuffer offsets){
+        String sub = decoder.replacement();
+        CharsetICU cs = (CharsetICU) decoder.charset();
+        if (decoder.invalidCharLength==1 && cs.subChar1 != 0x00) {
+            char[] subArr = new char[] { 0x1a };
+            /* pass the length of subArr itself: sub.length() may exceed this 1-element array */
+            return CharsetDecoderICU.toUWriteUChars(decoder, subArr, 0, subArr.length,
+                    target, offsets, source.position());
+        } else {
+            return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(),
+                    0, sub.length(), target, offsets, source.position());
+        }
+    }
+}
diff --git a/src/com/ibm/icu/charset/CharsetEncoderICU.java b/src/com/ibm/icu/charset/CharsetEncoderICU.java
new file mode 100644
index 0000000..0a838f0
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetEncoderICU.java
@@ -0,0 +1,942 @@
+/**
+ *******************************************************************************
+ * Copyright (C) 2006-2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ *
+ *******************************************************************************
+ */
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import com.ibm.icu.impl.Assert;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * An abstract class that provides framework methods of encoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter semantics of ICU4C.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CharsetEncoderICU extends CharsetEncoder {
+
+ /* this is used in fromUnicode DBCS tables as an "unassigned" marker */
+ static final char MISSING_CHAR_MARKER = '\uFFFF';
+
+ byte[] errorBuffer = new byte[30];
+
+ int errorBufferLength = 0;
+
+ /** these are for encodeLoopICU */
+ int fromUnicodeStatus;
+
+ int fromUChar32;
+
+ boolean useSubChar1;
+
+ boolean useFallback;
+
+ /* maximum number of indexed UChars */
+ static final int EXT_MAX_UCHARS = 19;
+
+ /* store previous UChars/chars to continue partial matches */
+ int preFromUFirstCP; /* >=0: partial match */
+
+ char[] preFromUArray = new char[EXT_MAX_UCHARS];
+
+ int preFromUBegin;
+
+ int preFromULength; /* negative: replay */
+
+ char[] invalidUCharBuffer = new char[2];
+
+ int invalidUCharLength;
+
+ Object fromUContext;
+
+ private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP;
+
+ private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
+
+ CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr) {
+ if (cr.isUnmappable()) {
+ return onUnmappableInput.call(encoder, context, source, target,
+ offsets, buffer, length, cp, cr);
+ } else if (cr.isMalformed()) {
+ return onMalformedInput.call(encoder, context, source, target,
+ offsets, buffer, length, cp, cr);
+ }
+ return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context,
+ source, target, offsets, buffer, length, cp, cr);
+
+ }
+ };
+
+ /**
+ * Constructs a new encoder for the given charset
+ *
+ * @param cs
+ * for which the encoder is created
+ * @param replacement
+ * the substitution bytes
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
+ super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2,
+ cs.maxBytesPerChar, replacement);
+ }
+
+ /**
+ * Is this Encoder allowed to use fallbacks? A fallback mapping is a mapping
+ * that will convert a Unicode codepoint sequence to a byte sequence, but
+ * the encoded byte sequence will round trip convert to a different
+ * Unicode codepoint sequence.
+ * @return true if the converter uses fallback, false otherwise.
+ * @draft ICU 3.8
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean isFallbackUsed() {
+ return useFallback;
+ }
+
+ /**
+ * Sets whether this Encoder can use fallbacks?
+ * @param usesFallback true if the user wants the converter to take
+ * advantage of the fallback mapping, false otherwise.
+ * @draft ICU 3.8
+ * @provisional This API might change or be removed in a future release.
+ */
+ public void setFallbackUsed(boolean usesFallback) {
+ useFallback = usesFallback;
+ }
+
+ /**
+ * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
+ * @param c A codepoint
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ final boolean isFromUUseFallback(int c) {
+ return (useFallback)
+ || (UCharacter.getType(c) == UCharacter.PRIVATE_USE);
+ }
+
+ /**
+ * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
+ */
+ static final boolean isFromUUseFallback(boolean iUseFallback, int c) {
+ return (iUseFallback)
+ || (UCharacter.getType(c) == UCharacter.PRIVATE_USE);
+ }
+
+ /**
+ * Sets the action to be taken if an illegal sequence is encountered
+ *
+ * @param newAction
+ * action to be taken
+ * @exception IllegalArgumentException
+ * @stable ICU 3.6
+ */
+ protected void implOnMalformedInput(CodingErrorAction newAction) {
+ onMalformedInput = getCallback(newAction);
+ }
+
+ /**
+ * Sets the action to be taken if an unmappable character is encountered
+ *
+ * @param newAction
+ * action to be taken
+ * @exception IllegalArgumentException
+ * @stable ICU 3.6
+ */
+ protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
+ onUnmappableInput = getCallback(newAction);
+ }
+
+ /**
+ * Sets the callback encoder method and context to be used if an illegal sequence is encountered.
+ * You would normally call this twice to set both the malformed and unmappable error. In this case,
+ * newContext should remain the same since using a different newContext each time will negate the last
+ * one used.
+ * @param err CoderResult
+ * @param newCallback CharsetCallback.Encoder
+ * @param newContext Object
+ */
+ public final void setFromUCallback(CoderResult err, CharsetCallback.Encoder newCallback, Object newContext) {
+ if (err.isMalformed()) {
+ onMalformedInput = newCallback;
+ } else if (err.isUnmappable()) {
+ onUnmappableInput = newCallback;
+ } else {
+ /* Error: Only malformed and unmappable are handled. */
+ }
+
+ if (fromUContext == null || !fromUContext.equals(newContext)) {
+ fromUContext = newContext;
+ }
+ }
+
+ /**
+ * Sets fromUContext used in callbacks.
+ *
+ * @param newContext Object
+ * @exception IllegalArgumentException
+ * @draft ICU 4.0
+ */
+ public final void setFromUContext(Object newContext) {
+ fromUContext = newContext;
+ }
+
+ private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
+ if (action == CodingErrorAction.REPLACE) {
+ return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
+ } else if (action == CodingErrorAction.IGNORE) {
+ return CharsetCallback.FROM_U_CALLBACK_SKIP;
+ } else if (action == CodingErrorAction.REPORT) {
+ return CharsetCallback.FROM_U_CALLBACK_STOP;
+ }
+ return CharsetCallback.FROM_U_CALLBACK_STOP;
+ }
+
+ private static final CharBuffer EMPTY = CharBuffer.allocate(0);
+
+ /**
+ * Flushes any characters saved in the converter's internal buffer and
+ * resets the converter.
+ * @param out buffer to which the flushed bytes are written
+ * @return result of the flushing action, which completes the encoding of all input.
+ * Returns CoderResult.UNDERFLOW if the action succeeds.
+ * @stable ICU 3.6
+ */
+ protected CoderResult implFlush(ByteBuffer out) {
+ return encode(EMPTY, out, null, true);
+ }
+
+ /**
+ * Resets the from Unicode mode of converter
+ * @stable ICU 3.6
+ */
+ protected void implReset() {
+ errorBufferLength = 0;
+ fromUnicodeStatus = 0;
+ fromUChar32 = 0;
+ fromUnicodeReset();
+ }
+
+ private void fromUnicodeReset() {
+ preFromUBegin = 0;
+ preFromUFirstCP = UConverterConstants.U_SENTINEL;
+ preFromULength = 0;
+ }
+
+ /**
+ * Encodes one or more chars. The default behaviour of the
+ * converter is stop and report if an error in input stream is encountered.
+ * To set different behaviour use @see CharsetEncoder.onMalformedInput()
+ * @param in buffer of chars to encode
+ * @param out buffer to populate with the encoded bytes
+ * @return result of encoding action. Returns CoderResult.UNDERFLOW if the encoding
+ * action succeeds or more input is needed for completing the encoding action.
+ * @stable ICU 3.6
+ */
+ protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+ if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
+ // The Java framework should have already substituted what was left.
+ fromUChar32 = 0;
+ //fromUnicodeReset();
+ return CoderResult.UNDERFLOW;
+ }
+ in.position(in.position() + fromUCountPending()); // skip the chars counted as pending in the converter state
+ /* do the conversion */
+ CoderResult ret = encode(in, out, null, false);
+ setSourcePosition(in); // move the position back by the number of chars now pending
+ if (ret.isUnderflow() && in.hasRemaining()) {
+ // The Java framework is going to substitute what is left.
+ fromUnicodeReset();
+ }
+ return ret;
+ }
+
+ /**
+ * Implements ICU semantics of buffer management
+ * @param source
+ * @param target
+ * @param offsets
+ * @return A CoderResult object that contains the error result when an error occurs.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target,
+ IntBuffer offsets, boolean flush);
+
+ /**
+ * Implements ICU semantics for encoding the buffer
+ * @param source The input character buffer
+ * @param target The output byte buffer
+ * @param offsets
+ * @param flush true if, and only if, the invoker can provide no
+ * additional input bytes beyond those in the given buffer.
+ * @return A CoderResult object that contains the error result when an error occurs.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ final CoderResult encode(CharBuffer source, ByteBuffer target,
+ IntBuffer offsets, boolean flush) {
+
+ /* check parameters */
+ if (target == null || source == null) {
+ throw new IllegalArgumentException();
+ }
+
+ /*
+ * Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be targetLimit=t+0x7fffffff; for example.
+ */
+
+ /* flush the target overflow buffer */
+ if (errorBufferLength > 0) {
+ byte[] overflowArray;
+ int i, length;
+
+ overflowArray = errorBuffer;
+ length = errorBufferLength;
+ i = 0;
+ do {
+ if (target.remaining() == 0) {
+ /* the overflow buffer contains too much, keep the rest */
+ int j = 0;
+
+ do {
+ overflowArray[j++] = overflowArray[i++];
+ } while (i < length);
+
+ errorBufferLength = (byte) j;
+ return CoderResult.OVERFLOW;
+ }
+
+ /* copy the overflow contents to the target */
+ target.put(overflowArray[i++]);
+ if (offsets != null) {
+ offsets.put(-1); /* no source index available for old output */
+ }
+ } while (i < length);
+
+ /* the overflow buffer is completely copied to the target */
+ errorBufferLength = 0;
+ }
+
+ if (!flush && source.remaining() == 0 && preFromULength >= 0) {
+ /* the overflow buffer is emptied and there is no new input: we are done */
+ return CoderResult.UNDERFLOW;
+ }
+
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
+
+ return fromUnicodeWithCallback(source, target, offsets, flush);
+
+ }
+
+ /**
+ * Implementation note for m:n conversions
+ *
+ * While collecting source units to find the longest match for m:n conversion,
+ * some source units may need to be stored for a partial match.
+ * When a second buffer does not yield a match on all of the previously stored
+ * source units, then they must be "replayed", i.e., fed back into the converter.
+ *
+ * The code relies on the fact that replaying will not nest -
+ * converting a replay buffer will not result in a replay.
+ * This is because a replay is necessary only after the _continuation_ of a
+ * partial match failed, but a replay buffer is converted as a whole.
+ * It may result in some of its units being stored again for a partial match,
+ * but there will not be a continuation _during_ the replay which could fail.
+ *
+ * It is conceivable that a callback function could call the converter
+ * recursively in a way that causes another replay to be stored, but that
+ * would be an error in the callback function.
+ * Such violations will cause assertion failures in a debug build,
+ * and wrong output, but they will not cause a crash.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+    final CoderResult fromUnicodeWithCallback(CharBuffer source,
+            ByteBuffer target, IntBuffer offsets, boolean flush) {
+        int sBufferIndex;
+        int sourceIndex;
+        int errorInputLength;
+        boolean converterSawEndOfInput, calledCallback;
+
+        /* variables for m:n conversion */
+        CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
+        int replayArrayIndex = 0;
+        CharBuffer realSource;
+        boolean realFlush;
+
+        CoderResult cr = CoderResult.UNDERFLOW;
+
+        /* get the converter implementation function */
+        sourceIndex = 0;
+
+        if (preFromULength >= 0) {
+            /* normal mode */
+            realSource = null;
+            realFlush = false;
+        } else {
+            /*
+             * Previous m:n conversion stored source units from a partial match
+             * and failed to consume all of them.
+             * We need to "replay" them from a temporary buffer and convert them first.
+             */
+            realSource = source;
+            realFlush = flush;
+
+            /* switch to the replay buffer first: position and limit must apply to it, not to the real source */
+            replayArray.put(preFromUArray, 0, -preFromULength);
+            source = replayArray;
+            source.position(replayArrayIndex);
+            source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration
+            flush = false;
+
+            preFromULength = 0;
+        }
+
+        /*
+         * loop for conversion and error handling
+         *
+         * loop {
+         *   convert
+         *   loop {
+         *     update offsets
+         *     handle end of input
+         *     handle errors/call callback
+         *   }
+         * }
+         */
+        for (;;) {
+            /* convert */
+            cr = encodeLoop(source, target, offsets, flush);
+            /*
+             * set a flag for whether the converter
+             * successfully processed the end of the input
+             *
+             * need not check cnv.preFromULength==0 because a replay (<0) will cause
+             * s<sourceLimit before converterSawEndOfInput is checked
+             */
+            converterSawEndOfInput = (boolean) (cr.isUnderflow() && flush
+                    && source.remaining() == 0 && fromUChar32 == 0);
+
+            /* no callback called yet for this iteration */
+            calledCallback = false;
+
+            /* no sourceIndex adjustment for conversion, only for callback output */
+            errorInputLength = 0;
+
+            /*
+             * loop for offsets and error handling
+             *
+             * iterates at most 3 times:
+             * 1. to clean up after the conversion function
+             * 2. after the callback
+             * 3. after the callback again if there was truncated input
+             */
+            for (;;) {
+                /* update offsets if we write any */
+                if (offsets != null) {
+                    int length = target.remaining();
+                    if (length > 0) {
+
+                        /*
+                         * if a converter handles offsets and updates the offsets
+                         * pointer at the end, then offset should not change
+                         * here;
+                         * however, some converters do not handle offsets at all
+                         * (sourceIndex<0) or may not update the offsets pointer
+                         */
+                        offsets.position(offsets.position() + length);
+                    }
+
+                    if (sourceIndex >= 0) {
+                        sourceIndex += (int) (source.position());
+                    }
+                }
+
+                if (preFromULength < 0) {
+                    /*
+                     * switch the source to new replay units (cannot occur while replaying)
+                     * after offset handling and before end-of-input and callback handling
+                     */
+                    if (realSource == null) {
+                        realSource = source;
+                        realFlush = flush;
+
+                        replayArray.clear(); /* the buffer may still be positioned at the end of an earlier replay */
+                        replayArray.put(preFromUArray, 0, -preFromULength);
+
+                        source = replayArray;
+                        source.position(replayArrayIndex);
+                        source.limit(replayArrayIndex - preFromULength);
+                        flush = false;
+                        if ((sourceIndex += preFromULength) < 0) {
+                            sourceIndex = -1;
+                        }
+
+                        preFromULength = 0;
+                    } else {
+                        /* see implementation note before _fromUnicodeWithCallback() */
+                        //agljport:todo U_ASSERT(realSource==NULL);
+                        Assert.assrt(realSource == null);
+                    }
+                }
+
+                /* update pointers */
+                sBufferIndex = source.position();
+                if (cr.isUnderflow()) {
+                    if (sBufferIndex < source.limit()) {
+                        /*
+                         * continue with the conversion loop while there is still input left
+                         * (continue converting by breaking out of only the inner loop)
+                         */
+                        break;
+                    } else if (realSource != null) {
+                        /* switch back from replaying to the real source and continue */
+                        source = realSource;
+                        flush = realFlush;
+                        sourceIndex = source.position();
+                        realSource = null;
+                        break;
+                    } else if (flush && fromUChar32 != 0) {
+                        /*
+                         * the entire input stream is consumed
+                         * and there is a partial, truncated input sequence left
+                         */
+
+                        /* inject an error and continue with callback handling */
+                        //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
+                        cr = CoderResult.malformedForLength(1);
+                        calledCallback = false; /* new error condition */
+                    } else {
+                        /* input consumed */
+                        if (flush) {
+                            /*
+                             * return to the conversion loop once more if the flush
+                             * flag is set and the conversion function has not
+                             * successfully processed the end of the input yet
+                             *
+                             * (continue converting by breaking out of only the inner loop)
+                             */
+                            if (!converterSawEndOfInput) {
+                                break;
+                            }
+
+                            /* reset the converter without calling the callback function */
+                            implReset();
+                        }
+
+                        /* done successfully */
+                        return cr;
+                    }
+                }
+
+                /*U_FAILURE(*err) */
+                {
+
+                    if (calledCallback || cr.isOverflow()
+                            || (!cr.isMalformed() && !cr.isUnmappable())) {
+                        /*
+                         * the callback did not or cannot resolve the error:
+                         * set output pointers and return
+                         *
+                         * the check for buffer overflow is redundant but it is
+                         * a high-runner case and hopefully documents the intent
+                         * well
+                         *
+                         * if we were replaying, then the replay buffer must be
+                         * copied back into the UConverter
+                         * and the real arguments must be restored
+                         */
+                        if (realSource != null) {
+                            int length;
+
+                            //agljport:todo U_ASSERT(cnv.preFromULength==0);
+
+                            length = source.remaining();
+                            if (length > 0) {
+                                /* store the unconsumed replay units back in the converter for the next call */
+                                source.get(preFromUArray, 0, length);
+                                preFromULength = (byte) -length;
+                            }
+                            source = realSource;
+                            flush = realFlush;
+                        }
+                        return cr;
+                    }
+                }
+
+                /* callback handling */
+                {
+                    int codePoint;
+
+                    /* get and write the code point */
+                    codePoint = fromUChar32;
+                    errorInputLength = UTF16.append(invalidUCharBuffer, 0,
+                            fromUChar32);
+                    invalidUCharLength = errorInputLength;
+
+                    /* set the converter state to deal with the next character */
+                    fromUChar32 = 0;
+
+                    /* call the callback function */
+                    cr = fromCharErrorBehaviour.call(this, fromUContext,
+                            source, target, offsets, invalidUCharBuffer,
+                            invalidUCharLength, codePoint, cr);
+                }
+
+                /*
+                 * loop back to the offset handling
+                 *
+                 * this flag will indicate after offset handling
+                 * that a callback was called;
+                 * if the callback did not resolve the error, then we return
+                 */
+                calledCallback = true;
+            }
+        }
+    }
+
+ /*
+ * Ascertains if a given Unicode code point (32bit value for handling surrogates)
+ * can be converted to the target encoding. If the caller wants to test if a
+ * surrogate pair can be converted to target encoding then the
+ * responsibility of assembling the int value lies with the caller.
+ * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
+ * <pre>
+ * while(i<mySource.length){
+ * if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
+ * if(UTF16.isTrailSurrogate(mySource[i+1])){
+ * int temp = UTF16.charAt(mySource,i,i+1,0);
+ * if(!((CharsetEncoderICU) myConv).canEncode(temp)){
+ * passed=false;
+ * }
+ * i++;
+ * i++;
+ * }
+ * }
+ * }
+ * </pre>
+ * or
+ * <pre>
+ * String src = new String(mySource);
+ * int i,codepoint;
+ * boolean passed = false;
+ * while(i<src.length()){
+ * codepoint = UTF16.charAt(src,i);
+ * i+= (codepoint>0xfff)? 2:1;
+ * if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
+ * passed = false;
+ * }
+ * }
+ * </pre>
+ *
+ * @param codepoint Unicode code point as int value
+ * @return true if a character can be converted
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /* TODO This is different from Java's canEncode(char) API.
+ * ICU's API should implement getUnicodeSet,
+ * and override canEncode(char) which queries getUnicodeSet.
+ * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C.
+ */
+ /*public boolean canEncode(int codepoint) {
+ return true;
+ }*/
+ /**
+ * Overrides super class method
+ * @stable ICU 3.6
+ */
+ public boolean isLegalReplacement(byte[] repl) {
+ return true;
+ }
+
+    /**
+     * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
+     * Bytes are copied to {@code out} while it has room; whatever does not fit is stored in the
+     * converter's errorBuffer, from where it is flushed to the target by a later encode() call.
+     *
+     * @param cnv encoder whose errorBuffer and errorBufferLength receive the bytes that do not fit
+     * @param bytesArray array holding the bytes to write
+     * @param bytesBegin index of the first byte to write
+     * @param bytesLength number of bytes to write
+     * @param out target byte buffer
+     * @param offsets if not null, receives sourceIndex once for every byte written to out
+     * @param sourceIndex source index recorded in offsets for the written bytes
+     * @return CoderResult.OVERFLOW if some bytes had to be stored in the errorBuffer,
+     *         CoderResult.UNDERFLOW otherwise
+     * @draft ICU 3.6
+     * @provisional This API might change or be removed in a future release.
+     */
+    static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
+            byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out,
+            IntBuffer offsets, int sourceIndex) {
+        CoderResult cr = CoderResult.UNDERFLOW;
+        int bytesLimit = bytesBegin + bytesLength;
+
+        /*
+         * write bytes while the target has room, checking hasRemaining() instead of catching
+         * BufferOverflowException; one offset is recorded per byte actually written, so that
+         * offsets stay aligned with the output even when only part of the bytes fits
+         */
+        while (bytesBegin < bytesLimit && out.hasRemaining()) {
+            out.put(bytesArray[bytesBegin++]);
+            if (offsets != null) {
+                offsets.put(sourceIndex);
+            }
+        }
+
+        /*
+         * write overflow: keep the bytes that did not fit; encode() copies them to the
+         * target before it converts any further input
+         */
+        cnv.errorBufferLength = bytesLimit - bytesBegin;
+        if (cnv.errorBufferLength > 0) {
+            int index = 0;
+            while (bytesBegin < bytesLimit) {
+                cnv.errorBuffer[index++] = bytesArray[bytesBegin++];
+            }
+            cr = CoderResult.OVERFLOW;
+        }
+        return cr;
+    }
+
+ /**
+ * Returns the number of chars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @return The number of chars in the state. -1 if an error is encountered.
+ * @draft ICU 3.4
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/int fromUCountPending() {
+ if (preFromULength > 0) {
+ return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
+ } else if (preFromULength < 0) {
+ return -preFromULength;
+ } else if (fromUChar32 > 0) {
+ return 1;
+ } else if (preFromUFirstCP > 0) {
+ return UTF16.getCharCount(preFromUFirstCP);
+ }
+ return 0;
+ }
+
+ /**
+  * Moves the source position back by the number of chars that are still
+  * held in the converter state (see fromUCountPending()).
+  * @param source The input character buffer whose position is moved back
+  */
+ private final void setSourcePosition(CharBuffer source) {
+     // chars read by an earlier invocation that are still held in the state
+     final int pending = fromUCountPending();
+     source.position(source.position() - pending);
+ }
+
+ /**
+  * Writes the codepage substitution character to target.
+  * Subclasses may override this method; stateful converters typically
+  * need converter-specific handling here so that their state stays
+  * properly maintained.
+  * @param encoder The encoder whose replacement bytes and error buffer are used
+  * @param source The input character buffer; its position is passed on as the source index
+  * @param target The output byte buffer
+  * @param offsets Offsets buffer handed on to fromUWriteBytes; may be null
+  * @return A CoderResult object that contains the error result when an error occurs.
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source,
+         ByteBuffer target, IntBuffer offsets) {
+     CharsetICU cs = (CharsetICU) encoder.charset();
+     byte[] replacementBytes = encoder.replacement();
+
+     // The charset's single-byte substitution character (subChar1) is used
+     // when it is defined and the offending code unit is at most 0xff.
+     boolean useSubChar1 = cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff;
+     byte[] bytes = useSubChar1 ? new byte[] { cs.subChar1 } : replacementBytes;
+     return CharsetEncoderICU.fromUWriteBytes(encoder, bytes, 0, bytes.length,
+             target, offsets, source.position());
+ }
+
+ /**
+ * Encodes the chars of source into target by running them through encoder.encode() again; on overflow the remainder is encoded into encoder.errorBuffer.
+ * @param source The input character buffer
+ * @param target The output byte buffer
+ * @param offsets If not null, receives the initial source position once for every byte added to target
+ * @return The CoderResult of the encode into target; the result of the second encode (into the error buffer) is not returned.
+ * @draft ICU 4.0
+ * @provisional This API might change or be removed in a future release.
+ * */
+ CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets) {
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* This is a fun one. Recursion can occur - we're basically going to
+ * just retry shoving data through the same converter. Note, if you got
+ * here through some kind of invalid sequence, you maybe should emit a
+ * reset sequence of some kind. Since this IS an actual conversion,
+ * take care that you've changed the callback or the data, or you'll
+ * get an infinite loop.
+ */
+ // remember the starting positions so that offsets can be filled in after the encode
+ int oldTargetPosition = target.position();
+ int offsetIndex = source.position();
+
+ cr = encoder.encode(source, target, null, false); /* no offsets and no flush */
+ // one offset entry (the starting source position) per byte the encode added to target
+ if (offsets != null) {
+ while (target.position() != oldTargetPosition) {
+ offsets.put(offsetIndex);
+ oldTargetPosition++;
+ }
+ }
+
+ /* Note, if you did something like used a stop subcallback, things would get interesting.
+ * In fact, here's where we want to return the partially consumed in-source!
+ */
+ if (cr.isOverflow()) {
+ /* Overflowed target. Now, we'll write into the charErrorBuffer.
+ * It's a fixed size. If we overflow it...Hm
+ */
+
+ /* start the new target at the first free slot in the error buffer */
+ int errBuffLen = encoder.errorBufferLength;
+ ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer);
+ newTarget.position(errBuffLen); /* set the position at the end of the error buffer */
+ encoder.errorBufferLength = 0;
+ // the result of this second encode is discarded; cr from the first encode is what gets returned
+ encoder.encode(source, newTarget, null, false);
+ // newTarget wraps errorBuffer, so array() hands back that same array; the new length is wherever the encode stopped
+ encoder.errorBuffer = newTarget.array();
+ encoder.errorBufferLength = newTarget.position();
+ }
+
+ return cr;
+ }
+
+ /**
+  * <p>
+  * Handles the common situation where a character has been read that may
+  * be a lead surrogate followed by a trail surrogate. This method can
+  * change the source position and always writes
+  * <code>fromUChar32</code>.
+  * </p>
+  *
+  * <p>
+  * A <code>null</code> return means a surrogate pair was read successfully:
+  * the codepoint is stored in <code>fromUChar32</code>, which should be
+  * reset (to 0) after being read. For every other return value
+  * <code>fromUChar32</code> holds <code>lead</code>.
+  * </p>
+  *
+  * @param source
+  *            The encoding source; the trail surrogate is read from its current position.
+  * @param lead
+  *            A character that may be the first in a surrogate pair.
+  * @return <code>CoderResult.malformedForLength(1)</code> or
+  *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or
+  *         <code>null</code> if there isn't.
+  * @see #handleSurrogates(char[], int, int, char)
+  */
+ final CoderResult handleSurrogates(CharBuffer source, char lead) {
+     final CoderResult problem;
+     if (!UTF16.isLeadSurrogate(lead)) {
+         problem = CoderResult.malformedForLength(1);
+     } else if (!source.hasRemaining()) {
+         // no char left to pair with
+         problem = CoderResult.UNDERFLOW;
+     } else {
+         char trail = source.get();
+         if (UTF16.isTrailSurrogate(trail)) {
+             fromUChar32 = UCharacter.getCodePoint(lead, trail);
+             return null;
+         }
+         // not a trail surrogate: step back so the char is not consumed here
+         source.position(source.position() - 1);
+         problem = CoderResult.malformedForLength(1);
+     }
+
+     // every failure path hands the unpaired unit back through fromUChar32
+     fromUChar32 = lead;
+     return problem;
+ }
+
+ /**
+  * <p>
+  * Array flavour of <code>handleSurrogates(CharBuffer, char)</code>. No index is
+  * advanced here, so the calling method must increment its source index itself
+  * when this method returns <code>null</code>.
+  * </p>
+  *
+  * @param sourceArray
+  *            The encoding source.
+  * @param sourceIndex
+  *            Index in <code>sourceArray</code> at which the trail surrogate is looked for.
+  * @param sourceLimit
+  *            Limit for <code>sourceIndex</code>; at or beyond it UNDERFLOW is returned.
+  * @param lead
+  *            A character that may be the first in a surrogate pair.
+  * @return <code>CoderResult.malformedForLength(1)</code> or
+  *         <code>CoderResult.UNDERFLOW</code> if there is a problem, or <code>null</code> if there isn't.
+  * @see #handleSurrogates(CharBuffer, char)
+  */
+ final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex,
+         int sourceLimit, char lead) {
+     final CoderResult problem;
+     if (!UTF16.isLeadSurrogate(lead)) {
+         problem = CoderResult.malformedForLength(1);
+     } else if (sourceIndex >= sourceLimit) {
+         // no char left to pair with
+         problem = CoderResult.UNDERFLOW;
+     } else {
+         char trail = sourceArray[sourceIndex];
+         if (UTF16.isTrailSurrogate(trail)) {
+             // success: the caller advances its own index past the trail
+             fromUChar32 = UCharacter.getCodePoint(lead, trail);
+             return null;
+         }
+         problem = CoderResult.malformedForLength(1);
+     }
+
+     // every failure path hands the unpaired unit back through fromUChar32
+     fromUChar32 = lead;
+     return problem;
+ }
+}
diff --git a/src/com/ibm/icu/charset/CharsetHZ.java b/src/com/ibm/icu/charset/CharsetHZ.java
new file mode 100644
index 0000000..1b19a03
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetHZ.java
@@ -0,0 +1,345 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+
+public class CharsetHZ extends CharsetICU {
+
+ private static final int UCNV_TILDE = 0x7E; /* ~ */
+ private static final int UCNV_OPEN_BRACE = 0x7B; /* { */
+ private static final int UCNV_CLOSE_BRACE = 0x7D; /* } */
+ private static final byte[] SB_ESCAPE = new byte[] { 0x7E, 0x7D }; /* "~}": written by the encoder when shifting to single-byte mode */
+ private static final byte[] DB_ESCAPE = new byte[] { 0x7E, 0x7B }; /* "~{": written by the encoder when shifting to double-byte mode */
+ private static final byte[] TILDE_ESCAPE = new byte[] { 0x7E, 0x7E }; /* "~~": what the encoder writes for a '~' in the input */
+ private static final byte[] fromUSubstitution = new byte[] { (byte) 0x1A }; /* replacement bytes passed to the encoder's super constructor */
+ /* GBK charset looked up in the constructor; the HZ decoder and encoder create their GB converters from it */
+ private CharsetMBCS gbCharset;
+ private boolean isEmptySegment; /* NOTE(review): decode-time flag kept on the charset object rather than on a decoder instance */
+
+ public CharsetHZ(String icuCanonicalName, String canonicalName, String[] aliases) {
+     super(icuCanonicalName, canonicalName, aliases);
+     // the GB converters used by the HZ decoder and encoder are created from this charset
+     CharsetProviderICU provider = new CharsetProviderICU();
+     this.gbCharset = (CharsetMBCS) provider.charsetForName("GBK");
+     this.isEmptySegment = false;
+     this.minBytesPerChar = 1;
+     this.maxBytesPerChar = 4;
+     this.maxCharsPerByte = 1;
+ }
+
+ class CharsetDecoderHZ extends CharsetDecoderICU {
+ CharsetMBCS.CharsetDecoderMBCS gbDecoder;
+ boolean isStateDBCS = false;
+
+ public CharsetDecoderHZ(CharsetICU cs) {
+ super(cs);
+ gbDecoder = (CharsetMBCS.CharsetDecoderMBCS) gbCharset.newDecoder();
+ }
+
+ protected void implReset() {
+ super.implReset();
+ gbDecoder.implReset();
+
+ isStateDBCS = false;
+ isEmptySegment = false;
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ byte[] tempBuf = new byte[2];
+ int targetUniChar = 0;
+ int mySourceChar = 0;
+
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ else if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+
+ while (source.hasRemaining()) {
+
+ if (target.hasRemaining()) {
+
+ // get the byte as unsigned
+ mySourceChar = source.get() & 0xff;
+
+ if (mode == UCNV_TILDE) {
+ /* second byte after ~ */
+ mode = 0;
+ switch (mySourceChar) {
+ case 0x0A:
+ /* no output for ~\n (line-continuation marker) */
+ continue;
+ case UCNV_TILDE:
+ if (offsets != null) {
+ offsets.put(source.position() - 2);
+ }
+ target.put((char) mySourceChar);
+ continue;
+ case UCNV_OPEN_BRACE:
+ case UCNV_CLOSE_BRACE:
+ isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
+ if (isEmptySegment) {
+ isEmptySegment = false; /* we are handling it, reset to avoid future spurious errors */
+ this.toUBytesArray[0] = UCNV_TILDE;
+ this.toUBytesArray[1] = (byte)mySourceChar;
+ this.toULength = 2;
+ return CoderResult.malformedForLength(1);
+ }
+ isEmptySegment = true;
+ continue;
+ default:
+ /*
+ * if the first byte is equal to TILDE and the trail byte is not a valid byte then it is an
+ * error condition
+ */
+ mySourceChar |= 0x7e00;
+ targetUniChar = 0xffff;
+ isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
+ break;
+ }
+ } else if (isStateDBCS) {
+ if (toUnicodeStatus == 0) {
+ /* lead byte */
+ if (mySourceChar == UCNV_TILDE) {
+ mode = UCNV_TILDE;
+ } else {
+ /*
+ * add another bit to distinguish a 0 byte from not having seen a lead byte
+ */
+ toUnicodeStatus = mySourceChar | 0x100;
+ isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */
+ }
+ continue;
+ } else {
+ /* trail byte */
+ int leadByte = toUnicodeStatus & 0xff;
+ if (0x21 <= leadByte && leadByte <= 0x7d && 0x21 <= mySourceChar && mySourceChar <= 0x7e) {
+ tempBuf[0] = (byte) (leadByte + 0x80);
+ tempBuf[1] = (byte) (mySourceChar + 0x80);
+ targetUniChar = gbDecoder.simpleGetNextUChar(tempBuf, super.isFallbackUsed());
+ } else {
+ targetUniChar = 0xffff;
+ }
+ /*
+ * add another bit so that the code below writes 2 bytes in case of error
+ */
+ mySourceChar |= 0x10000 | (leadByte << 8);
+ toUnicodeStatus = 0;
+ }
+ } else {
+ if (mySourceChar == UCNV_TILDE) {
+ mode = UCNV_TILDE;
+ continue;
+ } else if (mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar; /* ASCII */
+ isEmptySegment = false; /* the segment has something valid */
+ } else {
+ targetUniChar = 0xffff;
+ isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
+ }
+ }
+
+ if (targetUniChar < 0xfffe) {
+ if (offsets != null) {
+ offsets.put(source.position() - 1 - (isStateDBCS ? 1 : 0));
+ }
+
+ target.put((char) targetUniChar);
+ } else /* targetUniChar >= 0xfffe */{
+ if (mySourceChar > 0xff) {
+ toUBytesArray[toUBytesBegin + 0] = (byte) (mySourceChar >> 8);
+ toUBytesArray[toUBytesBegin + 1] = (byte) mySourceChar;
+ toULength = 2;
+ } else {
+ toUBytesArray[toUBytesBegin + 0] = (byte) mySourceChar;
+ toULength = 1;
+ }
+ if (targetUniChar == 0xfffe) {
+ return CoderResult.unmappableForLength(toULength);
+ } else {
+ return CoderResult.malformedForLength(toULength);
+ }
+ }
+ } else {
+ return CoderResult.OVERFLOW;
+ }
+ }
+
+ return CoderResult.UNDERFLOW;
+ }
+ }
+
+ class CharsetEncoderHZ extends CharsetEncoderICU { // UTF-16 -> HZ: chars up to 0x7f as single bytes, GB codes as byte pairs, "~{" / "~}" written on mode changes
+ CharsetMBCS.CharsetEncoderMBCS gbEncoder; // created from the enclosing charset's gbCharset
+ boolean isEscapeAppended = false; // true once a "~{" or "~}" has been written since the last reset
+ boolean isTargetUCharDBCS = false; // whether the most recently encoded char needed two bytes
+
+ public CharsetEncoderHZ(CharsetICU cs) {
+ super(cs, fromUSubstitution);
+ gbEncoder = (CharsetMBCS.CharsetEncoderMBCS) gbCharset.newEncoder();
+ }
+
+ protected void implReset() {
+ super.implReset();
+ gbEncoder.implReset();
+ // with isEscapeAppended cleared, the next encoded char writes an escape again
+ isEscapeAppended = false;
+ isTargetUCharDBCS = false;
+ }
+
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+ int length = 0;
+ int[] targetUniChar = new int[] { 0 }; // receives the bytes from gbEncoder.fromUChar32; MISSING_CHAR_MARKER means "cannot be encoded"
+ int mySourceChar = 0;
+ boolean oldIsTargetUCharDBCS = isTargetUCharDBCS;
+ // nothing to do without input, and nothing can be written without room in target
+ if (!source.hasRemaining())
+ return CoderResult.UNDERFLOW;
+ else if (!target.hasRemaining())
+ return CoderResult.OVERFLOW;
+ // fromUChar32 is still set from an earlier call (presumably a pending lead surrogate): let handleSurrogates pair it
+ if (fromUChar32 != 0 && target.hasRemaining()) {
+ CoderResult cr = handleSurrogates(source, (char) fromUChar32);
+ return (cr != null) ? cr : CoderResult.unmappableForLength(2); // a complete surrogate pair is reported as unmappable
+ }
+ /* writing the char to the output stream */
+ while (source.hasRemaining()) {
+ targetUniChar[0] = MISSING_CHAR_MARKER;
+ if (target.hasRemaining()) {
+
+ mySourceChar = source.get();
+ // remember the mode before this char so that a mode change can be detected below
+ oldIsTargetUCharDBCS = isTargetUCharDBCS;
+ if (mySourceChar == UCNV_TILDE) {
+ /*
+ * concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);
+ */
+ concatEscape(source, target, offsets, TILDE_ESCAPE);
+ continue;
+ } else if (mySourceChar <= 0x7f) {
+ length = 1;
+ targetUniChar[0] = mySourceChar;
+ } else {
+ length = gbEncoder.fromUChar32(mySourceChar, targetUniChar, super.isFallbackUsed());
+
+ /*
+ * we can only use lead bytes 21..7D and trail bytes 21..7E
+ */
+ if (length == 2 && 0xa1a1 <= targetUniChar[0] && targetUniChar[0] <= 0xfdfe
+ && 0xa1 <= (targetUniChar[0] & 0xff) && (targetUniChar[0] & 0xff) <= 0xfe) {
+ targetUniChar[0] -= 0x8080;
+ } else {
+ targetUniChar[0] = MISSING_CHAR_MARKER;
+ }
+ }
+ if (targetUniChar[0] != MISSING_CHAR_MARKER) {
+ isTargetUCharDBCS = (targetUniChar[0] > 0x00FF);
+ if (oldIsTargetUCharDBCS != isTargetUCharDBCS || !isEscapeAppended) {
+ /* Shifting from a double byte to single byte mode */
+ if (!isTargetUCharDBCS) {
+ concatEscape(source, target, offsets, SB_ESCAPE);
+ isEscapeAppended = true;
+ } else { /*
+ * Shifting from a single byte to double byte mode
+ */
+ concatEscape(source, target, offsets, DB_ESCAPE);
+ isEscapeAppended = true;
+
+ }
+ }
+ // write the byte(s); whatever does not fit in target goes to errorBuffer
+ if (isTargetUCharDBCS) {
+ if (target.hasRemaining()) {
+ target.put((byte) (targetUniChar[0] >> 8));
+ if (offsets != null) {
+ offsets.put(source.position() - 1);
+ }
+ if (target.hasRemaining()) {
+ target.put((byte) targetUniChar[0]);
+ if (offsets != null) {
+ offsets.put(source.position() - 1);
+ }
+ } else {
+ errorBuffer[errorBufferLength++] = (byte) targetUniChar[0];
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ } else {
+ errorBuffer[errorBufferLength++] = (byte) (targetUniChar[0] >> 8);
+ errorBuffer[errorBufferLength++] = (byte) targetUniChar[0];
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+
+ } else {
+ if (target.hasRemaining()) {
+ target.put((byte) targetUniChar[0]);
+ if (offsets != null) {
+ offsets.put(source.position() - 1);
+ }
+
+ } else {
+ errorBuffer[errorBufferLength++] = (byte) targetUniChar[0];
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+
+ } else {
+ /* oops.. the code point is unassigned */
+ /* Handle surrogates */
+ /* check if the char is a First surrogate */
+
+ if (UTF16.isSurrogate((char) mySourceChar)) {
+ // use that handy handleSurrogates method everyone's been talking about!
+ CoderResult cr = handleSurrogates(source, (char) mySourceChar);
+ return (cr != null) ? cr : CoderResult.unmappableForLength(2);
+ } else {
+ /* callback(unassigned) for a BMP code point */
+ // *err = U_INVALID_CHAR_FOUND;
+ fromUChar32 = mySourceChar;
+ return CoderResult.unmappableForLength(1);
+ }
+ }
+ } else {
+ // *err = U_BUFFER_OVERFLOW_ERROR;
+ return CoderResult.OVERFLOW;
+ }
+ }
+
+ return CoderResult.UNDERFLOW;
+ }
+ // Writes strToAppend to target, spilling into errorBuffer once target is full; returns OVERFLOW if anything spilled, otherwise null. The callers in encodeLoop ignore the return value.
+ private CoderResult concatEscape(CharBuffer source, ByteBuffer target, IntBuffer offsets, byte[] strToAppend) {
+ CoderResult cr = null;
+ for (int i=0; i<strToAppend.length; i++) {
+ byte b = strToAppend[i];
+ if (target.hasRemaining()) {
+ target.put(b);
+ if (offsets != null)
+ offsets.put(source.position() - 1);
+ } else {
+ errorBuffer[errorBufferLength++] = b;
+ cr = CoderResult.OVERFLOW;
+ }
+ }
+ return cr;
+ }
+ }
+
+ public CharsetDecoder newDecoder() { // returns a new CharsetDecoderHZ for this charset
+ return new CharsetDecoderHZ(this);
+ }
+
+ public CharsetEncoder newEncoder() { // returns a new CharsetEncoderHZ for this charset
+ return new CharsetEncoderHZ(this);
+ }
+}
diff --git a/src/com/ibm/icu/charset/CharsetICU.java b/src/com/ibm/icu/charset/CharsetICU.java
new file mode 100644
index 0000000..d97f079
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetICU.java
@@ -0,0 +1,328 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+//import java.io.ByteArrayInputStream;
+//import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+
+import java.lang.reflect.InvocationTargetException;
+import java.nio.charset.*;
+import java.util.HashMap;
+
+/**
+ * <p>A subclass of java.nio.Charset for providing implementation of ICU's charset converters.
+ * This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link Charset#forName } and {@link #forNameICU }. With that
+ * converter, you can get its properties, set options, convert your data.</p>
+ *
+ * <p>Since many software programs recognize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases.
+ *
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CharsetICU extends Charset{
+
+ String icuCanonicalName; /* ICU converter name, as passed to the constructor */
+ String javaCanonicalName; /* canonical name that is also passed to java.nio.charset.Charset */
+ int options; /* NOTE(review): presumably converter option flags; no use is visible in this part of the file */
+ /* NOTE(review): the "+offset: size" prefixes in the comments below look like byte offsets/sizes of a native struct layout - confirm */
+ float maxCharsPerByte; /* set by subclasses, e.g. CharsetHZ sets 1 */
+
+ String name; /* +4: 60 internal name of the converter- invariant chars */
+
+ int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
+
+ byte platform; /* +68: 1 platform of the converter (only IBM now) */
+ byte conversionType; /* +69: 1 conversion type */
+
+ int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
+ int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
+
+ byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */
+ byte subCharLen; /* +76: 1 */
+
+ byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
+ byte hasFromUnicodeFallback; /* +78: 1 */
+ short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
+ byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
+ //byte reserved[/*19*/]; /* +81: 19 to round out the structure */
+
+
+ /**
+  * Creates the charset and records both of its names.
+  * @param icuCanonicalName The ICU name of the converter
+  * @param canonicalName The canonical name handed to {@link Charset}; must not be empty
+  * @param aliases The aliases handed to {@link Charset}
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) {
+     super(canonicalName, aliases);
+     if (0 == canonicalName.length()) {
+         throw new IllegalCharsetNameException(canonicalName);
+     }
+     this.icuCanonicalName = icuCanonicalName;
+     this.javaCanonicalName = canonicalName;
+ }
+
+ /**
+  * Ascertains whether a charset is a subset of this charset.
+  * Implements the abstract method of the super class. This implementation
+  * reports containment only for a charset that equals this one.
+  * @param cs charset to test
+  * @return true if the given charset equals this charset; false otherwise,
+  *         including when cs is null
+  * @stable ICU 3.6
+  */
+ public boolean contains(Charset cs){
+     // null never matches; apart from that only an equal charset counts
+     // as contained
+     final boolean sameCharset = (cs != null) && this.equals(cs);
+     return sameCharset;
+ }
+ private static final HashMap algorithmicCharsets = new HashMap(); /* ICU canonical name -> implementing class name; names not listed here are loaded as CharsetMBCS by getCharset() */
+ static{ /* the first group below is commented out, so those converters are not registered */
+ /*algorithmicCharsets.put("BOCU-1", "com.ibm.icu.charset.CharsetBOCU1" );
+ algorithmicCharsets.put("lmbcs1", "com.ibm.icu.charset.CharsetLMBCS1" );
+ algorithmicCharsets.put("lmbcs11", "com.ibm.icu.charset.CharsetLMBCS11" );
+ algorithmicCharsets.put("lmbcs16", "com.ibm.icu.charset.CharsetLMBCS16" );
+ algorithmicCharsets.put("lmbcs17", "com.ibm.icu.charset.CharsetLMBCS17" );
+ algorithmicCharsets.put("lmbcs18", "com.ibm.icu.charset.CharsetLMBCS18" );
+ algorithmicCharsets.put("lmbcs19", "com.ibm.icu.charset.CharsetLMBCS19" );
+ algorithmicCharsets.put("lmbcs2", "com.ibm.icu.charset.CharsetLMBCS2" );
+ algorithmicCharsets.put("lmbcs3", "com.ibm.icu.charset.CharsetLMBCS3" );
+ algorithmicCharsets.put("lmbcs4", "com.ibm.icu.charset.CharsetLMBCS4" );
+ algorithmicCharsets.put("lmbcs5", "com.ibm.icu.charset.CharsetLMBCS5" );
+ algorithmicCharsets.put("lmbcs6", "com.ibm.icu.charset.CharsetLMBCS6" );
+ algorithmicCharsets.put("lmbcs8", "com.ibm.icu.charset.CharsetLMBCS8" );
+ algorithmicCharsets.put("scsu", "com.ibm.icu.charset.CharsetSCSU" );
+ */
+ algorithmicCharsets.put("US-ASCII", "com.ibm.icu.charset.CharsetASCII" );
+ algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" );
+ algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" );
+ algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.charset.CharsetUTF16BE" );
+ algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.charset.CharsetUTF16LE" );
+ algorithmicCharsets.put("UTF16_OppositeEndian", "com.ibm.icu.charset.CharsetUTF16LE" ); /* NOTE(review): "opposite" maps to the LE class here and for UTF-32 below - presumably because the platform order is taken to be big-endian; confirm */
+ algorithmicCharsets.put("UTF16_PlatformEndian", "com.ibm.icu.charset.CharsetUTF16" );
+ algorithmicCharsets.put("UTF-32", "com.ibm.icu.charset.CharsetUTF32" );
+ algorithmicCharsets.put("UTF-32BE", "com.ibm.icu.charset.CharsetUTF32BE" );
+ algorithmicCharsets.put("UTF-32LE", "com.ibm.icu.charset.CharsetUTF32LE" );
+ algorithmicCharsets.put("UTF32_OppositeEndian", "com.ibm.icu.charset.CharsetUTF32LE" );
+ algorithmicCharsets.put("UTF32_PlatformEndian", "com.ibm.icu.charset.CharsetUTF32" );
+ algorithmicCharsets.put("UTF-8", "com.ibm.icu.charset.CharsetUTF8" );
+ algorithmicCharsets.put("CESU-8", "com.ibm.icu.charset.CharsetCESU8" );
+ algorithmicCharsets.put("UTF-7", "com.ibm.icu.charset.CharsetUTF7" );
+ algorithmicCharsets.put("ISCII,version=0", "com.ibm.icu.charset.CharsetISCII" ); /* every ISCII version shares one class */
+ algorithmicCharsets.put("ISCII,version=1", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=2", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=3", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=4", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=5", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=6", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=7", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("ISCII,version=8", "com.ibm.icu.charset.CharsetISCII" );
+ algorithmicCharsets.put("IMAP-mailbox-name", "com.ibm.icu.charset.CharsetUTF7" ); /* handled by the UTF-7 class */
+ algorithmicCharsets.put("HZ", "com.ibm.icu.charset.CharsetHZ" );
+ algorithmicCharsets.put("ISO_2022,locale=ja,version=0", "com.ibm.icu.charset.CharsetISO2022" ); /* every ISO-2022 locale/version shares one class */
+ algorithmicCharsets.put("ISO_2022,locale=ja,version=1", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=ja,version=2", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=ja,version=3", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=ja,version=4", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=zh,version=0", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=zh,version=1", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=ko,version=0", "com.ibm.icu.charset.CharsetISO2022" );
+ algorithmicCharsets.put("ISO_2022,locale=ko,version=1", "com.ibm.icu.charset.CharsetISO2022" );
+ }
+
+ /*public*/ static final Charset getCharset(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+     // Algorithmic converters have dedicated classes; all the cnv files are loaded as MBCS.
+     String className = (String) algorithmicCharsets.get(icuCanonicalName);
+     if(className==null){
+         className = "com.ibm.icu.charset.CharsetMBCS";
+     }
+     String message = icuCanonicalName+": "+"Could not load " + className;
+     Throwable cause = null;
+     try{
+         Class cs = Class.forName(className);
+         Class[] paramTypes = new Class[]{ String.class, String.class, String[].class};
+         final Constructor c = cs.getConstructor(paramTypes);
+         Object obj = c.newInstance(new Object[]{ icuCanonicalName, javaCanonicalName, aliases});
+         if(obj instanceof CharsetICU){
+             return (CharsetICU)obj;
+         }
+     }catch (InvocationTargetException e) {
+         cause = e.getTargetException(); // the charset constructor itself threw
+         message = message + ". Exception:" + cause;
+     }catch(ClassNotFoundException ex){ cause = ex;
+     }catch(NoSuchMethodException ex){ cause = ex;
+     }catch (IllegalAccessException ex){ cause = ex;
+     }catch (InstantiationException ex){ cause = ex;
+     }
+     // Same exception type and message as before, but the failure that used to be
+     // swallowed is now attached as the cause so that it can be diagnosed.
+     UnsupportedCharsetException failure = new UnsupportedCharsetException(message);
+     if (cause != null) { failure.initCause(cause); }
+     throw failure;
+ }
+
+ static final boolean isSurrogate(int c){
+     return 0xd800 <= c && c <= 0xdfff; // the same values that satisfy (c & 0xfffff800) == 0xd800
+ }
+
+ /*
+ * Returns the default charset name
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+// static final String getDefaultCharsetName(){
+// String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding();
+// return defaultEncoding;
+// }
+
+ /**
+  * Returns a charset object for the named charset.
+  * This method guarantees that the ICU charset is returned when
+  * available. If the ICU charset provider does not support the
+  * specified charset, the lookup falls back to {@link Charset#forName},
+  * i.e. to other charset providers including the standard Java one.
+  *
+  * @param charsetName The name of the requested charset,
+  *         may be either a canonical name or an alias
+  * @return A charset object for the named charset
+  * @throws IllegalCharsetNameException If the given charset name
+  *         is illegal
+  * @throws UnsupportedCharsetException If no support for the
+  *         named charset is available in this instance of the Java
+  *         virtual machine
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
+     CharsetICU icuCharset = (CharsetICU) new CharsetProviderICU().charsetForName(charsetName);
+     if (icuCharset == null) {
+         // the ICU provider does not know the name: defer to the regular lookup
+         return Charset.forName(charsetName);
+     }
+     return icuCharset;
+ }
+
+// /**
+// * @see java.lang.Comparable#compareTo(java.lang.Object)
+// * @stable 3.8
+// */
+// public int compareTo(Object otherObj) {
+// if (!(otherObj instanceof CharsetICU)) {
+// return -1;
+// }
+// return icuCanonicalName.compareTo(((CharsetICU)otherObj).icuCanonicalName);
+// }
+
+ /**
+ * This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
+ * start of the stream for example U+FEFF (the Unicode BOM/signature
+ * character) that can be ignored.
+ *
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns number of bytes of the BOM of the indicated Unicode charset.
+ * 0 is returned when no Unicode signature is recognized.
+ *
+ */
+ // TODO This should be proposed as CharsetDecoderICU API.
+// static String detectUnicodeSignature(ByteBuffer source) {
+// int signatureLength = 0; // number of bytes of the signature
+// final int SIG_MAX_LEN = 5;
+// String sigUniCharset = null; // states what unicode charset is the BOM
+// int i = 0;
+//
+// /*
+// * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
+// * don't misdetect something
+// */
+// byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
+// (byte) 0xa5 };
+//
+// while (i < source.remaining() && i < SIG_MAX_LEN) {
+// start[i] = source.get(i);
+// i++;
+// }
+//
+// if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
+// signatureLength = 2;
+// sigUniCharset = "UTF-16BE";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
+// if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
+// signatureLength = 4;
+// sigUniCharset = "UTF-32LE";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else {
+// signatureLength = 2;
+// sigUniCharset = "UTF-16LE";
+// source.position(signatureLength);
+// return sigUniCharset;
+// }
+// } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
+// && start[2] == (byte) 0xBF) {
+// signatureLength = 3;
+// sigUniCharset = "UTF-8";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
+// && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
+// signatureLength = 4;
+// sigUniCharset = "UTF-32BE";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
+// && start[2] == (byte) 0xFF) {
+// signatureLength = 3;
+// sigUniCharset = "SCSU";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
+// && start[2] == (byte) 0x28) {
+// signatureLength = 3;
+// sigUniCharset = "BOCU-1";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
+// && start[2] == (byte) 0x76) {
+//
+// if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
+// signatureLength = 5;
+// sigUniCharset = "UTF-7";
+// source.position(signatureLength);
+// return sigUniCharset;
+// } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
+// || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
+// signatureLength = 4;
+// sigUniCharset = "UTF-7";
+// source.position(signatureLength);
+// return sigUniCharset;
+// }
+// } else if (start[0] == (byte) 0xDD && start[1] == (byte) 0x73
+// && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
+// signatureLength = 4;
+// sigUniCharset = "UTF-EBCDIC";
+// source.position(signatureLength);
+// return sigUniCharset;
+// }
+//
+// /* no known Unicode signature byte sequence recognized */
+// return null;
+// }
+
+}
+
diff --git a/src/com/ibm/icu/charset/CharsetISCII.java b/src/com/ibm/icu/charset/CharsetISCII.java
new file mode 100644
index 0000000..d7e970e
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetISCII.java
@@ -0,0 +1,1274 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+
+/**
+ * @author Michael Ow
+ *
+ */
+class CharsetISCII extends CharsetICU {
+ /*
+  * Converter-wide constants. They are all compile-time constants, so they are declared
+  * static: there is no reason for every CharsetISCII instance to carry its own copy.
+  */
+
+ /* Mask applied to the converter option value to obtain the index into lookupInitialData. */
+ private static final short UCNV_OPTIONS_VERSION_MASK = 0X0f;
+ //private final short NUKTA = 0x093c;
+ //private final short HALANT = 0x094d;
+ private static final short ZWNJ = 0x200c; /* Zero Width Non Joiner */
+ private static final short ZWJ = 0x200d; /* Zero Width Joiner */
+ //private final int INVALID_CHAR = 0xffff;
+
+ /* ISCII byte values. */
+ private static final short ATR = 0xef; /* Attribute code */
+ private static final short EXT = 0xf0; /* Extension code */
+
+ /* Unicode code points of the danda and double danda (rows 0x964 / 0x965 of validityTable). */
+ private static final short DANDA = 0x0964;
+ private static final short DOUBLE_DANDA = 0x0965;
+
+ /* ISCII byte values; the fromUnicodeTable rows for 0x093c, 0x094d, 0x0964 and 0x0946 hold the first four. */
+ private static final short ISCII_NUKTA = 0xe9;
+ private static final short ISCII_HALANT = 0xe8;
+ private static final short ISCII_DANDA = 0xea;
+ private static final short ISCII_VOWEL_SIGN_E = 0xe0;
+ private static final short ISCII_INV = 0xd9; /* NOTE(review): presumably the ISCII "invisible consonant" byte -- confirm */
+
+ /* First and last code point of the Unicode range handled by this converter, and its width. */
+ private static final short INDIC_BLOCK_BEGIN = 0x0900;
+ private static final short INDIC_BLOCK_END = 0x0d7f;
+ private static final short INDIC_RANGE = (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN);
+
+ /* Code point 0x0931, the one place where Telugu and Kannada differ (see the validityTable notes). */
+ private static final short VOCALLIC_RR = 0x0931;
+ private static final short LF = 0x0a;
+ private static final short ASCII_END = 0xa0;
+ private static final int NO_CHAR_MARKER = 0xfffe;
+
+ /* Offset of the Telugu script from the start of the Indic range: one DELTA per script index. */
+ private static final short TELUGU_DELTA = (UniLang.DELTA * UniLang.TELUGU);
+ private static final short DEV_ABBR_SIGN = 0x0970;
+ private static final short DEV_ANUDATTA = 0x0952;
+
+ /* Inclusive bounds of a byte range associated with the EXT code (NOTE(review): usage is outside this excerpt). */
+ private static final short EXT_RANGE_BEGIN = 0xa1;
+ private static final short EXT_RANGE_END = 0xee;
+
+ /*
+  * Index of each supported script within the Indic range. Multiplying an index
+  * by DELTA gives that script's offset from the first script.
+  */
+ private static final class UniLang {
+     static final short DEVALANGARI = 0;
+     static final short BENGALI = 1;
+     static final short GURMUKHI = 2;
+     static final short GUJARATI = 3;
+     static final short ORIYA = 4;
+     static final short TAMIL = 5;
+     static final short TELUGU = 6;
+     static final short KANNADA = 7;
+     static final short MALAYALAM = 8;
+
+     /* Number of code points between the starts of two consecutive scripts. */
+     static final short DELTA = 0x80;
+ }
+
+ /*
+  * ISCII script codes. They form two consecutive runs (0x40..0x4b and 0x71..0x76),
+  * so each value is written relative to its predecessor in the run.
+  */
+ private static final class ISCIILang {
+     /* First run: 0x40 .. 0x4b */
+     static final short DEF = 0x40;    /* 0x40: default */
+     static final short RMN = DEF + 1; /* 0x41 */
+     static final short DEV = RMN + 1; /* 0x42: paired with UniLang.DEVALANGARI */
+     static final short BNG = DEV + 1; /* 0x43: paired with UniLang.BENGALI */
+     static final short TML = BNG + 1; /* 0x44: paired with UniLang.TAMIL */
+     static final short TLG = TML + 1; /* 0x45: paired with UniLang.TELUGU */
+     static final short ASM = TLG + 1; /* 0x46 */
+     static final short ORI = ASM + 1; /* 0x47: paired with UniLang.ORIYA */
+     static final short KND = ORI + 1; /* 0x48: paired with UniLang.KANNADA */
+     static final short MLM = KND + 1; /* 0x49: paired with UniLang.MALAYALAM */
+     static final short GJR = MLM + 1; /* 0x4a: paired with UniLang.GUJARATI */
+     static final short PNJ = GJR + 1; /* 0x4b: paired with UniLang.GURMUKHI */
+
+     /* Second run: 0x71 .. 0x76 */
+     static final short ARB = 0x71;    /* 0x71 */
+     static final short PES = ARB + 1; /* 0x72 */
+     static final short URD = PES + 1; /* 0x73 */
+     static final short SND = URD + 1; /* 0x74 */
+     static final short KSM = SND + 1; /* 0x75 */
+     static final short PST = KSM + 1; /* 0x76 */
+ }
+
+ /*
+  * One bit per script (or script group) in a validity mask, written as shifts so
+  * the bit positions are explicit.
+  */
+ private static final class MaskEnum {
+     static final short ZERO = 0;          /* no script */
+     static final short TML_MASK = 1 << 0; /* 0x01 */
+     static final short MLM_MASK = 1 << 1; /* 0x02 */
+     static final short KND_MASK = 1 << 2; /* 0x04 */
+     static final short BNG_MASK = 1 << 3; /* 0x08 */
+     static final short ORI_MASK = 1 << 4; /* 0x10 */
+     static final short GJR_MASK = 1 << 5; /* 0x20 */
+     static final short PNJ_MASK = 1 << 6; /* 0x40 */
+     static final short DEV_MASK = 1 << 7; /* 0x80 */
+ }
+
+ /*
+  * Leading part of converter names of the form "ISCII,version=N". Declared static because
+  * it is a constant and does not need to be stored in every instance.
+  */
+ private static final String ISCII_CNV_PREFIX = "ISCII,version=";
+
+ /*
+  * Mutable conversion state for one ISCII converter. Fields ending in "ToUnicode" and
+  * "FromUnicode" hold the state of the respective conversion direction.
+  */
+ private final class UConverterDataISCII {
+     int option;
+     int contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */
+     int contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */
+     short defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */
+     short currentDeltaFromUnicode; /* current delta in Indic block */
+     short currentDeltaToUnicode; /* current delta in Indic block */
+     short currentMaskFromUnicode; /* mask for current state in fromUnicode */
+     short currentMaskToUnicode; /* mask for current state in toUnicode */
+     short defMaskToUnicode; /* mask for default state in toUnicode */
+     boolean isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */
+     boolean resetToDefaultToUnicode; /* boolean for resetting to default delta and mask when a newline is encountered */
+     String name;
+
+     /*
+      * Stores the option value and name, then puts every state field into its initial value.
+      *
+      * option: converter options; its low four bits select the entry of lookupInitialData
+      * name:   converter name, stored as given
+      */
+     UConverterDataISCII(int option, String name) {
+         this.option = option;
+         this.name = name;
+
+         initialize();
+     }
+
+     /*
+      * Restores all state fields (everything except option and name) to the initial
+      * values implied by the option value.
+      */
+     void initialize() {
+         /*
+          * NOTE(review): the mask admits indices 0..15 while lookupInitialData has 9 entries;
+          * callers presumably only pass valid versions -- confirm.
+          */
+         final LookupDataStruct initial = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK];
+         final short initialDelta = (short)(initial.uniLang * UniLang.DELTA);
+         final short initialMask = (short)initial.maskEnum;
+
+         this.contextCharToUnicode = NO_CHAR_MARKER;
+         /*
+          * Previously this line assigned currentDeltaFromUnicode (which is set again below),
+          * so the fromUnicode context character was never cleared on re-initialization.
+          */
+         this.contextCharFromUnicode = 0x0000;
+
+         this.defDeltaToUnicode = initialDelta;
+         this.currentDeltaFromUnicode = initialDelta;
+         this.currentDeltaToUnicode = initialDelta;
+
+         this.currentMaskToUnicode = initialMask;
+         this.currentMaskFromUnicode = initialMask;
+         this.defMaskToUnicode = initialMask;
+
+         this.isFirstBuffer = true;
+         this.resetToDefaultToUnicode = false;
+     }
+ }
+
+ /*
+  * Plain value holder tying together the three identifiers of one script:
+  * its UniLang index, its MaskEnum bit and its ISCIILang code.
+  */
+ private static final class LookupDataStruct {
+     short uniLang, maskEnum, isciiLang;
+
+     LookupDataStruct(short lang, short mask, short iscii) {
+         uniLang = lang;
+         maskEnum = mask;
+         isciiLang = iscii;
+     }
+ }
+
+ /*
+  * Initial script data, indexed by the low bits of the converter option value
+  * (see UConverterDataISCII.initialize). Telugu and Kannada share KND_MASK.
+  */
+ private static final LookupDataStruct[] lookupInitialData = {
+     /* 0 */ new LookupDataStruct(UniLang.DEVALANGARI, MaskEnum.DEV_MASK, ISCIILang.DEV),
+     /* 1 */ new LookupDataStruct(UniLang.BENGALI,     MaskEnum.BNG_MASK, ISCIILang.BNG),
+     /* 2 */ new LookupDataStruct(UniLang.GURMUKHI,    MaskEnum.PNJ_MASK, ISCIILang.PNJ),
+     /* 3 */ new LookupDataStruct(UniLang.GUJARATI,    MaskEnum.GJR_MASK, ISCIILang.GJR),
+     /* 4 */ new LookupDataStruct(UniLang.ORIYA,       MaskEnum.ORI_MASK, ISCIILang.ORI),
+     /* 5 */ new LookupDataStruct(UniLang.TAMIL,       MaskEnum.TML_MASK, ISCIILang.TML),
+     /* 6 */ new LookupDataStruct(UniLang.TELUGU,      MaskEnum.KND_MASK, ISCIILang.TLG),
+     /* 7 */ new LookupDataStruct(UniLang.KANNADA,     MaskEnum.KND_MASK, ISCIILang.KND),
+     /* 8 */ new LookupDataStruct(UniLang.MALAYALAM,   MaskEnum.MLM_MASK, ISCIILang.MLM)
+ };
+
+ /*
+ * The values in validity table are indexed by the lower bits of Unicode
+ * range 0x0900 - 0x097f. The values have a structure like:
+ * -----------------------------------------------------------------
+ * |DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML |
+ * | | | | | ASM | KND | | |
+ * -----------------------------------------------------------------
+ * If a code point is valid in a particular script
+ * then that bit is turned on
+ *
+ * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
+ * to represent these languages
+ *
+ * Telugu and Kannada have the same code points except for Vocallic_RR, which we special case,
+ * so we combine them and use 1 bit to represent these languages
+ */
+ private static final short validityTable[] = {
+ /* This state table is tool generated so please do not edit unless you know exactly what you are doing */
+ /* Note: This table was edited to mirror the Windows XP implementation */
+ /* ISCII: Valid: Unicode */
+ /*
+  * Each entry is the sum of the MaskEnum bits of the scripts in which the code point named
+  * in the row comment is valid; the masks are distinct bits, so the sum is their union.
+  * NOTE(review): the "Valid" column of the row comments is not always equal to the actual
+  * sum (e.g. the 0x901 row adds up to 0xf8 but is annotated 0xb8), presumably because the
+  * comments predate the Windows XP edit mentioned above -- the expressions are authoritative.
+  */
+ /* 0xa0: 0x00: 0x900 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xa1: 0xb8: 0x901 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xa2: 0xfe: 0x902 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa3: 0xbf: 0x903 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0x00: 0x00: 0x904 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xa4: 0xff: 0x905 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa5: 0xff: 0x906 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa6: 0xff: 0x907 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa7: 0xff: 0x908 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa8: 0xff: 0x909 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xa9: 0xff: 0x90a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xaa: 0xfe: 0x90b */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x90c */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xae: 0x80: 0x90d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xab: 0x87: 0x90e */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xac: 0xff: 0x90f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xad: 0xff: 0x910 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb2: 0x80: 0x911 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xaf: 0x87: 0x912 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb0: 0xff: 0x913 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb1: 0xff: 0x914 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb3: 0xff: 0x915 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb4: 0xfe: 0x916 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xb5: 0xfe: 0x917 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xb6: 0xfe: 0x918 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xb7: 0xff: 0x919 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb8: 0xff: 0x91a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xb9: 0xfe: 0x91b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xba: 0xff: 0x91c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xbb: 0xfe: 0x91d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xbc: 0xff: 0x91e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xbd: 0xff: 0x91f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xbe: 0xfe: 0x920 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xbf: 0xfe: 0x921 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc0: 0xfe: 0x922 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc1: 0xff: 0x923 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc2: 0xff: 0x924 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc3: 0xfe: 0x925 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc4: 0xfe: 0x926 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc5: 0xfe: 0x927 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xc6: 0xff: 0x928 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc7: 0x81: 0x929 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.TML_MASK,
+ /* 0xc8: 0xff: 0x92a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xc9: 0xfe: 0x92b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xca: 0xfe: 0x92c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xcb: 0xfe: 0x92d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xcc: 0xfe: 0x92e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xcd: 0xff: 0x92f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xcf: 0xff: 0x930 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd0: 0x87: 0x931 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd1: 0xff: 0x932 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd2: 0xb7: 0x933 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd3: 0x83: 0x934 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd4: 0xff: 0x935 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd5: 0xfe: 0x936 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0xd6: 0xbf: 0x937 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd7: 0xff: 0x938 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xd8: 0xff: 0x939 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0x00: 0x00: 0x93a */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x93b */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe9: 0xda: 0x93c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x93d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xda: 0xff: 0x93e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdb: 0xff: 0x93f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdc: 0xff: 0x940 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdd: 0xff: 0x941 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xde: 0xff: 0x942 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xdf: 0xbe: 0x943 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x944 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe3: 0x80: 0x945 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe0: 0x87: 0x946 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe1: 0xff: 0x947 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe2: 0xff: 0x948 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe7: 0x80: 0x949 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xe4: 0x87: 0x94a */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe5: 0xff: 0x94b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe6: 0xff: 0x94c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xe8: 0xff: 0x94d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xec: 0x00: 0x94e */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xed: 0x00: 0x94f */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x950 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x951 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x952 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x953 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x954 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x955 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x956 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x957 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x958 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x959 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x95e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xce: 0x98: 0x95f */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x960 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x961 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x962 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x963 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xea: 0xf8: 0x964 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xeaea: 0x00: 0x965 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+ /* 0xf1: 0xff: 0x966 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf2: 0xff: 0x967 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf3: 0xff: 0x968 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf4: 0xff: 0x969 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf5: 0xff: 0x96a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf6: 0xff: 0x96b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf7: 0xff: 0x96c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf8: 0xff: 0x96d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xf9: 0xff: 0x96e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0xfa: 0xff: 0x96f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK,
+ /* 0x00: 0x80: 0x970 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO,
+
+ /*
+ * The length of the array is 128 to provide values for 0x900..0x97f.
+ * The last 15 entries for 0x971..0x97f of the table are all zero
+ * because no Indic script uses such Unicode code points.
+ */
+
+ /* 0x00: 0x00: 0x971 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x972 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x973 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x974 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x975 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x976 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x977 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x978 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x979 */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97A */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97B */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97C */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97D */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97E */ MaskEnum.ZERO,
+ /* 0x00: 0x00: 0x97F */ MaskEnum.ZERO,
+ };
+
+ /*
+  * Unicode-to-ISCII lookup table: 128 entries, one per code point 0x0900..0x097f, in the
+  * order given by the trailing comments.
+  * NOTE(review): 0xFFFF presumably marks code points without an ISCII equivalent (compare the
+  * commented-out INVALID_CHAR constant), and values above 0x00ff presumably pack a two-byte
+  * ISCII sequence as (first byte << 8 | second byte), e.g. 0xeaea for the double danda 0x0965
+  * -- confirm against the encoder, which is outside this excerpt.
+  */
+ private static final char fromUnicodeTable[] = {
+ 0x00a0, /* 0x0900 */
+ 0x00a1, /* 0x0901 */
+ 0x00a2, /* 0x0902 */
+ 0x00a3, /* 0x0903 */
+ 0xa4e0, /* 0x0904 */
+ 0x00a4, /* 0x0905 */
+ 0x00a5, /* 0x0906 */
+ 0x00a6, /* 0x0907 */
+ 0x00a7, /* 0x0908 */
+ 0x00a8, /* 0x0909 */
+ 0x00a9, /* 0x090a */
+ 0x00aa, /* 0x090b */
+ 0xA6E9, /* 0x090c */
+ 0x00ae, /* 0x090d */
+ 0x00ab, /* 0x090e */
+ 0x00ac, /* 0x090f */
+ 0x00ad, /* 0x0910 */
+ 0x00b2, /* 0x0911 */
+ 0x00af, /* 0x0912 */
+ 0x00b0, /* 0x0913 */
+ 0x00b1, /* 0x0914 */
+ 0x00b3, /* 0x0915 */
+ 0x00b4, /* 0x0916 */
+ 0x00b5, /* 0x0917 */
+ 0x00b6, /* 0x0918 */
+ 0x00b7, /* 0x0919 */
+ 0x00b8, /* 0x091a */
+ 0x00b9, /* 0x091b */
+ 0x00ba, /* 0x091c */
+ 0x00bb, /* 0x091d */
+ 0x00bc, /* 0x091e */
+ 0x00bd, /* 0x091f */
+ 0x00be, /* 0x0920 */
+ 0x00bf, /* 0x0921 */
+ 0x00c0, /* 0x0922 */
+ 0x00c1, /* 0x0923 */
+ 0x00c2, /* 0x0924 */
+ 0x00c3, /* 0x0925 */
+ 0x00c4, /* 0x0926 */
+ 0x00c5, /* 0x0927 */
+ 0x00c6, /* 0x0928 */
+ 0x00c7, /* 0x0929 */
+ 0x00c8, /* 0x092a */
+ 0x00c9, /* 0x092b */
+ 0x00ca, /* 0x092c */
+ 0x00cb, /* 0x092d */
+ 0x00cc, /* 0x092e */
+ 0x00cd, /* 0x092f */
+ 0x00cf, /* 0x0930 */
+ 0x00d0, /* 0x0931 */
+ 0x00d1, /* 0x0932 */
+ 0x00d2, /* 0x0933 */
+ 0x00d3, /* 0x0934 */
+ 0x00d4, /* 0x0935 */
+ 0x00d5, /* 0x0936 */
+ 0x00d6, /* 0x0937 */
+ 0x00d7, /* 0x0938 */
+ 0x00d8, /* 0x0939 */
+ 0xFFFF, /* 0x093a */
+ 0xFFFF, /* 0x093b */
+ 0x00e9, /* 0x093c */
+ 0xEAE9, /* 0x093d */
+ 0x00da, /* 0x093e */
+ 0x00db, /* 0x093f */
+ 0x00dc, /* 0x0940 */
+ 0x00dd, /* 0x0941 */
+ 0x00de, /* 0x0942 */
+ 0x00df, /* 0x0943 */
+ 0xDFE9, /* 0x0944 */
+ 0x00e3, /* 0x0945 */
+ 0x00e0, /* 0x0946 */
+ 0x00e1, /* 0x0947 */
+ 0x00e2, /* 0x0948 */
+ 0x00e7, /* 0x0949 */
+ 0x00e4, /* 0x094a */
+ 0x00e5, /* 0x094b */
+ 0x00e6, /* 0x094c */
+ 0x00e8, /* 0x094d */
+ 0x00ec, /* 0x094e */
+ 0x00ed, /* 0x094f */
+ 0xA1E9, /* 0x0950 */ /* OM Symbol */
+ 0xFFFF, /* 0x0951 */
+ 0xF0B8, /* 0x0952 */
+ 0xFFFF, /* 0x0953 */
+ 0xFFFF, /* 0x0954 */
+ 0xFFFF, /* 0x0955 */
+ 0xFFFF, /* 0x0956 */
+ 0xFFFF, /* 0x0957 */
+ 0xb3e9, /* 0x0958 */
+ 0xb4e9, /* 0x0959 */
+ 0xb5e9, /* 0x095a */
+ 0xbae9, /* 0x095b */
+ 0xbfe9, /* 0x095c */
+ 0xC0E9, /* 0x095d */
+ 0xc9e9, /* 0x095e */
+ 0x00ce, /* 0x095f */
+ 0xAAe9, /* 0x0960 */
+ 0xA7E9, /* 0x0961 */
+ 0xDBE9, /* 0x0962 */
+ 0xDCE9, /* 0x0963 */
+ 0x00ea, /* 0x0964 */
+ 0xeaea, /* 0x0965 */
+ 0x00f1, /* 0x0966 */
+ 0x00f2, /* 0x0967 */
+ 0x00f3, /* 0x0968 */
+ 0x00f4, /* 0x0969 */
+ 0x00f5, /* 0x096a */
+ 0x00f6, /* 0x096b */
+ 0x00f7, /* 0x096c */
+ 0x00f8, /* 0x096d */
+ 0x00f9, /* 0x096e */
+ 0x00fa, /* 0x096f */
+ 0xF0BF, /* 0x0970 */
+ 0xFFFF, /* 0x0971 */
+ 0xFFFF, /* 0x0972 */
+ 0xFFFF, /* 0x0973 */
+ 0xFFFF, /* 0x0974 */
+ 0xFFFF, /* 0x0975 */
+ 0xFFFF, /* 0x0976 */
+ 0xFFFF, /* 0x0977 */
+ 0xFFFF, /* 0x0978 */
+ 0xFFFF, /* 0x0979 */
+ 0xFFFF, /* 0x097a */
+ 0xFFFF, /* 0x097b */
+ 0xFFFF, /* 0x097c */
+ 0xFFFF, /* 0x097d */
+ 0xFFFF, /* 0x097e */
+ 0xFFFF, /* 0x097f */
+ };
+ private static final char toUnicodeTable[] = { /* ISCII byte (the array index, repeated in each row comment) -> Unicode code unit; read by GetMapping. 0xFFFF presumably marks unassigned bytes - confirm against UConverterConstants.missingCharMarker */
+ 0x0000, /* 0x00 */
+ 0x0001, /* 0x01 */
+ 0x0002, /* 0x02 */
+ 0x0003, /* 0x03 */
+ 0x0004, /* 0x04 */
+ 0x0005, /* 0x05 */
+ 0x0006, /* 0x06 */
+ 0x0007, /* 0x07 */
+ 0x0008, /* 0x08 */
+ 0x0009, /* 0x09 */
+ 0x000a, /* 0x0a */
+ 0x000b, /* 0x0b */
+ 0x000c, /* 0x0c */
+ 0x000d, /* 0x0d */
+ 0x000e, /* 0x0e */
+ 0x000f, /* 0x0f */
+ 0x0010, /* 0x10 */
+ 0x0011, /* 0x11 */
+ 0x0012, /* 0x12 */
+ 0x0013, /* 0x13 */
+ 0x0014, /* 0x14 */
+ 0x0015, /* 0x15 */
+ 0x0016, /* 0x16 */
+ 0x0017, /* 0x17 */
+ 0x0018, /* 0x18 */
+ 0x0019, /* 0x19 */
+ 0x001a, /* 0x1a */
+ 0x001b, /* 0x1b */
+ 0x001c, /* 0x1c */
+ 0x001d, /* 0x1d */
+ 0x001e, /* 0x1e */
+ 0x001f, /* 0x1f */
+ 0x0020, /* 0x20 */
+ 0x0021, /* 0x21 */
+ 0x0022, /* 0x22 */
+ 0x0023, /* 0x23 */
+ 0x0024, /* 0x24 */
+ 0x0025, /* 0x25 */
+ 0x0026, /* 0x26 */
+ 0x0027, /* 0x27 */
+ 0x0028, /* 0x28 */
+ 0x0029, /* 0x29 */
+ 0x002a, /* 0x2a */
+ 0x002b, /* 0x2b */
+ 0x002c, /* 0x2c */
+ 0x002d, /* 0x2d */
+ 0x002e, /* 0x2e */
+ 0x002f, /* 0x2f */
+ 0x0030, /* 0x30 */
+ 0x0031, /* 0x31 */
+ 0x0032, /* 0x32 */
+ 0x0033, /* 0x33 */
+ 0x0034, /* 0x34 */
+ 0x0035, /* 0x35 */
+ 0x0036, /* 0x36 */
+ 0x0037, /* 0x37 */
+ 0x0038, /* 0x38 */
+ 0x0039, /* 0x39 */
+ 0x003A, /* 0x3A */
+ 0x003B, /* 0x3B */
+ 0x003c, /* 0x3c */
+ 0x003d, /* 0x3d */
+ 0x003e, /* 0x3e */
+ 0x003f, /* 0x3f */
+ 0x0040, /* 0x40 */
+ 0x0041, /* 0x41 */
+ 0x0042, /* 0x42 */
+ 0x0043, /* 0x43 */
+ 0x0044, /* 0x44 */
+ 0x0045, /* 0x45 */
+ 0x0046, /* 0x46 */
+ 0x0047, /* 0x47 */
+ 0x0048, /* 0x48 */
+ 0x0049, /* 0x49 */
+ 0x004a, /* 0x4a */
+ 0x004b, /* 0x4b */
+ 0x004c, /* 0x4c */
+ 0x004d, /* 0x4d */
+ 0x004e, /* 0x4e */
+ 0x004f, /* 0x4f */
+ 0x0050, /* 0x50 */
+ 0x0051, /* 0x51 */
+ 0x0052, /* 0x52 */
+ 0x0053, /* 0x53 */
+ 0x0054, /* 0x54 */
+ 0x0055, /* 0x55 */
+ 0x0056, /* 0x56 */
+ 0x0057, /* 0x57 */
+ 0x0058, /* 0x58 */
+ 0x0059, /* 0x59 */
+ 0x005a, /* 0x5a */
+ 0x005b, /* 0x5b */
+ 0x005c, /* 0x5c */
+ 0x005d, /* 0x5d */
+ 0x005e, /* 0x5e */
+ 0x005f, /* 0x5f */
+ 0x0060, /* 0x60 */
+ 0x0061, /* 0x61 */
+ 0x0062, /* 0x62 */
+ 0x0063, /* 0x63 */
+ 0x0064, /* 0x64 */
+ 0x0065, /* 0x65 */
+ 0x0066, /* 0x66 */
+ 0x0067, /* 0x67 */
+ 0x0068, /* 0x68 */
+ 0x0069, /* 0x69 */
+ 0x006a, /* 0x6a */
+ 0x006b, /* 0x6b */
+ 0x006c, /* 0x6c */
+ 0x006d, /* 0x6d */
+ 0x006e, /* 0x6e */
+ 0x006f, /* 0x6f */
+ 0x0070, /* 0x70 */
+ 0x0071, /* 0x71 */
+ 0x0072, /* 0x72 */
+ 0x0073, /* 0x73 */
+ 0x0074, /* 0x74 */
+ 0x0075, /* 0x75 */
+ 0x0076, /* 0x76 */
+ 0x0077, /* 0x77 */
+ 0x0078, /* 0x78 */
+ 0x0079, /* 0x79 */
+ 0x007a, /* 0x7a */
+ 0x007b, /* 0x7b */
+ 0x007c, /* 0x7c */
+ 0x007d, /* 0x7d */
+ 0x007e, /* 0x7e */
+ 0x007f, /* 0x7f */
+ 0x0080, /* 0x80 */
+ 0x0081, /* 0x81 */
+ 0x0082, /* 0x82 */
+ 0x0083, /* 0x83 */
+ 0x0084, /* 0x84 */
+ 0x0085, /* 0x85 */
+ 0x0086, /* 0x86 */
+ 0x0087, /* 0x87 */
+ 0x0088, /* 0x88 */
+ 0x0089, /* 0x89 */
+ 0x008a, /* 0x8a */
+ 0x008b, /* 0x8b */
+ 0x008c, /* 0x8c */
+ 0x008d, /* 0x8d */
+ 0x008e, /* 0x8e */
+ 0x008f, /* 0x8f */
+ 0x0090, /* 0x90 */
+ 0x0091, /* 0x91 */
+ 0x0092, /* 0x92 */
+ 0x0093, /* 0x93 */
+ 0x0094, /* 0x94 */
+ 0x0095, /* 0x95 */
+ 0x0096, /* 0x96 */
+ 0x0097, /* 0x97 */
+ 0x0098, /* 0x98 */
+ 0x0099, /* 0x99 */
+ 0x009a, /* 0x9a */
+ 0x009b, /* 0x9b */
+ 0x009c, /* 0x9c */
+ 0x009d, /* 0x9d */
+ 0x009e, /* 0x9e */
+ 0x009f, /* 0x9f */
+ 0x00A0, /* 0xa0 */
+ 0x0901, /* 0xa1 */
+ 0x0902, /* 0xa2 */
+ 0x0903, /* 0xa3 */
+ 0x0905, /* 0xa4 */
+ 0x0906, /* 0xa5 */
+ 0x0907, /* 0xa6 */
+ 0x0908, /* 0xa7 */
+ 0x0909, /* 0xa8 */
+ 0x090a, /* 0xa9 */
+ 0x090b, /* 0xaa */
+ 0x090e, /* 0xab */
+ 0x090f, /* 0xac */
+ 0x0910, /* 0xad */
+ 0x090d, /* 0xae */
+ 0x0912, /* 0xaf */
+ 0x0913, /* 0xb0 */
+ 0x0914, /* 0xb1 */
+ 0x0911, /* 0xb2 */
+ 0x0915, /* 0xb3 */
+ 0x0916, /* 0xb4 */
+ 0x0917, /* 0xb5 */
+ 0x0918, /* 0xb6 */
+ 0x0919, /* 0xb7 */
+ 0x091a, /* 0xb8 */
+ 0x091b, /* 0xb9 */
+ 0x091c, /* 0xba */
+ 0x091d, /* 0xbb */
+ 0x091e, /* 0xbc */
+ 0x091f, /* 0xbd */
+ 0x0920, /* 0xbe */
+ 0x0921, /* 0xbf */
+ 0x0922, /* 0xc0 */
+ 0x0923, /* 0xc1 */
+ 0x0924, /* 0xc2 */
+ 0x0925, /* 0xc3 */
+ 0x0926, /* 0xc4 */
+ 0x0927, /* 0xc5 */
+ 0x0928, /* 0xc6 */
+ 0x0929, /* 0xc7 */
+ 0x092a, /* 0xc8 */
+ 0x092b, /* 0xc9 */
+ 0x092c, /* 0xca */
+ 0x092d, /* 0xcb */
+ 0x092e, /* 0xcc */
+ 0x092f, /* 0xcd */
+ 0x095f, /* 0xce */
+ 0x0930, /* 0xcf */
+ 0x0931, /* 0xd0 */
+ 0x0932, /* 0xd1 */
+ 0x0933, /* 0xd2 */
+ 0x0934, /* 0xd3 */
+ 0x0935, /* 0xd4 */
+ 0x0936, /* 0xd5 */
+ 0x0937, /* 0xd6 */
+ 0x0938, /* 0xd7 */
+ 0x0939, /* 0xd8 */
+ 0x200D, /* 0xd9 */
+ 0x093e, /* 0xda */
+ 0x093f, /* 0xdb */
+ 0x0940, /* 0xdc */
+ 0x0941, /* 0xdd */
+ 0x0942, /* 0xde */
+ 0x0943, /* 0xdf */
+ 0x0946, /* 0xe0 */
+ 0x0947, /* 0xe1 */
+ 0x0948, /* 0xe2 */
+ 0x0945, /* 0xe3 */
+ 0x094a, /* 0xe4 */
+ 0x094b, /* 0xe5 */
+ 0x094c, /* 0xe6 */
+ 0x0949, /* 0xe7 */
+ 0x094d, /* 0xe8 */
+ 0x093c, /* 0xe9 */
+ 0x0964, /* 0xea */
+ 0xFFFF, /* 0xeb */
+ 0xFFFF, /* 0xec */
+ 0xFFFF, /* 0xed */
+ 0xFFFF, /* 0xee */
+ 0xFFFF, /* 0xef */
+ 0xFFFF, /* 0xf0 */
+ 0x0966, /* 0xf1 */
+ 0x0967, /* 0xf2 */
+ 0x0968, /* 0xf3 */
+ 0x0969, /* 0xf4 */
+ 0x096a, /* 0xf5 */
+ 0x096b, /* 0xf6 */
+ 0x096c, /* 0xf7 */
+ 0x096d, /* 0xf8 */
+ 0x096e, /* 0xf9 */
+ 0x096f, /* 0xfa */
+ 0xFFFF, /* 0xfb */
+ 0xFFFF, /* 0xfc */
+ 0xFFFF, /* 0xfd */
+ 0xFFFF, /* 0xfe */
+ 0xFFFF, /* 0xff */
+ };
+ private static final char nuktaSpecialCases[][] = { /* { ISCII byte seen just before NUKTA, Unicode char emitted for the pair }; decodeLoop scans rows 1..[0][0]-1 */
+ { 16 /* length of array */ , 0 }, /* header row: [0][0] is the total row count used as the loop bound */
+ { 0xA6, 0x090c },
+ { 0xEA, 0x093D },
+ { 0xDF, 0x0944 },
+ { 0xA1, 0x0950 },
+ { 0xb3, 0x0958 },
+ { 0xb4, 0x0959 },
+ { 0xb5, 0x095a },
+ { 0xba, 0x095b },
+ { 0xbf, 0x095c },
+ { 0xC0, 0x095d },
+ { 0xc9, 0x095e },
+ { 0xAA, 0x0960 },
+ { 0xA7, 0x0961 },
+ { 0xDB, 0x0962 },
+ { 0xDC, 0x0963 }
+ };
+ private static final char vowelSignESpecialCases[][] = { /* same layout as nuktaSpecialCases, consulted by decodeLoop for the byte preceding ISCII_VOWEL_SIGN_E */
+ { 2 /* length of array */ , 0 }, /* header row: [0][0] is the total row count used as the loop bound */
+ { 0xA4, 0x0904 } /* context byte 0xA4 followed by VOWEL_SIGN_E maps to U+0904 */
+ };
+
+ private static final short lookupTable[][] = { /* row = (script byte following ATR) & 0x0F, see decodeLoop; [0] = UniLang block index (multiplied by UniLang.DELTA), [1] = validity mask */
+ { MaskEnum.ZERO, MaskEnum.ZERO }, /* DEFAULT */
+ { MaskEnum.ZERO, MaskEnum.ZERO }, /* ROMAN */
+ { UniLang.DEVALANGARI, MaskEnum.DEV_MASK }, /* row 2 */
+ { UniLang.BENGALI, MaskEnum.BNG_MASK }, /* row 3 */
+ { UniLang.TAMIL, MaskEnum.TML_MASK }, /* row 4 */
+ { UniLang.TELUGU, MaskEnum.KND_MASK }, /* row 5; NOTE(review): Telugu is paired with KND_MASK - presumably the two scripts share one mask, confirm */
+ { UniLang.BENGALI, MaskEnum.BNG_MASK }, /* row 6; NOTE(review): second Bengali row - presumably the script code for Assamese, confirm */
+ { UniLang.ORIYA, MaskEnum.ORI_MASK }, /* row 7 */
+ { UniLang.KANNADA, MaskEnum.KND_MASK }, /* row 8 */
+ { UniLang.MALAYALAM, MaskEnum.MLM_MASK }, /* row 9 */
+ { UniLang.GUJARATI, MaskEnum.GJR_MASK }, /* row 10 */
+ { UniLang.GURMUKHI, MaskEnum.PNJ_MASK } /* row 11 */
+ };
+
+ private UConverterDataISCII extraInfo = null; /* ISCII converter state; this single instance is read and re-initialized by every decoder and encoder created from this charset */
+ protected byte[] fromUSubstitution = new byte[]{(byte)0x1A}; /* substitution byte sequence passed to the encoder's super constructor */
+
+ /* Builds the ISCII charset; the option/version number is parsed from icuCanonicalName starting at
+  * index 14, so a name that is shorter or non-numeric there makes construction fail with an exception. */
+ public CharsetISCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+     super(icuCanonicalName, javaCanonicalName, aliases);
+     maxBytesPerChar = 4;
+     minBytesPerChar = 1;
+     maxCharsPerByte = 1;
+     // NOTE(review): offset 14 presumably skips a fixed "...version=" prefix of the ICU name - confirm
+     int option = Integer.parseInt(icuCanonicalName.substring(14));
+
+     /* the concatenation already yields a fresh String, so the former new String(...) copy was redundant */
+     extraInfo = new UConverterDataISCII(option, ISCII_CNV_PREFIX + (option & UCNV_OPTIONS_VERSION_MASK));
+ }
+
+ /*
+ * Rules for the ISCII to Unicode converter
+ * ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
+ * which may use either precomposed or decomposed characters, both
+ * pre-context and post-context need to be considered.
+ *
+ * Post context
+ * i) ATR : Attribute code is used to declare the font and script switching.
+ * Currently we only switch scripts; font codes are consumed without generating an error
+ * ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
+ * obsolete characters
+ * Pre context
+ * i) Halant: if preceded by a halant then it is an explicit halant
+ * ii) Nukta:
+ * a) if preceded by a halant then it is a soft halant
+ * b) if preceded by specific consonants and the ligatures have pre-composed
+ * characters in Unicode then convert to pre-composed characters
+ * iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
+ */
+ class CharsetDecoderISCII extends CharsetDecoderICU {
+ public CharsetDecoderISCII(CharsetICU cs) {
+     super(cs);
+     this.implReset(); /* begin in the reset state; this also re-initializes the charset-wide extraInfo */
+ }
+
+ protected void implReset() {
+     super.implReset();
+     toUnicodeStatus = 0xFFFF; /* presumably equals missingCharMarker, i.e. no delayed output char is pending - confirm */
+     extraInfo.initialize(); /* extraInfo belongs to the enclosing charset, so this resets state other coders also see */
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult cr = CoderResult.UNDERFLOW; /* result returned when the whole input is consumed without error */
+ int targetUniChar = 0x0000; /* Unicode mapping chosen for the byte being decoded */
+ short sourceChar = 0x0000; /* current ISCII byte after masking with UNSIGNED_BYTE_MASK */
+ UConverterDataISCII data;
+ boolean gotoCallBack = false; /* set once an error result has been chosen; routes control to the "CallBack" exit below */
+
+ data = extraInfo;
+ /* data.contextCharToUnicode holds the previous ISCII byte visited (the context for the current byte) */
+ /* this.toUnicodeStatus holds the Unicode mapping of that byte; writing it is delayed by one step (see "delayed write" below) */
+
+ while (source.hasRemaining()) {
+ targetUniChar = UConverterConstants.missingCharMarker;
+
+ if (target.hasRemaining()) {
+ sourceChar = (short)((short)source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+ /* look at the post-context and perform special processing */
+ if (data.contextCharToUnicode == ATR) {
+ /* If we have ATR in data.contextCharToUnicode then we need to change our
+ * state to Indic Script specified by sourceChar
+ */
+ /* check if the sourceChar is supported script range */
+ if (((short)(ISCIILang.PNJ - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (ISCIILang.PNJ - ISCIILang.DEV)) {
+ data.currentDeltaToUnicode = (short)(lookupTable[sourceChar & 0x0F][0] * UniLang.DELTA);
+ data.currentMaskToUnicode = lookupTable[sourceChar & 0x0F][1];
+ } else if (sourceChar == ISCIILang.DEF) {
+ /* switch back to default */
+ data.currentDeltaToUnicode = data.defDeltaToUnicode;
+ data.currentMaskToUnicode = data.defMaskToUnicode;
+ } else {
+ if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
+ /* these are display codes: consume and continue */
+ } else {
+ cr = CoderResult.malformedForLength(1);
+ /* reset */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ gotoCallBack = true;
+ }
+ }
+ /* reset */
+ if (!gotoCallBack) {
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ continue;
+ }
+ } else if (data.contextCharToUnicode == EXT) {
+ /* check if sourceChar is in 0xA1 - 0xEE range */
+ if (((short)(EXT_RANGE_END - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
+ /* We currently support only Anudatta and Devanagari abbreviation sign */
+ if (sourceChar == 0xBF || sourceChar == 0xB8) {
+ targetUniChar = (sourceChar == 0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
+
+ /* find out if the mapping is valid in this state */
+ if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) {
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+
+ /* write to target */
+ WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
+
+ continue;
+ }
+ }
+ /* byte unit is unassigned */
+ targetUniChar = UConverterConstants.missingCharMarker;
+ cr = CoderResult.unmappableForLength(1);
+ } else {
+ /* only 0xA1 - 0xEE are legal after EXT char */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ cr = CoderResult.malformedForLength(1);
+ }
+ gotoCallBack = true;
+ } else if (data.contextCharToUnicode == ISCII_INV) {
+ if (sourceChar == ISCII_HALANT) {
+ targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
+ } else {
+ targetUniChar = ZWJ;
+ }
+
+ /* write to target */
+ WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode);
+ /* reset */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ }
+
+ /* look at the pre-context and perform special processing */
+ if (!gotoCallBack) {
+ switch (sourceChar) {
+ case ISCII_INV:
+ case EXT: /* falls through */
+ case ATR:
+ data.contextCharToUnicode = (char)sourceChar;
+
+ if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
+ WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
+ this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+ }
+ continue;
+ case ISCII_DANDA:
+ /* handle double danda */
+ if (data.contextCharToUnicode == ISCII_DANDA) {
+ targetUniChar = DOUBLE_DANDA;
+ /* clear the context */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+ } else {
+ targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+ data.contextCharToUnicode = (char)sourceChar;
+ }
+ break;
+ case ISCII_HALANT:
+ /* handle explicit halant */
+ if (data.contextCharToUnicode == ISCII_HALANT) {
+ targetUniChar = ZWNJ;
+ /* clear context */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ } else {
+ targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+ data.contextCharToUnicode = (char)sourceChar;
+ }
+ break;
+ case 0x0A:
+ /* fall through */
+ case 0x0D:
+ data.resetToDefaultToUnicode = true;
+ targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+ data.contextCharToUnicode = (char)sourceChar;
+ break;
+ case ISCII_VOWEL_SIGN_E:
+ /* find <CHAR> + SIGN_VOWEL_E special mapping */
+ int n = 1;
+ boolean find = false;
+ for (; n < vowelSignESpecialCases[0][0]; n++) {
+ if (vowelSignESpecialCases[n][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
+ targetUniChar = vowelSignESpecialCases[n][1];
+ find = true;
+ break;
+ }
+ }
+ if (find) {
+ /* find out if the mapping is valid in this state */
+ if ((validityTable[(byte)targetUniChar] & data.currentMaskFromUnicode) > 0) { /* NOTE(review): this tests the encoder-side mask (currentMaskFromUnicode) while the NUKTA branch below tests currentMaskToUnicode - confirm which is intended */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+ break;
+ }
+ }
+ targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+ data.contextCharToUnicode = (char)sourceChar;
+ break;
+ case ISCII_NUKTA:
+ /* handle soft halant */
+ if (data.contextCharToUnicode == ISCII_HALANT) {
+ targetUniChar = ZWJ;
+ /* clear the context */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ break;
+ } else {
+ /* try to handle <CHAR> + ISCII_NUKTA special mappings */
+ int i = 1;
+ boolean found = false;
+ for (; i < nuktaSpecialCases[0][0]; i++) {
+ if (nuktaSpecialCases[i][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) {
+ targetUniChar = nuktaSpecialCases[i][1];
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ /* find out if the mapping is valid in this state */
+ if ((validityTable[(byte)targetUniChar] & data.currentMaskToUnicode) > 0) {
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+ break;
+ }
+ /* else fall through to default */
+ }
+ /* else fall through to default */
+ }
+
+ default:
+ targetUniChar = GetMapping(sourceChar, targetUniChar, data);
+ data.contextCharToUnicode = (char)sourceChar;
+ break;
+ } //end of switch
+ }//end of CallBack if statement
+
+ if (this.toUnicodeStatus != UConverterConstants.missingCharMarker && !gotoCallBack) {
+ /* write the previously mapped codepoint */
+ WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
+ this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+ }
+
+ if (targetUniChar != UConverterConstants.missingCharMarker && !gotoCallBack) {
+ /* now save the targetUniChar for delayed write */
+ this.toUnicodeStatus = (char)targetUniChar;
+ if (data.resetToDefaultToUnicode) {
+ data.currentDeltaToUnicode = data.defDeltaToUnicode;
+ data.currentMaskToUnicode = data.defMaskToUnicode;
+ data.resetToDefaultToUnicode = false;
+ }
+ } else {
+ /* we reach here if targetUniChar == missingCharMarker or gotoCallBack is set;
+ * in the first case no result has been chosen yet, so report the byte as unmappable
+ */
+ if (!gotoCallBack) {
+ cr = CoderResult.unmappableForLength(1);
+ }
+//CallBack :
+ toUBytesArray[0] = (byte)sourceChar;
+ toULength = 1;
+ gotoCallBack = false;
+ break;
+ }
+ } else {
+ cr = CoderResult.OVERFLOW;
+ break;
+ }
+
+ } //end of while
+
+ if (!cr.isError() && !cr.isOverflow() && flush && !source.hasRemaining()) {
+ /* end of the input stream */
+ if (data.contextCharToUnicode == ATR || data.contextCharToUnicode == EXT || data.contextCharToUnicode == ISCII_INV) {
+ /* set toUBytes[] */
+ toUBytesArray[0] = (byte)data.contextCharToUnicode;
+ toULength = 1;
+
+ /* avoid looping on truncated sequences */
+ data.contextCharToUnicode = NO_CHAR_MARKER;
+ } else {
+ toULength = 0;
+ }
+
+ if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) {
+ /* output a remaining target character */
+ WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode);
+ this.toUnicodeStatus = UConverterConstants.missingCharMarker;
+ }
+ }
+ return cr;
+ }
+
+ /* Writes one decoded char to target (or to the char error buffer when target is full), after moving it into the current script block. */
+ private CoderResult WriteToTargetToU(IntBuffer offsets, int offset, ByteBuffer source, CharBuffer target, int targetUniChar, short delta) {
+     /* ASCII, ZWJ, ZWNJ and both dandas are emitted unchanged; every other value is shifted by delta */
+     boolean keepAsIs = targetUniChar <= ASCII_END
+             || targetUniChar == ZWJ
+             || targetUniChar == ZWNJ
+             || targetUniChar == DANDA
+             || targetUniChar == DOUBLE_DANDA;
+     if (!keepAsIs) {
+         targetUniChar += delta;
+     }
+     if (!target.hasRemaining()) {
+         /* no room left: park the char in the error buffer and report overflow */
+         charErrorBufferArray[charErrorBufferLength++] = (char)targetUniChar;
+         return CoderResult.OVERFLOW;
+     }
+     target.put((char)targetUniChar);
+     if (offsets != null) {
+         offsets.put(offset);
+     }
+     return CoderResult.UNDERFLOW;
+ }
+
+ /* Maps ISCII byte sourceChar via toUnicodeTable; returns missingCharMarker when the result is invalid in the current script. The incoming targetUniChar value is ignored. */
+ private int GetMapping(short sourceChar, int targetUniChar, UConverterDataISCII data) {
+     targetUniChar = toUnicodeTable[sourceChar];
+     /* is the code point valid in current script? NOTE(review): the index can reach 0xFF (e.g. for 0xFFFF) - confirm validityTable has 256 entries */
+     if (sourceChar > ASCII_END &&
+             (validityTable[(short)targetUniChar & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) == 0) {
+         /* Vocalic RR is assigned in ISCII Telugu and Unicode: that pair is the only exception, so reject unless script is Telugu AND char is Vocalic RR (was '&&', which rejected almost nothing) */
+         if (data.currentDeltaToUnicode != (TELUGU_DELTA) || targetUniChar != VOCALLIC_RR) {
+             targetUniChar = UConverterConstants.missingCharMarker;
+         }
+     }
+     return targetUniChar;
+ }
+ }
+
+ /*
+ * Rules:
+ * Explicit Halant :
+ * <HALANT> + <ZWNJ>
+ * Soft Halant :
+ * <HALANT> + <ZWJ>
+ */
+ class CharsetEncoderISCII extends CharsetEncoderICU {
+ public CharsetEncoderISCII(CharsetICU cs) {
+     super(cs, fromUSubstitution); /* fromUSubstitution is the enclosing charset's substitution byte array */
+     this.implReset(); /* begin in the reset state; this also re-initializes the charset-wide extraInfo */
+ }
+
+ protected void implReset() { /* resets the base encoder, then the ISCII state held by the enclosing charset */
+     super.implReset();
+     extraInfo.initialize(); /* extraInfo belongs to the enclosing charset, so this resets state other coders also see */
+ }
+
+ /* Encodes UTF-16 input as ISCII. Chars up to ASCII_END are written as-is (LF is followed by ATR plus the script
+  * code of the current range); Indic code points are normalized to Devanagari and mapped through fromUnicodeTable,
+  * with ATR plus script code written whenever the script block changes. Returns UNDERFLOW, OVERFLOW or an unmappable result. */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
+     int targetByteUnit = 0x0000;
+     int sourceChar = 0x0000;
+     boolean deltaChanged = false;
+     CoderResult cr;
+
+     /* initialize data */
+     UConverterDataISCII converterData = extraInfo;
+     short newDelta = converterData.currentDeltaFromUnicode;
+     short range = (short)(newDelta / UniLang.DELTA);
+
+     if ((sourceChar = fromUChar32) != 0) {
+         cr = handleSurrogates(source, (char) sourceChar);
+         return (cr != null) ? cr : CoderResult.unmappableForLength(2);
+     }
+
+     /* writing the char to the output stream */
+     while (true) {
+         if (!source.hasRemaining())
+             return CoderResult.UNDERFLOW;
+         if (!target.hasRemaining())
+             return CoderResult.OVERFLOW;
+
+         targetByteUnit = UConverterConstants.missingCharMarker;
+         sourceChar = source.get();
+
+         /* check if input is in ASCII and C0 control codes range */
+         if (sourceChar <= ASCII_END) {
+             cr = WriteToTargetFromU(offsets, source, target, sourceChar);
+             if (cr.isOverflow()) {
+                 break;
+             }
+             if (sourceChar == LF) {
+                 targetByteUnit = ATR << 8;
+                 targetByteUnit += (byte)lookupInitialData[range].isciiLang;
+                 fromUnicodeStatus = sourceChar;
+                 /* now append ATR and language code */
+                 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
+                 if (cr.isOverflow()) {
+                     break;
+                 }
+             }
+             continue;
+         }
+
+         switch (sourceChar) {
+         case ZWNJ:
+             /* contextChar has HALANT */
+             if (converterData.contextCharFromUnicode != 0) {
+                 converterData.contextCharFromUnicode = 0x00;
+                 targetByteUnit = ISCII_HALANT;
+             } else {
+                 /* consume ZWNJ and continue */
+                 converterData.contextCharFromUnicode = 0x00;
+                 continue;
+             }
+             break;
+         case ZWJ:
+             /* contextChar has HALANT */
+             if (converterData.contextCharFromUnicode != 0) {
+                 targetByteUnit = ISCII_NUKTA;
+             } else {
+                 targetByteUnit = ISCII_INV;
+             }
+             converterData.contextCharFromUnicode = 0x00;
+             break;
+         default:
+             /* is the sourceChar in the INDIC_RANGE? */
+             if((char)(INDIC_BLOCK_END - sourceChar) <= INDIC_RANGE) {
+                 /* Danda and Double Danda are valid in Northern scripts.. since Unicode
+                  * does not include these codepoints in all Northern scripts we need to
+                  * filter them out
+                  */
+                 if (sourceChar != DANDA && sourceChar != DOUBLE_DANDA) {
+                     /* find out to which block the sourceChar belongs */
+                     range = (short)((sourceChar - INDIC_BLOCK_BEGIN) / UniLang.DELTA);
+                     newDelta = (short)(range * UniLang.DELTA);
+
+                     /* Now are we in the same block as previous? */
+                     if (newDelta != converterData.currentDeltaFromUnicode || converterData.isFirstBuffer) {
+                         converterData.currentDeltaFromUnicode = newDelta;
+                         converterData.currentMaskFromUnicode = lookupInitialData[range].maskEnum;
+                         deltaChanged = true;
+                         converterData.isFirstBuffer = false;
+                     }
+                     /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
+                     /* now subtract the new delta from sourceChar */
+                     sourceChar -= converterData.currentDeltaFromUnicode;
+                 }
+                 /* get the target byte unit */
+                 targetByteUnit = fromUnicodeTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK];
+
+                 /* is the code point valid in current script? */
+                 if ((validityTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK] & converterData.currentMaskFromUnicode) == 0) {
+                     /* Vocalic RR is assigned in ISCII Telugu and Unicode: that pair is the only exception, so reject unless script is Telugu AND char is Vocalic RR (was '&&') */
+                     if (converterData.currentDeltaFromUnicode != (TELUGU_DELTA) || sourceChar != VOCALLIC_RR) {
+                         targetByteUnit = UConverterConstants.missingCharMarker;
+                     }
+                 }
+
+                 if (deltaChanged) {
+                     /* we are in a script block which is different than
+                      * previous sourceChar's script block write ATR and language codes
+                      */
+                     char temp = 0;
+                     temp = (char)(ATR << 8);
+                     temp += (char)((short)lookupInitialData[range].isciiLang & UConverterConstants.UNSIGNED_BYTE_MASK);
+                     /* reset */
+                     deltaChanged = false;
+                     /* now append ATR and language code */
+                     cr = WriteToTargetFromU(offsets, source, target, temp);
+                     if (cr.isOverflow()) {
+                         break;
+                     }
+                 }
+             }
+             /* reset context char */
+             converterData.contextCharFromUnicode = 0x00;
+             break;
+         } //end of switch
+
+         if (targetByteUnit != UConverterConstants.missingCharMarker) {
+             if (targetByteUnit == ISCII_HALANT) {
+                 converterData.contextCharFromUnicode = (char)targetByteUnit;
+             }
+             /*write targetByteUnit to target */
+             cr = WriteToTargetFromU(offsets, source, target, targetByteUnit);
+             if (cr.isOverflow()) {
+                 break;
+             }
+         } else if (UTF16.isSurrogate((char)sourceChar)) {
+             cr = handleSurrogates(source, (char) sourceChar);
+             return (cr != null) ? cr : CoderResult.unmappableForLength(2);
+         } else {
+             return CoderResult.unmappableForLength(1);
+         }
+     } /* end of while */
+
+     /* save the state and return */
+     return cr;
+ }
+
+ /* Writes targetByteUnit to target as one byte (value <= 0xFF) or as two bytes, high byte first.
+  * Bytes that do not fit are queued in errorBuffer and OVERFLOW is returned; otherwise UNDERFLOW.
+  * Each byte written to target records offset source.position() - 1 when offsets is non-null. */
+ private CoderResult WriteToTargetFromU(IntBuffer offsets, CharBuffer source, ByteBuffer target, int targetByteUnit) {
+     final int sourceIndex = source.position() - 1;
+     final byte lowByte = (byte)targetByteUnit;
+
+     if (!target.hasRemaining()) {
+         /* nothing fits: queue the high byte (if any bits of 0xFF00 are set) and then the low byte */
+         if ((targetByteUnit & 0xFF00) > 0) {
+             errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8);
+         }
+         errorBuffer[errorBufferLength++] = lowByte;
+         return CoderResult.OVERFLOW;
+     }
+     if (targetByteUnit > 0xFF) {
+         /* two-byte unit: the high byte goes out first */
+         target.put((byte)(targetByteUnit >> 8));
+         if (offsets != null) {
+             offsets.put(sourceIndex);
+         }
+         if (!target.hasRemaining()) {
+             /* only the high byte fitted: queue the low byte */
+             errorBuffer[errorBufferLength++] = lowByte;
+             return CoderResult.OVERFLOW;
+         }
+     }
+     target.put(lowByte);
+     if (offsets != null) {
+         offsets.put(sourceIndex);
+     }
+     return CoderResult.UNDERFLOW;
+ }
+ }
+
+ /* Returns a new ISCII decoder for this charset. Its constructor calls implReset(), which
+  * re-initializes the extraInfo state held by this charset. */
+ public CharsetDecoder newDecoder() { return new CharsetDecoderISCII(this); }
+
+ /* Returns a new ISCII encoder for this charset. Its constructor calls implReset(), which
+  * re-initializes the extraInfo state held by this charset. */
+ public CharsetEncoder newEncoder() { return new CharsetEncoderISCII(this); }
+}
diff --git a/src/com/ibm/icu/charset/CharsetISO2022.java b/src/com/ibm/icu/charset/CharsetISO2022.java
new file mode 100644
index 0000000..b71b0fd
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetISO2022.java
@@ -0,0 +1,3089 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.Arrays;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.charset.CharsetMBCS.MBCSToUFallback;
+import com.ibm.icu.charset.CharsetMBCS.UConverterMBCSTable;
+import com.ibm.icu.charset.CharsetMBCS;
+import com.ibm.icu.charset.CharsetMBCS.CharsetEncoderMBCS;
+import com.ibm.icu.charset.CharsetMBCS.CharsetDecoderMBCS;
+
+class CharsetISO2022 extends CharsetICU {
+ private UConverterDataISO2022 myConverterData; // per-charset converter state, created in the constructor
+ private int variant; // one of ISO_2022_JP, ISO_2022_KR or ISO_2022_CN; set by the matching ISO2022Init* method
+
+ private static final byte[] SHIFT_IN_STR = { 0x0f };
+ private static final byte[] SHIFT_OUT_STR = { 0x0e };
+
+ private static final byte CR = 0x0D;
+ private static final byte LF = 0x0A;
+ private static final byte H_TAB = 0x09;
+ private static final byte SPACE = 0x20;
+
+ private static final char HWKANA_START = 0xff61;
+ private static final char HWKANA_END = 0xff9f;
+
+ /*
+ * 94-character sets with native byte values A1..FE are encoded in ISO 2022
+ * as bytes 21..7E. (Subtract 0x80.)
+ * 96-character sets with native byte values A0..FF are encoded in ISO 2022
+ * as bytes 20..7F. (Subtract 0x80.)
+ * Do not encode C1 control codes with native bytes 80..9F
+ * as bytes 00..1F (C0 control codes).
+ */
+ private static final char GR94_START = 0xa1;
+ private static final char GR94_END = 0xfe;
+ private static final char GR96_START = 0xa0;
+ private static final char GR96_END = 0xff;
+
+ /* for ISO-2022-JP and -CN implementations */
+ // typedef enum {
+ /* shared values */
+ private static final byte INVALID_STATE = -1;
+ private static final byte ASCII = 0;
+
+ private static final byte SS2_STATE = 0x10;
+ private static final byte SS3_STATE = 0x11;
+
+ /* JP */
+ private static final byte ISO8859_1 = 1;
+ private static final byte ISO8859_7 = 2;
+ private static final byte JISX201 = 3;
+ private static final byte JISX208 = 4;
+ private static final byte JISX212 = 5;
+ private static final byte GB2312 = 6;
+ private static final byte KSC5601 = 7;
+ private static final byte HWKANA_7BIT = 8; /* Halfwidth Katakana 7 bit */
+
+ /* CN */
+ /* the first few enum constants must keep their values because they are used as indexes into myConverterArray[] */
+ private static final byte GB2312_1 = 1;
+ private static final byte ISO_IR_165= 2;
+ private static final byte CNS_11643 = 3;
+
+ /*
+ * these are used in StateEnum and ISO2022State variables,
+ * but CNS_11643 must be used to index into myConverterArray[]
+ */
+ private static final byte CNS_11643_0 = 0x20;
+ private static final byte CNS_11643_1 = 0x21;
+ private static final byte CNS_11643_2 = 0x22;
+ private static final byte CNS_11643_3 = 0x23;
+ private static final byte CNS_11643_4 = 0x24;
+ private static final byte CNS_11643_5 = 0x25;
+ private static final byte CNS_11643_6 = 0x26;
+ private static final byte CNS_11643_7 = 0x27;
+ // } StateEnum;
+
+
+ /* Picks the ISO-2022 variant from the "locale=" part and the optional one-digit "version=" part of icuCanonicalName; throws UnsupportedCharsetException for anything else. */
+ public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
+     super(icuCanonicalName, javaCanonicalName, aliases);
+     myConverterData = new UConverterDataISO2022();
+     int version = 0;
+     int versionIndex = icuCanonicalName.indexOf("version=");
+     if (versionIndex > 0) {
+         if (versionIndex + 8 >= icuCanonicalName.length()) { /* nothing follows "version=": substring() below would throw StringIndexOutOfBoundsException */
+             throw new UnsupportedCharsetException(icuCanonicalName);
+         }
+         try {
+             version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
+         } catch (NumberFormatException e) {
+             throw new UnsupportedCharsetException(icuCanonicalName);
+         }
+     }
+     myConverterData.version = version;
+
+     if (icuCanonicalName.indexOf("locale=ja") > 0) {
+         ISO2022InitJP(version);
+     } else if (icuCanonicalName.indexOf("locale=zh") > 0) {
+         ISO2022InitCN(version);
+     } else if (icuCanonicalName.indexOf("locale=ko") > 0) {
+         ISO2022InitKR(version);
+     } else {
+         throw new UnsupportedCharsetException(icuCanonicalName);
+     }
+     myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder(); /* NOTE(review): ISO2022InitKR already creates both coders - presumably harmless duplication, confirm */
+     myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
+ }
+
+ /* Sets this charset up as ISO-2022-JP: byte/char limits, the cached sub-converters enabled by jpCharsetMasks[version], the name and the generic converter. */
+ private void ISO2022InitJP(int version) {
+     variant = ISO_2022_JP;
+     maxBytesPerChar = 6;
+     minBytesPerChar = 1;
+     maxCharsPerByte = 1;
+     // open the required converters and cache them; optional ones only when their bit is set in the version's mask
+     final int enabledCharsets = jpCharsetMasks[version];
+     if ((enabledCharsets & CSM(ISO8859_7)) != 0) {
+         myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;
+     }
+ //myConverterData.myConverterArray[JISX201] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-201")).sharedData;
+     myConverterData.myConverterArray[JISX208] = ((CharsetMBCS)CharsetICU.forNameICU("Shift-JIS")).sharedData; // always cached, from the Shift-JIS converter
+     if ((enabledCharsets & CSM(JISX212)) != 0) {
+         myConverterData.myConverterArray[JISX212] = ((CharsetMBCS)CharsetICU.forNameICU("jisx-212")).sharedData;
+     }
+     if ((enabledCharsets & CSM(GB2312)) != 0) {
+         myConverterData.myConverterArray[GB2312] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
+     }
+     if ((enabledCharsets & CSM(KSC5601)) != 0) {
+         myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;
+     }
+     myConverterData.name = "ISO_2022,locale=ja,version=" + version;
+     // create a generic CharsetMBCS object
+     myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
+ }
+
+ /* Sets this charset up as ISO-2022-CN: byte/char limits, cached sub-converters (ISO-IR-165 only for version 1), the name and the generic converter. */
+ private void ISO2022InitCN(int version) {
+     variant = ISO_2022_CN;
+     maxBytesPerChar = 8;
+     minBytesPerChar = 1;
+     maxCharsPerByte = 1;
+     // open the required converters and cache them.
+     myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
+     if (version == 1) {
+         myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;
+     }
+     myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;
+
+     if (version == 1) {
+         myConverterData.name = "ISO_2022,locale=zh,version=" + version; // was "locale=ja", copied from the JP initialiser
+     } else {
+         myConverterData.version = 0; // any version other than 1 is treated as version 0
+         myConverterData.name = "ISO_2022,locale=zh,version=0";
+     }
+
+     // create a generic CharsetMBCS object
+     myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
+ }
+
+ /* Sets this charset up as ISO-2022-KR: byte/char limits, the converter for the requested version, and a fresh encoder/decoder pair for it. */
+ private void ISO2022InitKR(int version) {
+     variant = ISO_2022_KR;
+     maxBytesPerChar = 3;
+     minBytesPerChar = 1;
+     maxCharsPerByte = 1;
+
+     if (version != 1) {
+         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");
+         myConverterData.version = 0; // any version other than 1 is treated as version 0
+         myConverterData.name = "ISO_2022,locale=ko,version=0";
+     } else {
+         myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
+         myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0]; // overwrites subChar1 on the object returned by forNameICU
+         myConverterData.name = "ISO_2022,locale=ko,version=1";
+     }
+     myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
+     myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
+ }
+
+
+ /*
+  * ISO 2022 control codes must not be converted from Unicode because they would mess up the byte stream.
+  * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
+  * corresponding to SO, SI, and ESC.
+  * Negative c is rejected explicitly: Java uses only the low 5 bits of an int shift count, so 1<<c would alias (e.g. -5 -> 0x1b).
+  */
+ private static boolean IS_2022_CONTROL(int c) {
+     return c >= 0 && c < 0x20 && ((1 << c) & 0x0800c000) != 0;
+ }
+
+ /*
+  * Accepts only a 2-byte value whose bytes both lie in A1..FE (strict EUC DBCS)
+  * and returns it with 0x80 subtracted from each byte, i.e. moved to the
+  * ISO 2022 range 21..7E.
+  * Returns 0 if the value is out of range.
+  */
+ private static int _2022FromGR94DBCS(int value) {
+     /* the a1a1..fefe window bounds the whole value (and so the lead byte); the trail byte is tested on its own */
+     final short trailByte = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);
+     if (value < 0xa1a1 || value > 0xfefe || trailByte < 0xa1 || trailByte > 0xfe) {
+         return 0; /* not valid for ISO 2022 */
+     }
+     return value - 0x8080; /* shift down to 21..7e byte range */
+ }
+
+ /* True when cs, a StateEnum charset value, lies in the range JISX208..KSC5601 (the DBCS charsets). */
+ private static boolean IS_JP_DBCS(byte cs) {
+     return !(cs < JISX208 || cs > KSC5601);
+ }
+
+ private static short CSM(short cs) { /* charset mask for charset number cs */
+ return (short)(1<<cs); /* a single bit at position cs, narrowed to short */
+ }
+
+ private static int getFallback(UConverterMBCSTable mbcsTable, int offset)
+ {
+ MBCSToUFallback[] toUFallbacks;
+ int i, start, limit;
+
+ limit = mbcsTable.countToUFallbacks;
+ if(limit>0) {
+ /* do a binary search for the fallback mapping */
+ toUFallbacks = mbcsTable.toUFallbacks;
+ start = 0;
+ while(start<limit-1) {
+ i = (start+limit)/2;
+ if(offset<toUFallbacks[i].offset) {
+ limit = i;
+ }
+ else {
+ start = i;
+ }
+ }
+
+ /* did we really find it? */
+ if(offset==toUFallbacks[start].offset) {
+ return toUFallbacks[start].codePoint;
+ }
+ }
+
+ return 0xfffe;
+ }
+ /*
+  * Returns the index of the first ESC at or after source.position(), or source.limit() if there is none.
+  * Side effect: the scan uses relative get(), so source is advanced past every byte it reads
+  * (including an ESC found after the first byte). flush is not used. */
+ private static int getEndOfBuffer_2022(ByteBuffer source, boolean flush) {
+     int sourceIndex = source.position();
+     if (!source.hasRemaining()) { return sourceIndex; } /* empty input: the absolute get() below would throw */
+     byte mySource = source.get(sourceIndex); /* peek at the first byte without consuming it */
+     while (source.hasRemaining() && mySource != ESC_2022) {
+         mySource = source.get();
+         if (mySource == ESC_2022) { break; } /* sourceIndex stays on the ESC */
+         sourceIndex++;
+     }
+     return sourceIndex;
+ }
+
+ /*
+  * This is a simple version of _MBCSGetNextUChar() that is used
+  * by other converter implementations.
+  * It only returns an "assigned" result if it consumes the entire input (up to source.limit()).
+  * It does not use state from the converter, nor error codes.
+  * It does not handle the EBCDIC swaplfnl option (set in UConverter).
+  * It handles conversion extensions (skipped when extIndexes is null) but not GB 18030.
+  *
+  * Return value:
+  * U+fffe unassigned
+  * U+ffff illegal
+  * otherwise the Unicode code point
+  */
+ static int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,
+         ByteBuffer source,
+         boolean useFallback) {
+     int[][] stateTable;
+     char[] unicodeCodeUnits;
+
+     int offset;
+     int state;
+     int action;
+
+     int c;
+     int entry;
+
+     int i, length;
+
+     /* set up the local pointers */
+     stateTable=sharedData.mbcs.stateTable;
+     unicodeCodeUnits=sharedData.mbcs.unicodeCodeUnits;
+
+     length = source.limit();
+
+     /* converter state */
+     offset=0;
+     state=sharedData.mbcs.dbcsOnlyState;
+
+     /* conversion loop */
+     for(;;) {
+         if (source.hasRemaining() == false) {
+             /* no input at all: "illegal" */
+             return 0xffff;
+         }
+         int sourceByte = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
+         i = source.position(); /* index just past the byte that was read */
+
+         entry = stateTable[state][sourceByte];
+         if (CharsetMBCS.MBCS_ENTRY_IS_TRANSITION(entry)) {
+             state = CharsetMBCS.MBCS_ENTRY_TRANSITION_STATE(entry);
+             offset+=CharsetMBCS.MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+             if (i == length) {
+                 return 0xffff; /* truncated character */
+             }
+         } else {
+             /*
+              * An if-else-if chain provides more reliable performance for
+              * the most common cases compared to a switch.
+              */
+             action = CharsetMBCS.MBCS_ENTRY_FINAL_ACTION(entry);
+             if(action==CharsetMBCS.MBCS_STATE_VALID_16) {
+                 offset+=CharsetMBCS.MBCS_ENTRY_FINAL_VALUE_16(entry);
+                 c=unicodeCodeUnits[offset];
+                 if(c!=0xfffe) {
+                     /* done */
+                 } else if (useFallback) {
+                     c = getFallback(sharedData.mbcs, offset);
+                     /* else done with 0xfffe */
+                 }
+                 break;
+             } else if(action==CharsetMBCS.MBCS_STATE_VALID_DIRECT_16) {
+                 /* output BMP code point */
+                 c = CharsetMBCS.MBCS_ENTRY_FINAL_VALUE_16(entry);
+                 break;
+             } else if (action==CharsetMBCS.MBCS_STATE_VALID_16_PAIR) {
+                 offset += CharsetMBCS.MBCS_ENTRY_FINAL_VALUE_16(entry);
+                 c=unicodeCodeUnits[offset++];
+                 if(c<0xd800) {
+                     /* output BMP code point below 0xd800 */
+                 } else if (useFallback ? c<=0xdfff : c<=0xdbff) {
+                     /* output roundtrip or fallback supplementary code point */
+                     c = (((c&0x3ff)<<10) + unicodeCodeUnits[offset] + (0x10000-0xdc00));
+                 } else if(useFallback ? (c&0xfffe)==0xe000 : c==0xe000) {
+                     /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+                     c=unicodeCodeUnits[offset];
+                 } else if(c==0xffff) {
+                     return 0xffff;
+                 } else {
+                     c=0xfffe;
+                 }
+                 break;
+             } else if(action==CharsetMBCS.MBCS_STATE_VALID_DIRECT_20) {
+                 /* output supplementary code point */
+                 c=0x10000+CharsetMBCS.MBCS_ENTRY_FINAL_VALUE(entry);
+                 break;
+             } else if(action==CharsetMBCS.MBCS_STATE_FALLBACK_DIRECT_16) {
+                 if(!useFallback) {
+                     c=0xfffe;
+                     break;
+                 }
+                 /* output BMP code point */
+                 c=CharsetMBCS.MBCS_ENTRY_FINAL_VALUE_16(entry);
+                 break;
+             } else if(action==CharsetMBCS.MBCS_STATE_FALLBACK_DIRECT_20) {
+                 if(!useFallback) {
+                     c=0xfffe;
+                     break;
+                 }
+                 /* output supplementary code point */
+                 c=0x10000+CharsetMBCS.MBCS_ENTRY_FINAL_VALUE(entry);
+                 break;
+             } else if(action==CharsetMBCS.MBCS_STATE_UNASSIGNED) {
+                 c=0xfffe;
+                 break;
+             }
+
+             /*
+              * forbid MBCS_STATE_CHANGE_ONLY for this function,
+              * and MBCS_STATE_ILLEGAL and reserved action codes
+              */
+             return 0xffff;
+         }
+     }
+
+     if (i != length) {
+         /* illegal for this function: not all input consumed */
+         return 0xffff;
+     }
+
+     if(c==0xfffe) {
+         /* try an extension mapping; the null check must come before any use of cx */
+         ByteBuffer cx=sharedData.mbcs.extIndexes;
+         if(cx != null) {
+             cx.position(0);
+             source.position(0); /* NOTE(review): rewinds to index 0, not to the position at entry - confirm callers pass buffers starting at 0 */
+             return extSimpleMatchToU(cx, source, useFallback, sharedData);
+         }
+     }
+
+     return c;
+ }
+ /* This private static method is use by MBCSSimpleGetNextUChar for extension mapping.*/
+ private static int extSimpleMatchToU(ByteBuffer cx, ByteBuffer source, boolean useFallback, UConverterSharedData sharedData) {
+ int[] value = new int[1];
+ int match;
+
+ if (source.remaining() <= 0) {
+ return 0xffff;
+ }
+
+ /* try to match */
+ match = extMatchToU(cx, (byte)-1, source, null, value,
+ useFallback, true, sharedData);
+
+ if (match == source.array().length) {
+ /* write result for simple, single-character conversion */
+ if (CharsetMBCS.TO_U_IS_CODE_POINT(value[0])) {
+ return CharsetMBCS.TO_U_GET_CODE_POINT(value[0]);
+ }
+ }
+
+ /*
+ * return no match because
+ * - match>0 && value points to string: simple conversion cannot handle multiple code points
+ * - match>0 && match!=length: not all input consumed, forbidden for this function
+ * - match==0: no match found in the first place
+ * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
+ */
+ return 0xfffe;
+ }
+ /* Used by extSimpleMatchToU for extension mapping: matches the bytes of pre[] followed by src[] against the toUnicode extension table in cx. Returns the match length (>0, with the value in pMatchValue[0]), 0 for no match, or a negative length for a partial match when flush is false. */
+ private static int extMatchToU(ByteBuffer cx, byte sisoState, ByteBuffer pre, ByteBuffer src,
+ int[] pMatchValue, boolean isUseFallback, boolean flush, UConverterSharedData sharedData) {
+ IntBuffer toUTable, toUSection;
+
+ int preLength = pre.array().length; /* NOTE(review): size of the whole backing array, not pre.remaining() - confirm callers pass exactly-sized buffers */
+ int value, matchValue, srcLength;
+ int i, j, index, length, matchLength; /* i indexes pre[], j indexes src[] */
+ short b;
+
+ if (src == null) {
+ srcLength = 0;
+ } else {
+ srcLength = src.array().length; /* same backing-array assumption as preLength */
+ }
+
+ if (cx == null || cx.getInt(CharsetMBCS.EXT_TO_U_LENGTH) <= 0) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* initialize */
+ toUTable = (IntBuffer)CharsetMBCS.ARRAY(cx, CharsetMBCS.EXT_TO_U_INDEX, int.class);//(IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
+ index = 0;
+
+ matchValue = 0;
+ i = j = matchLength = 0;
+
+ if (sisoState == 0) {
+ /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
+ if (preLength > 1) {
+ return 0; /* no match of a DBCS sequence in SBCS mode */
+ } else if (preLength == 1) {
+ srcLength = 0;
+ } else /* preLength==0 */{
+ if (srcLength > 1) {
+ srcLength = 1;
+ }
+ }
+ flush = true;
+ }
+
+ /* we must not remember fallback matches when not using fallbacks */
+ /* NOTE(review): the isToUUseFallback(isUseFallback) alternatives below are commented out, so isUseFallback is unused and only roundtrip values are remembered */
+ /* match input units until there is a full match or the input is consumed */
+ for (;;) {
+ /* go to the next section */
+ int oldpos = toUTable.position();
+ toUSection = ((IntBuffer) toUTable.position(index)).slice();
+ toUTable.position(oldpos); /* restore the table position that was moved to take the slice */
+
+ /* read first pair of the section */
+ value = toUSection.get();
+ length = CharsetMBCS.TO_U_GET_BYTE(value);
+ value = CharsetMBCS.TO_U_GET_VALUE(value);
+ if (value != 0 && (CharsetMBCS.TO_U_IS_ROUNDTRIP(value)) /*|| isToUUseFallback(isUseFallback))*/
+ && CharsetMBCS.TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
+ /* remember longest match so far */
+ matchValue = value;
+ matchLength = i + j;
+ }
+
+ /* match pre[] then src[] */
+ if (i < preLength) {
+ b = (short) (pre.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ } else if (j < srcLength) {
+ b = (short) (src.get(j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ } else {
+ /* all input consumed, partial match */
+ if (flush || (length = (i + j)) > CharsetMBCS.MAX_BYTES) {
+ /*
+ * end of the entire input stream, stop with the longest match so far or: partial match must not
+ * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
+ */
+ break;
+ } else {
+ /* continue with more input next time */
+ return -length;
+ }
+ }
+
+ /* search for the current UChar */
+ value = CharsetMBCS.findToU(toUSection, length, b);
+ if (value == 0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ if (CharsetMBCS.TO_U_IS_PARTIAL(value)) {
+ /* partial match, continue */
+ index = CharsetMBCS.TO_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if (CharsetMBCS.TO_U_IS_ROUNDTRIP(value) /*|| isToUUseFallback(isUseFallback)) */&&
+ CharsetMBCS.TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
+ /* full match, stop with result */
+ matchValue = value;
+ matchLength = i + j;
+ } else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if (matchLength == 0) {
+ /* no match at all */
+ return 0;
+ }
+
+ /* return result */
+ pMatchValue[0] = CharsetMBCS.TO_U_MASK_ROUNDTRIP(matchValue);
+ return matchLength;
+ }
+ /*
+ * @param is the the output byte
+ * @return 1 roundtrip byte 0 no mapping -1 fallback byte
+ */
+ static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {
+ char[] table;
+ int value;
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ return 0;
+ }
+ /* convert the Unicode code point in c into codepage bytes */
+ table = sharedData.mbcs.fromUnicodeTable;
+ /* get the byte for the output */
+ value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+ /* get the byte for the output */
+ retval[0] = value & 0xff;
+ if (value >= 0xf00) {
+ return 1; /* roundtrip */
+ } else if (useFallback ? value>=0x800 : value>=0xc00) {
+ return -1; /* fallback taken */
+ } else {
+ return 0; /* no mapping */
+ }
+ }
+
+ /*
+ * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
+ * to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
+ *
+ * Note: The converter uses some leniency:
+ * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
+ * all versions, not just JIS7 and JIS8.
+ * - ICU does not distinguish between different versions of JIS X 0208.
+ */
+ private static final short jpCharsetMasks[] = { /* indexed by the ISO-2022-JP version number */
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)), /* version 0 */
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)), /* version 1 */
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)), /* version 2 */
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)), /* version 3 */
+ (short)(CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)) /* version 4 */
+ };
+
+ // typedef enum { (converter type codes; UConverterDataISO2022.currentType is annotated as holding a Cnv2022Type)
+ private static final byte ASCII1 = 0;
+ private static final byte LATIN1 = 1;
+ private static final byte SBCS = 2;
+ private static final byte DBCS = 3;
+ private static final byte MBCS = 4;
+ private static final byte HWKANA = 5;
+ // } Cnv2022Type;
+
+ /* Charset designations and shift state for one direction of ISO 2022 conversion. */
+ private class ISO2022State {
+     private byte[] cs; /* charset numbers designated to G0..G3 (selected by SI/SO/SS2/SS3) */
+     private byte g; /* 0..3: which of G0..G3 is currently active */
+     private byte prevG; /* g as it was before a single shift (SS2 or SS3) */
+
+     ISO2022State() {
+         this.cs = new byte[4];
+     }
+
+     /* Clears all four designations and goes back to G0. */
+     void reset() {
+         for (int slot = 0; slot < cs.length; slot++) { cs[slot] = 0; }
+         g = prevG = 0;
+     }
+
+     /* Copies every field of other into this state. */
+     void copyFrom(ISO2022State other) {
+         System.arraycopy(other.cs, 0, cs, 0, cs.length);
+         g = other.g;
+         prevG = other.prevG;
+     }
+ }
+
+ private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf; /* NOTE(review): presumably masks the version out of the converter options - no use is visible here, confirm */
+ private static final byte UCNV_2022_MAX_CONVERTERS = 10; /* length of UConverterDataISO2022.myConverterArray */
+
+ /* Per-charset ISO 2022 data: sub-converters, state for both directions, and naming information. */
+ private class UConverterDataISO2022 {
+     /* shared converter data, indexed by charset number */
+     UConverterSharedData[] myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];
+
+     /* converter currently in use, with its encoder and decoder */
+     CharsetEncoderMBCS currentEncoder;
+     CharsetDecoderMBCS currentDecoder;
+     CharsetMBCS currentConverter;
+     int currentType = 0; // Cnv2022Type;
+
+     /* designation/shift state, one per conversion direction */
+     ISO2022State toU2022State = new ISO2022State();
+     ISO2022State fromU2022State = new ISO2022State();
+
+     int key = 0; /* escape-sequence key accumulated so far */
+     int version = 0;
+     String name = new String();
+     String locale = new String();
+     boolean isEmptySegment = false;
+
+     /* Every field takes its initial value from its declaration above. */
+     UConverterDataISO2022() {
+     }
+
+     /* Resets both direction states and the empty-segment flag. NOTE(review): key is not cleared here - confirm that is intended. */
+     void reset() {
+         toU2022State.reset();
+         fromU2022State.reset();
+         isEmptySegment = false;
+     }
+ }
+
+ private static final byte ESC_2022 = 0x1B; /* ESC */
+
+ // typedef enum { (result codes of getKey_2022; the values stored in escSeqStateTable_Value_2022)
+ private static final byte INVALID_2022 = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */
+ private static final byte VALID_NON_TERMINAL_2022 = 0; /* so far corresponds to a valid iso 2022 escape sequence */
+ private static final byte VALID_TERMINAL_2022 = 1; /* corresponds to a valid iso 2022 escape sequence */
+ private static final byte VALID_MAYBE_TERMINAL_2022 = 2; /* so far matches one iso 2022 escape sequence, but by adding
+ more characters might match another escape sequence */
+ // } UCNV_TableStates_2022;
+
+ /*
+ * The way these state transition arrays work is:
+ * ex : ESC$B is the sequence for JISX208
+ * a) First Iteration: char is ESC
+ * i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
+ * int x = normalize_esq_chars_2022[27] which is equal to 1
+ * ii) Search for this value in escSeqStateTable_Key_2022[]
+ * value of x is stored at escSeqStateTable_Key_2022[0]
+ * iii) Save this index as offset
+ * iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+ * escSeqStateTable_value_2022[offset], which is VALID_NON_TERMINAL_2022
+ * b) Switch on this state and continue to next char
+ * i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
+ * which is normalize_esq_chars_2022[36] == 4
+ * ii) x is currently 1(from above)
+ * x<<=5 -- x is now 32
+ * x+=normalize_esq_chars_2022[36]
+ * now x is 36
+ * iii) Search for this value in escSeqStateTable_Key_2022[]
+ * value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
+ * iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+ * escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
+ * c) Switch on this state and continue to next char
+ * i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index
+ * ii) x is currently 36 (from above)
+ * x<<=5 -- x is now 1152
+ * x+= normalize_esq_chars_2022[66]
+ * now x is 1161
+ * iii) Search for this value in escSeqStateTable_Key_2022[]
+ * value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
+ * iv) Get state of this sequence from escSeqStateTable_Value_2022[]
+ * escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
+ * v) Get the charset from nextStateToUnicodeJP[24] which is JISX208
+ */
+ /* Below are the 3 arrays depicting a state transition table */
+ private static final byte normalize_esq_chars_2022[] = { /* byte value -> code used by getKey_2022 to build escape-sequence keys (5 bits per byte); 0 = not valid in any escape sequence */
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /* 20..29: 27 is ESC */
+ 0, 0, 0, 0, 0, 0, 4, 7, 29, 0, /* 30..39: 36 '$', 37 '%', 38 '&' */
+ 2, 24, 26, 27, 0, 3, 23, 6, 0, 0, /* 40..49: 40 '(', 41 ')', 42 '*', 43 '+', 45 '-', 46 '.', 47 '/' */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 5, 8, 9, 10, 11, 12, /* 60..69: 64 '@', 65..69 'A'..'E' */
+ 13, 14, 15, 16, 17, 18, 19, 20, 25, 28, /* 70..79: 'F'..'O' */
+ 0, 0, 21, 0, 0, 0, 0, 0, 0, 0, /* 80..89: 82 'R' */
+ 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90..99: 90 'Z' */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0
+ };
+
+ private static final short MAX_STATES_2022 = 74; /* number of entries in the key table below; getKey_2022 uses it as the upper search bound */
+ private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = { /* keys in ascending order, binary-searched by getKey_2022 */
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ 1, 34, 36, 39, 55, 57, 60, 61, 1093, 1096, /* 0..9: 1 is ESC, 36 is ESC $ */
+ 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, /* 10..19 */
+ 1109, 1154, 1157, 1160, 1161, 1176, 1178, 1179, 1254, 1257, /* 20..29: 1161 (index 24) is ESC $ B */
+ 1768, 1773, 1957, 35105, 36933, 36936, 36937, 36938, 36939, 36940, /* 30..39 */
+ 36942, 36943, 36944, 36945, 36946, 36947, 36948, 37640, 37642, 37644, /* 40..49 */
+ 37646, 37711, 37744, 37745, 37746, 37747, 37748, 40133, 40136, 40138, /* 50..59 */
+ 40139, 40140, 40141, 1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630, /* 60..69 */
+ 35947631, 35947635, 35947636, 35947638 /* 70..73 */
+ };
+
+ private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = { /* parallel to escSeqStateTable_Key_2022: entry i is the state getKey_2022 returns for key i */
+ /* 0 1 2 3 4 */
+ VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, /* 0..4 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 5..9 */
+ VALID_MAYBE_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 10..14 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 15..19 */
+ VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 20..24 */
+ VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, /* 25..29 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, /* 30..34 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 35..39 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 40..44 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 45..49 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 50..54 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 55..59 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, /* 60..64 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, /* 65..69 */
+ VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022 /* 70..73 */
+ };
+
+ /* Converter variants: the value changeState_2022 switches on (its var argument) to pick the JP, KR or CN handling */
+ // typedef enum {
+ private static final byte ISO_2022_JP = 1;
+ private static final byte ISO_2022_KR = 2;
+ private static final byte ISO_2022_CN = 3;
+ // } Variant2022;
+
+ /* const UConverterSharedData _ISO2022Data; */
+ //private UConverterSharedData _ISO2022JPData;
+ //private UConverterSharedData _ISO2022KRData;
+ //private UConverterSharedData _ISO2022CNData;
+
+ /******************** to unicode ********************/
+ /****************************************************
+ * Recognized escape sequences are
+ * <ESC>(B ASCII
+ * <ESC>.A ISO-8859-1
+ * <ESC>.F ISO-8859-7
+ * <ESC>(J JISX-201
+ * <ESC>(I JISX-201 half-width Katakana (HWKANA_7BIT)
+ * <ESC>$B JISX-208
+ * <ESC>$@ JISX-208
+ * <ESC>$(D JISX-212
+ * <ESC>$A GB2312
+ * <ESC>$(C KSC5601
+ */
+ private final static byte nextStateToUnicodeJP[/* MAX_STATES_2022 */] = { /* ISO-2022-JP: charset/state selected by the escape sequence found at the same index of escSeqStateTable_Key_2022 */
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 0..9: 5 = ESC N */
+ ASCII, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, JISX201, HWKANA_7BIT, JISX201, INVALID_STATE, /* 10..19: 10 = ESC ( B, 16 = ESC ( H, 17 = ESC ( I, 18 = ESC ( J */
+ INVALID_STATE, INVALID_STATE, JISX208, GB2312, JISX208, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 20..29: 22 = ESC $ @, 23 = ESC $ A, 24 = ESC $ B */
+ ISO8859_1, ISO8859_7, JISX208, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, KSC5601, JISX212, INVALID_STATE, /* 30..39: 30 = ESC . A, 31 = ESC . F, 32 = ESC & @, 37 = ESC $ ( C, 38 = ESC $ ( D */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 40..49 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 50..59 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 60..69 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE /* 70..73 */
+ };
+
+ private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = { /* ISO-2022-CN: charset/state selected by the escape sequence found at the same index of escSeqStateTable_Key_2022 */
+ /* 0 1 2 3 4 5 6 7 8 9 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, SS3_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 0..9: 5 = ESC N, 6 = ESC O */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 10..19 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 20..29 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 30..39 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, GB2312_1, INVALID_STATE, ISO_IR_165, /* 40..49: 47 = ESC $ ) A, 49 = ESC $ ) E */
+ CNS_11643_1, CNS_11643_2, CNS_11643_3, CNS_11643_4, CNS_11643_5, CNS_11643_6, CNS_11643_7, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 50..59: 50 = ESC $ ) G, 51 = ESC $ * H, 52..56 = ESC $ + I..M */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, /* 60..69 */
+ INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE /* 70..73 */
+ };
+
+ /* runs through a state machine to determine the escape sequence - codepage correspondence */
+ private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ boolean DONE = false;
+ byte value;
+ int key[] = {myConverterData.key};
+ int offset[] = {0};
+ byte c;
+ int malformLength = 0;
+
+ value = VALID_NON_TERMINAL_2022;
+ while (source.hasRemaining()) {
+ c = source.get();
+ malformLength++;
+ decoder.toUBytesArray[decoder.toULength++] = c;
+ value = getKey_2022(c, key, offset);
+
+ switch(value) {
+
+ case VALID_NON_TERMINAL_2022:
+ /* continue with the loop */
+ break;
+
+ case VALID_TERMINAL_2022:
+ key[0] = 0;
+ DONE = true;
+ break;
+
+ case INVALID_2022:
+ DONE = true;
+ break;
+
+ case VALID_MAYBE_TERMINAL_2022:
+ /* not ISO_2022 itself, finish here */
+ value = VALID_TERMINAL_2022;
+ key[0] = 0;
+ DONE = true;
+ break;
+ }
+ if (DONE) {
+ break;
+ }
+ }
+// DONE:
+ myConverterData.key = key[0];
+
+ if (value == VALID_NON_TERMINAL_2022) {
+ /* indicate that the escape sequence is incomplete: key !=0 */
+ return err;
+ } else if (value == INVALID_2022) {
+ return CoderResult.malformedForLength(malformLength);
+ } else /* value == VALID_TERMINAL_2022 */ {
+ switch (var) {
+ case ISO_2022_JP: {
+ byte tempState = nextStateToUnicodeJP[offset[0]];
+ switch (tempState) {
+ case INVALID_STATE:
+ err = CoderResult.malformedForLength(malformLength);
+ break;
+ case SS2_STATE:
+ if (myConverterData.toU2022State.cs[2] != 0) {
+ if (myConverterData.toU2022State.g < 2) {
+ myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
+ }
+ myConverterData.toU2022State.g = 2;
+ } else {
+ /* illegal to have SS2 before a matching designator */
+ err = CoderResult.malformedForLength(malformLength);
+ }
+ break;
+ /* case SS3_STATE: not used in ISO-2022-JP-x */
+ case ISO8859_1:
+ case ISO8859_7:
+ if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
+ err = CoderResult.unmappableForLength(malformLength);
+ } else {
+ /* G2 charset for SS2 */
+ myConverterData.toU2022State.cs[2] = tempState;
+ }
+ break;
+ default:
+ if ((jpCharsetMasks[myConverterData.version] & CSM(tempState)) == 0) {
+ err = CoderResult.unmappableForLength(source.position() - 1);
+ } else {
+ /* G0 charset */
+ myConverterData.toU2022State.cs[0] = tempState;
+ }
+ break;
+ } // end of switch
+ break;
+ }
+ case ISO_2022_CN: {
+ byte tempState = nextStateToUnicodeCN[offset[0]];
+ switch (tempState) {
+ case INVALID_STATE:
+ err = CoderResult.unmappableForLength(malformLength);
+ break;
+ case SS2_STATE:
+ if (myConverterData.toU2022State.cs[2] != 0) {
+ if (myConverterData.toU2022State.g < 2) {
+ myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
+ }
+ myConverterData.toU2022State.g = 2;
+ } else {
+ /* illegal to have SS2 before a matching designator */
+ err = CoderResult.malformedForLength(malformLength);
+ }
+ break;
+ case SS3_STATE:
+ if (myConverterData.toU2022State.cs[3] != 0) {
+ if (myConverterData.toU2022State.g < 2) {
+ myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
+ }
+ myConverterData.toU2022State.g = 3;
+ } else {
+ /* illegal to have SS3 before a matching designator */
+ err = CoderResult.malformedForLength(malformLength);
+ }
+ break;
+ case ISO_IR_165:
+ if (myConverterData.version == 0) {
+ err = CoderResult.unmappableForLength(malformLength);
+ break;
+ }
+ /* fall through */
+ case GB2312_1:
+ /* fall through */
+ case CNS_11643_1:
+ myConverterData.toU2022State.cs[1] = tempState;
+ break;
+ case CNS_11643_2:
+ myConverterData.toU2022State.cs[2] = tempState;
+ break;
+ default:
+ /* other CNS 11643 planes */
+ if (myConverterData.version == 0) {
+ err = CoderResult.unmappableForLength(source.position() - 1);
+ } else {
+ myConverterData.toU2022State.cs[3] = tempState;
+ }
+ break;
+ } //end of switch
+ }
+ break;
+ case ISO_2022_KR:
+ if (offset[0] == 0x30) {
+ /* nothing to be done, just accept this one escape sequence */
+ } else {
+ err = CoderResult.unmappableForLength(malformLength);
+ }
+ break;
+ default:
+ err = CoderResult.malformedForLength(malformLength);
+ break;
+ } // end of switch
+ }
+ if (!err.isError()) {
+ decoder.toULength = 0;
+ }
+ return err;
+ }
+
+ /*
+  * Folds byte c into the escape-sequence key in key[0] and looks the new key up in escSeqStateTable_Key_2022.
+  * Hit: key[0] = new key, offset[0] = table index, returns that entry's state.
+  * Miss (or c not usable in any escape sequence): key[0] and offset[0] are cleared, returns INVALID_2022.
+  */
+ private static byte getKey_2022(byte c, int[]key, int[]offset) {
+     int low = 0;
+     int hi = MAX_STATES_2022;
+     int oldmid = 0;
+     int togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK];
+
+     if (togo != 0) { /* 0 marks a byte that is not valid anywhere in an escape sequence */
+         togo = (key[0] << 5) + togo; /* every byte contributes 5 bits to the key */
+         while (hi != low) { /* binary search */
+             int mid = (hi+low) >> 1; /* Finds median */
+             if (mid == oldmid) {
+                 break; /* the interval stopped shrinking: not in the table */
+             }
+             if (escSeqStateTable_Key_2022[mid] > togo) {
+                 hi = mid;
+             } else if (escSeqStateTable_Key_2022[mid] < togo) {
+                 low = mid;
+             } else /* we found it */ {
+                 key[0] = togo;
+                 offset[0] = mid;
+                 return escSeqStateTable_Value_2022[mid];
+             }
+             oldmid = mid;
+         }
+     }
+
+     /* invalid in either way: clear the state so that no stale partial key is carried forward */
+     key[0] = 0;
+     offset[0] = 0;
+     return INVALID_2022;
+ }
+
+ /*
+ * To Unicode Callback helper function
+ */
+ private static CoderResult toUnicodeCallback(CharsetDecoderICU cnv, int sourceChar, int targetUniChar) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ if (sourceChar > 0xff) {
+ cnv.toUBytesArray[0] = (byte)(sourceChar>>8);
+ cnv.toUBytesArray[1] = (byte)sourceChar;
+ cnv.toULength = 2;
+ } else {
+ cnv.toUBytesArray[0] = (byte)sourceChar;
+ cnv.toULength = 1;
+ }
+
+ if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {
+ err = CoderResult.unmappableForLength(1);
+ } else {
+ err = CoderResult.malformedForLength(1);
+ }
+
+ return err;
+ }
+
+ /****************************ISO-2022-JP************************************/
+ private class CharsetDecoderISO2022JP extends CharsetDecoderICU {
+ public CharsetDecoderISO2022JP(CharsetICU cs) { /* decoder for the charset cs */
+ super(cs); /* nothing beyond the CharsetDecoderICU constructor is set up here */
+ }
+
+ protected void implReset() { /* resets the decoder and the ISO 2022 converter data */
+ super.implReset();
+ myConverterData.reset(); /* clears both direction states and the empty-segment flag */
+ }
+ /*
+  * Map 00..7F to Unicode according to JIS X 0201:
+  * 0x5c becomes U+00A5 and 0x7e becomes U+203E; every other value maps to itself.
+  */
+ private int jisx201ToU(int value) {
+     switch (value) {
+     case 0x5c:
+         return 0xa5;
+     case 0x7e:
+         return 0x203e;
+     default:
+         return value;
+     }
+ }
+ /*
+  * Convert a pair of JIS X 208 21..7E bytes to Shift-JIS and store the result in bytes[0] and bytes[1].
+  * If either byte is outside 21..7E make sure that the result is not valid
+  * for Shift-JIS so that the converter catches it.
+  * Some invalid byte values already turn into equally invalid Shift-JIS
+  * byte values and need not be tested explicitly.
+  */
+ private void _2022ToSJIS(char c1, char c2, byte []bytes) {
+     char lead = c1;
+     char trail = c2;
+     /* trail byte: the offset depends on whether the lead byte is odd or even */
+     if ((lead & 1) == 0) {
+         if (trail < 0x21 || trail > 0x7e) {
+             trail = 0; /* invalid */
+         } else {
+             trail += 0x7e;
+         }
+     } else {
+         ++lead;
+         if (trail > 0x7e) {
+             trail = 0; /* invalid */
+         } else {
+             trail += (trail <= 0x5f) ? 0x1f : 0x20;
+         }
+     }
+
+     /* lead byte: halve it, then add 0x70 or 0xb0 depending on its range */
+     lead >>= 1;
+     if (lead > 0x3f) {
+         lead = 0; /* invalid */
+     } else {
+         lead += (lead <= 0x2f) ? 0x70 : 0xb0;
+     }
+     bytes[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & lead);
+     bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & trail);
+ }
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ boolean gotoGetTrail = false;
+ boolean gotoEscape = false;
+ CoderResult err = CoderResult.UNDERFLOW;
+ byte []tempBuf = new byte[2];
+ int targetUniChar = 0x0000;
+ int mySourceChar = 0x0000;
+ int mySourceCharTemp = 0x0000; // use for getTrail label call.
+ byte cs; /* StateEnum */
+ byte csTemp= 0; // use for getTrail label call.
+
+ if (myConverterData.key != 0) {
+ /* continue with a partial escape sequence */
+ // goto escape;
+ gotoEscape = true;
+ } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
+ /* continue with a partial double-byte character */
+ mySourceChar = toUBytesArray[0];
+ toULength = 0;
+ cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+ // goto getTrailByte;
+ mySourceCharTemp = 0x99;
+ gotoGetTrail = true;
+ }
+
+ while (source.hasRemaining() || gotoEscape || gotoGetTrail) {
+ // This code is here for the goto escape label call above.
+ if (gotoEscape) {
+ mySourceCharTemp = ESC_2022;
+ }
+
+ targetUniChar = UConverterConstants.missingCharMarker;
+
+ if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
+ if (!gotoEscape && !gotoGetTrail) {
+ mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
+ mySourceCharTemp = mySourceChar;
+ }
+
+ switch (mySourceCharTemp) {
+ case UConverterConstants.SI:
+ if (myConverterData.version == 3) {
+ myConverterData.toU2022State.g = 0;
+ continue;
+ } else {
+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+ myConverterData.isEmptySegment = false;
+ break;
+ }
+
+ case UConverterConstants.SO:
+ if (myConverterData.version == 3) {
+ /* JIS7: switch to G1 half-width Katakana */
+ myConverterData.toU2022State.cs[1] = HWKANA_7BIT;
+ myConverterData.toU2022State.g = 1;
+ continue;
+ } else {
+ /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
+ myConverterData.isEmptySegment = false; /* reset this, we have a different error */
+ break;
+ }
+
+ case ESC_2022:
+ if (!gotoEscape) {
+ source.position(source.position() - 1);
+ } else {
+ gotoEscape = false;
+ }
+// escape:
+ {
+ int mySourceBefore = source.position();
+ int toULengthBefore = this.toULength;
+
+ err = changeState_2022(this, source, variant);
+
+ /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
+ if(myConverterData.version == 0 && myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
+ err = CoderResult.malformedForLength(source.position() - mySourceBefore);
+ this.toULength = toULengthBefore + (source.position() - mySourceBefore);
+ }
+ }
+
+ /* invalid or illegal escape sequence */
+ if(err.isError()){
+ myConverterData.isEmptySegment = false; /* Reset to avoid future spurious errors */
+ return err;
+ }
+ /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
+ if(myConverterData.key == 0) {
+ myConverterData.isEmptySegment = true;
+ }
+
+ continue;
+ /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
+ case CR:
+ /* falls through */
+ case LF:
+ /* automatically reset to single-byte mode */
+ if (myConverterData.toU2022State.cs[0] != ASCII && myConverterData.toU2022State.cs[0] != JISX201) {
+ myConverterData.toU2022State.cs[0] = ASCII;
+ }
+ myConverterData.toU2022State.cs[2] = 0;
+ myConverterData.toU2022State.g = 0;
+ /* falls through */
+ default :
+ /* convert one or two bytes */
+ myConverterData.isEmptySegment = false;
+ cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+ csTemp = cs;
+ if (gotoGetTrail) {
+ csTemp = (byte)0x99;
+ }
+ if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {
+ /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
+ targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
+
+ /* return from a single-shift state to the previous one */
+ if (myConverterData.toU2022State.g >= 2) {
+ myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+ }
+ } else {
+ switch(csTemp) {
+ case ASCII:
+ if (mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar;
+ }
+ break;
+ case ISO8859_1:
+ if (mySourceChar <= 0x7f) {
+ targetUniChar = mySourceChar + 0x80;
+ }
+ /* return from a single-shift state to the prevous one */
+ myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+ break;
+ case ISO8859_7:
+ if (mySourceChar <= 0x7f) {
+ /* convert mySourceChar+0x80 to use a normal 8-bit table */
+ targetUniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(myConverterData.myConverterArray[cs].mbcs,
+ mySourceChar+0x80);
+ }
+ /* return from a single-shift state to the previous one */
+ myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+ break;
+ case JISX201:
+ if (mySourceChar <= 0x7f) {
+ targetUniChar = jisx201ToU(mySourceChar);
+ }
+ break;
+ case HWKANA_7BIT:
+ if ((mySourceChar >= 0x21) && (mySourceChar <= 0x5f)) {
+ /* 7-bit halfwidth Katakana */
+ targetUniChar = mySourceChar + (HWKANA_START - 0x21);
+ break;
+ }
+ default :
+ /* G0 DBCS */
+ if (gotoGetTrail || source.hasRemaining()) {
+// getTrailByte:
+ gotoGetTrail = false;
+ byte trailByte;
+ trailByte = source.get();
+ if (cs == JISX208) {
+ _2022ToSJIS((char)(UConverterConstants.UNSIGNED_BYTE_MASK & mySourceChar),
+ (char)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte), tempBuf);
+ } else {
+ tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & mySourceChar);
+ tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
+ }
+ mySourceChar = (mySourceChar << 8) | (short)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
+ ByteBuffer tempByteBuf = ByteBuffer.wrap(tempBuf);
+ targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], tempByteBuf, false);
+ } else {
+ toUBytesArray[0] = (byte)mySourceChar;
+ toULength = 1;
+ // goto endloop;
+ return err;
+ }
+ } /* end of inner switch */
+ }
+ break;
+ } /* end of outer switch */
+
+ if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {
+ if (offsets != null) {
+ offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ target.put((char)targetUniChar);
+ } else if (targetUniChar > UConverterConstants.missingCharMarker) {
+ /* disassemble the surrogate pair and write to output */
+ targetUniChar -= 0x0010000;
+ target.put((char)(0xd800 + (char)(targetUniChar>>10)));
+ target.position(target.position()-1);
+ if (offsets != null) {
+ offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ target.get();
+ if (target.hasRemaining()) {
+ target.put((char)(0xdc00+(char)(targetUniChar&0x3ff)));
+ target.position(target.position()-1);
+ if (offsets != null) {
+ offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
+ }
+ target.get();
+ } else {
+ charErrorBufferArray[charErrorBufferLength++] =
+ (char)(0xdc00+(char)(targetUniChar&0x3ff));
+ }
+ } else {
+ /* Call the callback function */
+ err = toUnicodeCallback(this, mySourceChar, targetUniChar);
+ break;
+ }
+ } else { /* goes with "if (target.hasRemaining())" way up near the top of the function */
+ err = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+//endloop:
+ return err;
+ }
+ } // end of class CharsetDecoderISO2022JP
+
+ /****************************ISO-2022-CN************************************/
+ /**
+  * Decoder for the ISO-2022-CN variant (escape sequences are parsed by
+  * changeState_2022 with the ISO_2022_CN variant).
+  *
+  * State that survives between decodeLoop calls lives in the enclosing charset's
+  * myConverterData (designations, current G set, partial-escape key, empty-segment
+  * flag) and in this decoder's toUBytesArray/toULength (a pending lead byte).
+  */
+ private class CharsetDecoderISO2022CN extends CharsetDecoderICU {
+     public CharsetDecoderISO2022CN(CharsetICU cs) {
+         super(cs);
+     }
+
+     /** Resets the base decoder state and then the shared ISO-2022 converter data. */
+     protected void implReset() {
+         super.implReset();
+         myConverterData.reset();
+     }
+
+     /**
+      * Converts ISO-2022-CN bytes from source into UTF-16 units in target.
+      *
+      * The loop emulates the "goto escape" / "goto getTrailByte" jumps of the C
+      * original with two boolean flags that force the matching switch case on the
+      * first iteration after a resumed call.
+      *
+      * @param source  input bytes; consumed up to the point where conversion stops
+      * @param target  output characters
+      * @param offsets if non-null, its backing array receives, at the index of each
+      *                written UTF-16 unit, the source position of the first byte of
+      *                the character that produced it
+      * @param flush   not consulted by this method
+      * @return UNDERFLOW when the input is exhausted (a dangling lead byte is saved in
+      *         toUBytesArray), OVERFLOW when target is full, a malformed result for an
+      *         empty SO segment, or whatever changeState_2022 / toUnicodeCallback report
+      */
+     protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+         CoderResult err = CoderResult.UNDERFLOW;
+         byte[] tempBuf = new byte[3];
+         int targetUniChar = 0x0000;
+         int mySourceChar = 0x0000;
+         int mySourceCharTemp = 0x0000;
+         boolean gotoEscape = false;
+         boolean gotoGetTrailByte = false;
+
+         if (myConverterData.key != 0) {
+             /* continue with a partial escape sequence */
+             gotoEscape = true;
+         } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
+             /* continue with a partial double-byte character */
+             /* mask the saved byte so a lead byte >= 0x80 is not sign-extended */
+             mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & toUBytesArray[0];
+             toULength = 0;
+             gotoGetTrailByte = true;
+         }
+
+         while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
+             targetUniChar = UConverterConstants.missingCharMarker;
+
+             /* a pending escape sequence is processed even when the target is full */
+             if (!target.hasRemaining() && !gotoEscape) {
+                 err = CoderResult.OVERFLOW;
+                 break;
+             }
+
+             if (gotoEscape) {
+                 mySourceChar = ESC_2022; // goto escape label
+                 mySourceCharTemp = mySourceChar;
+             } else if (gotoGetTrailByte) {
+                 mySourceCharTemp = 0xff; // goto getTrailByte; 0xff selects the default case
+             } else {
+                 mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
+                 mySourceCharTemp = mySourceChar;
+             }
+
+             switch (mySourceCharTemp) {
+             case UConverterConstants.SI:
+                 myConverterData.toU2022State.g = 0;
+                 if (myConverterData.isEmptySegment) {
+                     /* we are handling it, reset to avoid future spurious errors */
+                     myConverterData.isEmptySegment = false;
+                     err = CoderResult.malformedForLength(1);
+                     this.toUBytesArray[0] = (byte)mySourceChar;
+                     this.toULength = 1;
+                     return err;
+                 }
+                 continue;
+
+             case UConverterConstants.SO:
+                 if (myConverterData.toU2022State.cs[1] != 0) {
+                     myConverterData.toU2022State.g = 1;
+                     myConverterData.isEmptySegment = true; /* begin a new segment, empty so far */
+                     continue;
+                 } else {
+                     /* illegal to have SO before a matching designator */
+                     /* handling a different error, reset this to avoid future spurious errors */
+                     myConverterData.isEmptySegment = false;
+                     break;
+                 }
+
+             case ESC_2022:
+                 if (!gotoEscape) {
+                     /* un-read the ESC byte so that changeState_2022 sees it */
+                     source.position(source.position() - 1);
+                 }
+                 // escape label
+                 gotoEscape = false;
+                 {
+                     int mySourceBefore = source.position();
+                     int toULengthBefore = this.toULength;
+
+                     err = changeState_2022(this, source, ISO_2022_CN);
+
+                     /* After SO there must be at least one character before a designator (designator error handled separately) */
+                     if (myConverterData.key == 0 && !err.isError() && myConverterData.isEmptySegment) {
+                         int consumed = source.position() - mySourceBefore;
+                         err = CoderResult.malformedForLength(consumed);
+                         this.toULength = toULengthBefore + consumed;
+                     }
+                 }
+
+                 /* invalid or illegal escape sequence */
+                 if (err.isError()) {
+                     myConverterData.isEmptySegment = false; /* reset to avoid future spurious errors */
+                     return err;
+                 }
+                 continue;
+
+             /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
+             case CR:
+                 /* falls through */
+             case LF:
+                 myConverterData.toU2022State.reset();
+                 /* falls through */
+             default:
+                 /* convert one or two bytes */
+                 myConverterData.isEmptySegment = false;
+                 if (myConverterData.toU2022State.g != 0 || gotoGetTrailByte) {
+                     if (source.hasRemaining() || gotoGetTrailByte) {
+                         UConverterSharedData cnv;
+                         byte tempState;
+                         int tempBufLen;
+                         byte trailByte;
+                         // getTrailByte: label
+                         gotoGetTrailByte = false; // reset gotoGetTrailByte
+
+                         trailByte = source.get();
+                         tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+                         if (tempState > CNS_11643_0) {
+                             /* states above CNS_11643_0 share one table; a leading byte built from the state offset is prepended */
+                             cnv = myConverterData.myConverterArray[CNS_11643];
+                             tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
+                             tempBuf[1] = (byte)(mySourceChar);
+                             tempBuf[2] = trailByte;
+                             tempBufLen = 3;
+                         } else {
+                             cnv = myConverterData.myConverterArray[tempState];
+                             tempBuf[0] = (byte)(mySourceChar);
+                             tempBuf[1] = trailByte;
+                             tempBufLen = 2;
+                         }
+                         mySourceChar = (mySourceChar << 8) | (UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
+                         if (myConverterData.toU2022State.g >= 2) {
+                             /* return from a single-shift state to the previous one */
+                             myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
+                         }
+                         ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
+                         tempBuffer.limit(tempBufLen);
+                         tempBuffer.position(0);
+                         targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
+                     } else {
+                         /* lead byte without a trail byte: save it for the next call */
+                         toUBytesArray[0] = (byte)mySourceChar;
+                         toULength = 1;
+                         return err;
+                     }
+                 } else {
+                     if (mySourceChar <= 0x7f) {
+                         targetUniChar = (char)mySourceChar;
+                     }
+                 }
+                 break;
+             }
+
+             /* source index of the first byte of the character just decoded */
+             int sourceIndex = source.position() - (mySourceChar <= 0xff ? 1 : 2);
+
+             if ((UConverterConstants.UNSIGNED_INT_MASK & targetUniChar) < (UConverterConstants.UNSIGNED_INT_MASK & (UConverterConstants.missingCharMarker - 1))) {
+                 /* BMP code point */
+                 if (offsets != null) {
+                     offsets.array()[target.position()] = sourceIndex;
+                 }
+                 target.put((char)targetUniChar);
+             } else if ((UConverterConstants.UNSIGNED_INT_MASK & targetUniChar) > (UConverterConstants.UNSIGNED_INT_MASK & (UConverterConstants.missingCharMarker))) {
+                 /* disassemble the surrogate pair and write to output */
+                 targetUniChar -= 0x0010000;
+                 target.put((char)(0xd800 + (char)(targetUniChar >> 10)));
+                 if (offsets != null) {
+                     offsets.array()[target.position() - 1] = sourceIndex;
+                 }
+                 if (target.hasRemaining()) {
+                     target.put((char)(0xdc00 + (char)(targetUniChar & 0x3ff)));
+                     if (offsets != null) {
+                         offsets.array()[target.position() - 1] = sourceIndex;
+                     }
+                 } else {
+                     /* no room for the trail surrogate: park it in the overflow buffer */
+                     charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00 + (char)(targetUniChar & 0x3ff));
+                 }
+             } else {
+                 /* Call the callback function */
+                 err = toUnicodeCallback(this, mySourceChar, targetUniChar);
+                 break;
+             }
+         }
+
+         return err;
+     }
+
+ }
+ /************************ ISO-2022-KR ********************/
+ private class CharsetDecoderISO2022KR extends CharsetDecoderICU {
+ /**
+  * Creates the ISO-2022-KR decoder; all setup is delegated to the
+  * CharsetDecoderICU constructor.
+  *
+  * @param cs the charset passed through to the superclass
+  */
+ public CharsetDecoderISO2022KR(CharsetICU cs) {
+ super(cs);
+ }
+
+ /**
+  * Resets this decoder: first the superclass state, then the KR-specific
+  * initial to-Unicode state, and finally the shared converter data.
+  */
+ protected void implReset() {
+ super.implReset();
+ setInitialStateToUnicodeKR();
+ myConverterData.reset();
+ }
+
+ /**
+  * Converts ISO-2022-KR bytes from source into UTF-16 units in target.
+  *
+  * When myConverterData.version is 1 the work is handed to decodeLoopIBM.
+  * Otherwise SI/SO switch between G0 and G1, ESC sequences are parsed by
+  * changeState_2022, and bytes are converted one at a time (G0) or in
+  * lead/trail pairs (G1). Two boolean flags emulate the "goto escape" and
+  * "goto getTrailByte" jumps of the C original when a call resumes from
+  * state saved by a previous call.
+  *
+  * @param source  input bytes; consumed up to the point where conversion stops
+  * @param target  output characters
+  * @param offsets if non-null, its backing array receives, at the index of each
+  *                written character, the source position of the first byte of
+  *                the sequence that produced it
+  * @param flush   only forwarded to decodeLoopIBM
+  * @return UNDERFLOW when the input is exhausted (a dangling lead byte is saved in
+  *         toUBytesArray), OVERFLOW when target is full, a malformed result for an
+  *         empty SO segment, or whatever changeState_2022 / toUnicodeCallback report
+  */
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+     CoderResult err = CoderResult.UNDERFLOW;
+     char mySourceChar = 0x0000;
+     int targetUniChar = 0x0000;
+     byte[] tempBuf = new byte[2];
+     boolean usingFallback;
+     boolean gotoGetTrailByte = false;
+     boolean gotoEscape = false;
+
+     if (myConverterData.version == 1) {
+         return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);
+     }
+
+     /* initialize state */
+     usingFallback = isFallbackUsed();
+
+     if (myConverterData.key != 0) {
+         /* continue with a partial escape sequence */
+         gotoEscape = true;
+     } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
+         /* continue with a partial double-byte character */
+         /* masked like a freshly read byte so the saved lead byte is not sign-extended */
+         mySourceChar = (char)(toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
+         toULength = 0;
+         gotoGetTrailByte = true;
+     }
+
+     while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
+         if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
+             if (!gotoGetTrailByte && !gotoEscape) {
+                 mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
+             }
+
+             if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
+                 myConverterData.toU2022State.g = 0;
+                 if (myConverterData.isEmptySegment) {
+                     /* we are handling it, reset to avoid future spurious errors */
+                     myConverterData.isEmptySegment = false;
+                     err = CoderResult.malformedForLength(1);
+                     this.toUBytesArray[0] = (byte)mySourceChar;
+                     this.toULength = 1;
+                     return err;
+                 }
+                 /* consume the source */
+                 continue;
+             } else if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SO) {
+                 myConverterData.toU2022State.g = 1;
+                 myConverterData.isEmptySegment = true;
+                 /* consume the source */
+                 continue;
+             } else if (!gotoGetTrailByte && (gotoEscape || mySourceChar == ESC_2022)) {
+                 if (!gotoEscape) {
+                     /* un-read the ESC byte so that changeState_2022 sees it */
+                     source.position(source.position() - 1);
+                 }
+                 // escape label
+                 gotoEscape = false; // reset gotoEscape flag
+                 /* Any invalid ESC sequences will be detected separately, so just reset this */
+                 myConverterData.isEmptySegment = false;
+                 err = changeState_2022(this, source, ISO_2022_KR);
+                 if (err.isError()) {
+                     return err;
+                 }
+                 continue;
+             }
+
+             /* Any invalid char errors will be detected separately, so just reset this */
+             myConverterData.isEmptySegment = false;
+             if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
+                 if (source.hasRemaining() || gotoGetTrailByte) {
+                     // getTrailByte label
+                     gotoGetTrailByte = false; // reset gotoGetTrailByte flag
+
+                     byte trailByte;
+                     trailByte = source.get();
+                     /* the table lookup uses both bytes with 0x80 added */
+                     tempBuf[0] = (byte)(mySourceChar + 0x80);
+                     tempBuf[1] = (byte)(trailByte + 0x80);
+                     mySourceChar = (char)((mySourceChar << 8) | (short)(trailByte & UConverterConstants.UNSIGNED_BYTE_MASK));
+                     if ((mySourceChar & 0x8080) == 0) {
+                         targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
+                     } else {
+                         /* illegal bytes > 0x7f */
+                         targetUniChar = UConverterConstants.missingCharMarker;
+                     }
+                 } else {
+                     /* lead byte without a trail byte: save it for the next call */
+                     toUBytesArray[0] = (byte)mySourceChar;
+                     toULength = 1;
+                     break;
+                 }
+             } else {
+                 /* single byte: expose only the byte just read to the lookup, then restore the limit */
+                 int oldSourceLimit = source.limit();
+                 source.limit(source.position());
+                 source.position(source.position() - 1);
+                 targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
+                 source.limit(oldSourceLimit);
+             }
+
+             if (targetUniChar < 0xfffe) {
+                 /* record the offset at the slot the character is about to occupy */
+                 if (offsets != null) {
+                     offsets.array()[target.position()] = source.position() - (mySourceChar <= 0xff ? 1 : 2);
+                 }
+                 target.put((char)targetUniChar);
+             } else {
+                 /* Call the callback function */
+                 err = toUnicodeCallback(this, mySourceChar, targetUniChar);
+                 break;
+             }
+         } else {
+             err = CoderResult.OVERFLOW;
+             break;
+         }
+     }
+
+     return err;
+ }
+
+ protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+ CoderResult err = CoderResult.UNDERFLOW;
+ int sourceStart;
+ int sourceLimit;
+ int argSource;
+ int argTarget;
+ boolean gotoEscape = false;
+ int oldSourceLimit;
+
+ /* remember the original start of the input for offsets */
+ sourceStart = argSource = source.position();
+
+ if (myConverterData.key != 0) {
+ /* continue with a partial escape sequence */
+ gotoEscape = true;
+ }
+
+ while (gotoEscape || (!err.isError() && source.hasRemaining())) {
+ if (!gotoEscape) {
+ /* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */
+ int oldSourcePos = source.position();
+ sourceLimit = getEndOfBuffer_2022(source, flush);
+ source.position(oldSourcePos);
+ if (source.position() != sourceLimit) {
+ /*
+ * get the current partial byte sequence
+ *
+ * it needs to be moved between the public and the subconverter
+ * so that the conversion frameword, which only sees the public
+ * converter, can handle truncated and illegal input etc.
+ */
+ if (toULength > 0) {
+ cnv.toUBytesArray = (byte[])(toUBytesArray.clone());
+ }
+ cnv.toULength = toULength;
+
+ /*
+ * Convert up to the end of the input, or to before the next escape character.
+ * Does not handle conversion extensions because the preToU[] state etc.
+ * is not copied.
+ */
+ argTarget = target.position();