ICU-2401 r20715 => tags/post-cvs2svn-cleanedup
X-SVN-Rev: 20718
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..5e81efe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,118 @@
+* text=auto !eol
+
+*.c text !eol
+*.cc text !eol
+*.classpath text !eol
+*.cpp text !eol
+*.css text !eol
+*.dsp text !eol
+*.dsw text !eol
+*.filters text !eol
+*.h text !eol
+*.htm text !eol
+*.html text !eol
+*.in text !eol
+*.java text !eol
+*.launch text !eol
+*.mak text !eol
+*.md text !eol
+*.MF text !eol
+*.mk text !eol
+*.pl text !eol
+*.pm text !eol
+*.project text !eol
+*.properties text !eol
+*.py text !eol
+*.rc text !eol
+*.sh text eol=lf
+*.sln text !eol
+*.stub text !eol
+*.txt text !eol
+*.ucm text !eol
+*.vcproj text !eol
+*.vcxproj text !eol
+*.xml text !eol
+*.xsl text !eol
+*.xslt text !eol
+Makefile text !eol
+configure text !eol
+LICENSE text !eol
+README text !eol
+
+*.bin -text
+*.brk -text
+*.cnv -text
+*.icu -text
+*.res -text
+*.nrm -text
+*.spp -text
+*.tri2 -text
+
+/ee.foundation.jar -text
+src/com/ibm/icu/dev/data/rbbi/english.dict -text
+src/com/ibm/icu/dev/data/testdata.jar -text
+src/com/ibm/icu/dev/data/thai6.ucs -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Asian.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Chinese.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Japanese.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Japanese_h.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Japanese_k.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Korean.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Latin.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Russian.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_SerbianSH.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_SerbianSR.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Simplified_Chinese.txt -text
+src/com/ibm/icu/dev/test/perf/data/collation/TestNames_Thai.txt -text
+src/com/ibm/icu/dev/test/rbbi/rbbitst.txt -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.JDKTimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.MathContext.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ArabicShapingException.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.ChineseDateFormatSymbols.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DateFormatSymbols.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.DecimalFormatSymbols.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.MessageFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.NumberFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.RuleBasedNumberFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.SimpleDateFormat.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.text.StringPrepParseException.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.BuddhistCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Calendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ChineseCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.CopticCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.Currency.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.EthiopicCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.GregorianCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.HebrewCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.IslamicCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.JapaneseCalendar.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.SimpleTimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.TimeZone.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.ULocale.dat -text
+src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.util.UResourceTypeMismatchException.dat -text
+src/com/ibm/icu/dev/tool/docs/icu4j28.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j30.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j32.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j34.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j341.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j342.api.gz -text
+src/com/ibm/icu/dev/tool/docs/icu4j343.api.gz -text
+src/com/ibm/icu/impl/data/icudata.jar -text
+src/com/ibm/icu/impl/data/th.brk -text
+src/com/ibm/richtext/textapps/resources/unicode.arabic.red -text
+src/com/ibm/richtext/textapps/resources/unicode.hebrew.red -text
+
+# The following file types are stored in Git-LFS.
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.dat filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1a59a29
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+/.classpath
+/.clover
+/.externalToolBuilders
+/.project
+/classes
+/doc
diff --git a/APIChangeReport.html b/APIChangeReport.html
new file mode 100644
index 0000000..3f22050
--- /dev/null
+++ b/APIChangeReport.html
@@ -0,0 +1,632 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>ICU4J API Comparison: ICU4J 3.4 with ICU4J 3.6</title>
+<!-- Copyright 2006, IBM, All Rights Reserved. -->
+</head>
+<body>
+<h1>ICU4J API Comparison: ICU4J 3.4 with ICU4J 3.6</h1>
+
+<hr/>
+<h2>Removed from ICU4J 3.4</h2>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+UResourceBundle
+<ul>
+<li>(draft) protected static UResourceBundle <i>instantiateICUResource</i>(java.lang.String, java.lang.String, java.lang.ClassLoader)</li>
+</ul>
+</ul>
+
+
+<hr/>
+<h2>Withdrawn, Deprecated, or Obsoleted in ICU4J 3.6</h2>
+
+<h3>Package com.ibm.icu.lang</h3>
+<ul>
+UCharacter
+<ul>
+<li>(deprecated) public static boolean <i>isJavaLetter</i>(int)</li>
+<li>(deprecated) public static boolean <i>isJavaLetterOrDigit</i>(int)</li>
+<li>(deprecated) public static boolean <i>isSpace</i>(int)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+NumberFormat
+<ul>
+<li>(deprecated) protected static java.lang.String <i>getPattern</i>(java.util.Locale, int)</li>
+</ul>
+RuleBasedBreakIterator
+<ul>
+<li><span style='color:red'>*internal* </span>public static RuleBasedBreakIterator <i>getInstanceFromCompiledRules</i>(java.io.InputStream)</li>
+</ul>
+</ul>
+
+
+<hr/>
+<h2>Changed in ICU4J 3.6 (old, new)</h2>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+<li> (stable) public class <i>DictionaryBasedBreakIterator</i> extends com.ibm.icu.text.RuleBasedBreakIterator_Old<br>
+(stable) public class <i>DictionaryBasedBreakIterator</i> extends com.ibm.icu.text.RuleBasedBreakIterator</li>
+<li> (stable) public class <i>UnicodeSet</i> extends com.ibm.icu.text.UnicodeFilter<br>
+(stable) public class <i>UnicodeSet</i> extends com.ibm.icu.text.UnicodeFilter implements com.ibm.icu.util.Freezable</li>
+</ul>
+
+
+<hr/>
+<h2>Promoted to stable in ICU4J 3.6</h2>
+
+<h3>Package com.ibm.icu.lang</h3>
+<ul>
+<li>(stable) public class <i>UCharacterEnums</i></li>
+<li>(stable) public static interface <i>UCharacterEnums.ECharacterCategory</i></li>
+<li>(stable) public static interface <i>UCharacterEnums.ECharacterDirection</i></li>
+UCharacter
+<ul>
+<li>(stable) public static final int MAX_CODE_POINT</li>
+<li>(stable) public static final char MAX_HIGH_SURROGATE</li>
+<li>(stable) public static final char MAX_LOW_SURROGATE</li>
+<li>(stable) public static final char MAX_SURROGATE</li>
+<li>(stable) public static final int MIN_CODE_POINT</li>
+<li>(stable) public static final char MIN_HIGH_SURROGATE</li>
+<li>(stable) public static final char MIN_LOW_SURROGATE</li>
+<li>(stable) public static final int MIN_SUPPLEMENTARY_CODE_POINT</li>
+<li>(stable) public static final char MIN_SURROGATE</li>
+<li>(stable) public static int <i>charCount</i>(int)</li>
+<li>(stable) public static final int <i>codePointAt</i>(char[], int)</li>
+<li>(stable) public static final int <i>codePointAt</i>(char[], int, int)</li>
+<li>(stable) public static final int <i>codePointAt</i>(java.lang.CharSequence, int)</li>
+<li>(stable) public static final int <i>codePointBefore</i>(char[], int)</li>
+<li>(stable) public static final int <i>codePointBefore</i>(char[], int, int)</li>
+<li>(stable) public static final int <i>codePointBefore</i>(java.lang.CharSequence, int)</li>
+<li>(stable) public static int <i>codePointCount</i>(char[], int, int)</li>
+<li>(stable) public static int <i>codePointCount</i>(java.lang.CharSequence, int, int)</li>
+<li>(stable) public static char <i>forDigit</i>(int, int)</li>
+<li>(stable) public static byte <i>getDirectionality</i>(int)</li>
+<li>(stable) public static boolean <i>isHighSurrogate</i>(char)</li>
+<li>(stable) public static boolean <i>isLowSurrogate</i>(char)</li>
+<li>(stable) public static final boolean <i>isSupplementaryCodePoint</i>(int)</li>
+<li>(stable) public static final boolean <i>isSurrogatePair</i>(char, char)</li>
+<li>(stable) public static final boolean <i>isValidCodePoint</i>(int)</li>
+<li>(stable) public static int <i>offsetByCodePoints</i>(char[], int, int, int, int)</li>
+<li>(stable) public static int <i>offsetByCodePoints</i>(java.lang.CharSequence, int, int)</li>
+<li>(stable) public static final char[] <i>toChars</i>(int)</li>
+<li>(stable) public static final int <i>toChars</i>(int, char[], int)</li>
+<li>(stable) public static final int <i>toCodePoint</i>(char, char)</li>
+</ul>
+UCharacter.LineBreak
+<ul>
+<li>(stable) public static final int INSEPARABLE</li>
+</ul>
+UCharacter.UnicodeBlock
+<ul>
+<li>(stable) public static final UCharacter.UnicodeBlock CYRILLIC_SUPPLEMENT</li>
+<li>(stable) public static final int CYRILLIC_SUPPLEMENT_ID</li>
+<li>(stable) public static final UCharacter.UnicodeBlock <i>forName</i>(java.lang.String)</li>
+</ul>
+UProperty
+<ul>
+<li>(stable) public static final int LEAD_CANONICAL_COMBINING_CLASS</li>
+<li>(stable) public static final int NFC_INERT</li>
+<li>(stable) public static final int NFC_QUICK_CHECK</li>
+<li>(stable) public static final int NFD_INERT</li>
+<li>(stable) public static final int NFD_QUICK_CHECK</li>
+<li>(stable) public static final int NFKC_INERT</li>
+<li>(stable) public static final int NFKC_QUICK_CHECK</li>
+<li>(stable) public static final int NFKD_INERT</li>
+<li>(stable) public static final int NFKD_QUICK_CHECK</li>
+<li>(stable) public static final int SEGMENT_STARTER</li>
+<li>(stable) public static final int S_TERM</li>
+<li>(stable) public static final int TRAIL_CANONICAL_COMBINING_CLASS</li>
+<li>(stable) public static final int VARIATION_SELECTOR</li>
+</ul>
+UScript
+<ul>
+<li>(stable) public static final int KATAKANA_OR_HIRAGANA</li>
+<li>(stable) public static final int[] <i>getCode</i>(ULocale)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+<li>(stable) public abstract class <i>MeasureFormat</i></li>
+<li>(stable) public class <i>MessageFormat</i></li>
+ChineseDateFormatSymbols
+<ul>
+<li>(stable) protected void <i>initializeData</i>(ULocale, CalendarData)</li>
+</ul>
+Collator
+<ul>
+<li>(stable) public static final ULocale[] <i>getAvailableULocales</i>()</li>
+<li>(stable) public static final ULocale <i>getFunctionalEquivalent</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static final ULocale <i>getFunctionalEquivalent</i>(java.lang.String, ULocale, boolean[])</li>
+<li>(stable) public static final Collator <i>getInstance</i>(ULocale)</li>
+<li>(stable) public static final java.lang.String[] <i>getKeywordValues</i>(java.lang.String)</li>
+<li>(stable) public static final java.lang.String[] <i>getKeywords</i>()</li>
+</ul>
+DateFormat
+<ul>
+<li>(stable) public static final int DOW_LOCAL_FIELD</li>
+<li>(stable) public static final int EXTENDED_YEAR_FIELD</li>
+<li>(stable) public static final int FIELD_COUNT</li>
+<li>(stable) public static final int FRACTIONAL_SECOND_FIELD</li>
+<li>(stable) public static final int JULIAN_DAY_FIELD</li>
+<li>(stable) public static final int MILLISECONDS_IN_DAY_FIELD</li>
+<li>(stable) public static final int TIMEZONE_RFC_FIELD</li>
+<li>(stable) public static final int YEAR_WOY_FIELD</li>
+</ul>
+DateFormatSymbols
+<ul>
+<li>(stable) protected void <i>initializeData</i>(ULocale, CalendarData)</li>
+<li>(stable) protected void <i>initializeData</i>(ULocale, java.lang.String)</li>
+</ul>
+DecimalFormat
+<ul>
+<li>(stable) public boolean <i>areSignificantDigitsUsed</i>()</li>
+<li>(stable) public int <i>getMaximumSignificantDigits</i>()</li>
+<li>(stable) public int <i>getMinimumSignificantDigits</i>()</li>
+<li>(stable) public void <i>setMaximumSignificantDigits</i>(int)</li>
+<li>(stable) public void <i>setMinimumSignificantDigits</i>(int)</li>
+<li>(stable) public void <i>setSignificantDigitsUsed</i>(boolean)</li>
+</ul>
+DecimalFormatSymbols
+<ul>
+<li>(stable) public char <i>getSignificantDigit</i>()</li>
+<li>(stable) public void <i>setSignificantDigit</i>(char)</li>
+</ul>
+NumberFormat
+<ul>
+<li>(stable) public final java.lang.String <i>format</i>(CurrencyAmount)</li>
+<li>(stable) public java.lang.StringBuffer <i>format</i>(CurrencyAmount, java.lang.StringBuffer, java.text.FieldPosition)</li>
+</ul>
+RuleBasedNumberFormat
+<ul>
+<li>(stable) public java.lang.String <i>getDefaultRuleSetName</i>()</li>
+</ul>
+Transliterator
+<ul>
+<li>(stable) public Transliterator[] <i>getElements</i>()</li>
+</ul>
+UTF16
+<ul>
+<li>(stable) public static java.lang.StringBuffer <i>appendCodePoint</i>(java.lang.StringBuffer, int)</li>
+<li>(stable) public static java.lang.String <i>newString</i>(int[], int, int)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+<li>(stable) public class <i>CurrencyAmount</i></li>
+<li>(stable) public abstract class <i>Measure</i></li>
+<li>(stable) public abstract class <i>MeasureUnit</i></li>
+<li>(stable) public abstract class <i>UResourceBundle</i></li>
+<li>(stable) public class <i>UResourceTypeMismatchException</i></li>
+LocaleData
+<ul>
+<li>(stable) public static UnicodeSet <i>getExemplarSet</i>(ULocale, int)</li>
+<li>(stable) public static final LocaleData.MeasurementSystem <i>getMeasurementSystem</i>(ULocale)</li>
+<li>(stable) public static final LocaleData.PaperSize <i>getPaperSize</i>(ULocale)</li>
+</ul>
+ULocale
+<ul>
+<li>(stable) public static final ULocale CANADA</li>
+<li>(stable) public static final ULocale CANADA_FRENCH</li>
+<li>(stable) public static final ULocale CHINA</li>
+<li>(stable) public static final ULocale CHINESE</li>
+<li>(stable) public static final ULocale ENGLISH</li>
+<li>(stable) public static final ULocale FRANCE</li>
+<li>(stable) public static final ULocale FRENCH</li>
+<li>(stable) public static final ULocale GERMAN</li>
+<li>(stable) public static final ULocale GERMANY</li>
+<li>(stable) public static final ULocale ITALIAN</li>
+<li>(stable) public static final ULocale ITALY</li>
+<li>(stable) public static final ULocale JAPAN</li>
+<li>(stable) public static final ULocale JAPANESE</li>
+<li>(stable) public static final ULocale KOREA</li>
+<li>(stable) public static final ULocale KOREAN</li>
+<li>(stable) public static final ULocale PRC</li>
+<li>(stable) public static final ULocale SIMPLIFIED_CHINESE</li>
+<li>(stable) public static final ULocale TAIWAN</li>
+<li>(stable) public static final ULocale TRADITIONAL_CHINESE</li>
+<li>(stable) public static final ULocale UK</li>
+<li>(stable) public static final ULocale US</li>
+<li>(stable) public <i>ULocale</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public <i>ULocale</i>(java.lang.String, java.lang.String, java.lang.String)</li>
+<li>(stable) public static java.lang.String <i>canonicalize</i>(java.lang.String)</li>
+<li>(stable) public java.lang.Object <i>clone</i>()</li>
+<li>(stable) public static ULocale <i>createCanonical</i>(java.lang.String)</li>
+<li>(stable) public boolean <i>equals</i>(java.lang.Object)</li>
+<li>(stable) public static ULocale <i>forLocale</i>(java.util.Locale)</li>
+<li>(stable) public static ULocale[] <i>getAvailableLocales</i>()</li>
+<li>(stable) public java.lang.String <i>getBaseName</i>()</li>
+<li>(stable) public static java.lang.String <i>getBaseName</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getCountry</i>()</li>
+<li>(stable) public static java.lang.String <i>getCountry</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayCountry</i>()</li>
+<li>(stable) public java.lang.String <i>getDisplayCountry</i>(ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayCountry</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayCountry</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public static java.lang.String <i>getDisplayKeyword</i>(java.lang.String)</li>
+<li>(stable) public static java.lang.String <i>getDisplayKeyword</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayKeyword</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayKeywordValue</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayKeywordValue</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayKeywordValue</i>(java.lang.String, java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayKeywordValue</i>(java.lang.String, java.lang.String, java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayLanguage</i>()</li>
+<li>(stable) public java.lang.String <i>getDisplayLanguage</i>(ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayLanguage</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayLanguage</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayName</i>()</li>
+<li>(stable) public java.lang.String <i>getDisplayName</i>(ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayName</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayName</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayScript</i>()</li>
+<li>(stable) public java.lang.String <i>getDisplayScript</i>(ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayScript</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayScript</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getDisplayVariant</i>()</li>
+<li>(stable) public java.lang.String <i>getDisplayVariant</i>(ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayVariant</i>(java.lang.String, ULocale)</li>
+<li>(stable) public static java.lang.String <i>getDisplayVariant</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public ULocale <i>getFallback</i>()</li>
+<li>(stable) public static java.lang.String <i>getFallback</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getISO3Country</i>()</li>
+<li>(stable) public static java.lang.String <i>getISO3Country</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getISO3Language</i>()</li>
+<li>(stable) public static java.lang.String <i>getISO3Language</i>(java.lang.String)</li>
+<li>(stable) public static java.lang.String[] <i>getISOCountries</i>()</li>
+<li>(stable) public static java.lang.String[] <i>getISOLanguages</i>()</li>
+<li>(stable) public java.lang.String <i>getKeywordValue</i>(java.lang.String)</li>
+<li>(stable) public static java.lang.String <i>getKeywordValue</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public java.util.Iterator <i>getKeywords</i>()</li>
+<li>(stable) public static java.util.Iterator <i>getKeywords</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getLanguage</i>()</li>
+<li>(stable) public static java.lang.String <i>getLanguage</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getName</i>()</li>
+<li>(stable) public static java.lang.String <i>getName</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getScript</i>()</li>
+<li>(stable) public static java.lang.String <i>getScript</i>(java.lang.String)</li>
+<li>(stable) public java.lang.String <i>getVariant</i>()</li>
+<li>(stable) public static java.lang.String <i>getVariant</i>(java.lang.String)</li>
+<li>(stable) public int <i>hashCode</i>()</li>
+<li>(stable) public static synchronized void <i>setDefault</i>(ULocale)</li>
+<li>(stable) public ULocale <i>setKeywordValue</i>(java.lang.String, java.lang.String)</li>
+<li>(stable) public static java.lang.String <i>setKeywordValue</i>(java.lang.String, java.lang.String, java.lang.String)</li>
+<li>(stable) public java.lang.String <i>toString</i>()</li>
+</ul>
+</ul>
+
+
+<hr/>
+<h2>Added in ICU4J 3.6</h2>
+
+<h3>Package com.ibm.icu.lang</h3>
+<ul>
+<li><span style='color:red'>*internal* </span>public final class <i>UScriptRun</i></li>
+UCharacter
+<ul>
+<li><span style='color:red'>*internal* </span>public static java.lang.String <i>getName</i>(java.lang.String, java.lang.String)</li>
+<li><span style='color:red'>*internal* </span>public static java.lang.String <i>getStringPropertyValue</i>(int, int, int)</li>
+</ul>
+UCharacter.UnicodeBlock
+<ul>
+<li>(draft) public static final UCharacter.UnicodeBlock BALINESE</li>
+<li>(draft) public static final int BALINESE_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock COUNTING_ROD_NUMERALS</li>
+<li>(draft) public static final int COUNTING_ROD_NUMERALS_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock CUNEIFORM</li>
+<li>(draft) public static final int CUNEIFORM_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION</li>
+<li>(draft) public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock LATIN_EXTENDED_C</li>
+<li>(draft) public static final int LATIN_EXTENDED_C_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock LATIN_EXTENDED_D</li>
+<li>(draft) public static final int LATIN_EXTENDED_D_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock NKO</li>
+<li>(draft) public static final int NKO_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock PHAGS_PA</li>
+<li>(draft) public static final int PHAGS_PA_ID</li>
+<li>(draft) public static final UCharacter.UnicodeBlock PHOENICIAN</li>
+<li>(draft) public static final int PHOENICIAN_ID</li>
+</ul>
+UScript
+<ul>
+<li>(draft) public static final int BALINESE</li>
+<li>(draft) public static final int BATAK</li>
+<li>(draft) public static final int BLISSYMBOLS</li>
+<li>(draft) public static final int BRAHMI</li>
+<li>(draft) public static final int CHAM</li>
+<li>(draft) public static final int CIRTH</li>
+<li>(draft) public static final int CUNEIFORM</li>
+<li>(draft) public static final int DEMOTIC_EGYPTIAN</li>
+<li>(draft) public static final int EASTERN_SYRIAC</li>
+<li>(draft) public static final int EGYPTIAN_HIEROGLYPHS</li>
+<li>(draft) public static final int ESTRANGELO_SYRIAC</li>
+<li>(draft) public static final int HARAPPAN_INDUS</li>
+<li>(draft) public static final int HIERATIC_EGYPTIAN</li>
+<li>(draft) public static final int JAVANESE</li>
+<li>(draft) public static final int KAYAH_LI</li>
+<li>(draft) public static final int KHUTSURI</li>
+<li>(draft) public static final int LATIN_FRAKTUR</li>
+<li>(draft) public static final int LATIN_GAELIC</li>
+<li>(draft) public static final int LEPCHA</li>
+<li>(draft) public static final int LINEAR_A</li>
+<li>(draft) public static final int MANDAEAN</li>
+<li>(draft) public static final int MAYAN_HIEROGLYPHS</li>
+<li>(draft) public static final int MEROITIC</li>
+<li>(draft) public static final int NKO</li>
+<li>(draft) public static final int OLD_CHURCH_SLAVONIC_CYRILLIC</li>
+<li>(draft) public static final int OLD_HUNGARIAN</li>
+<li>(draft) public static final int OLD_PERMIC</li>
+<li>(draft) public static final int ORKHON</li>
+<li>(draft) public static final int PAHAWH_HMONG</li>
+<li>(draft) public static final int PHAGS_PA</li>
+<li>(draft) public static final int PHOENICIAN</li>
+<li>(draft) public static final int PHONETIC_POLLARD</li>
+<li>(draft) public static final int RONGORONGO</li>
+<li>(draft) public static final int SARATI</li>
+<li>(draft) public static final int SIMPLIFIED_HAN</li>
+<li>(draft) public static final int TENGWAR</li>
+<li>(draft) public static final int TRADITIONAL_HAN</li>
+<li>(draft) public static final int UNKNOWN</li>
+<li>(draft) public static final int UNWRITTEN_LANGUAGES</li>
+<li>(draft) public static final int VAI</li>
+<li>(draft) public static final int VISIBLE_SPEECH</li>
+<li>(draft) public static final int WESTERN_SYRIAC</li>
+<li><span style='color:red'>*internal* </span>public static final int <i>getCodeFromName</i>(java.lang.String)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.text</h3>
+<ul>
+<li><span style='color:red'>*internal* </span>public class <i>BreakDictionary</i></li>
+<li>(draft) public class <i>DateTimePatternGenerator</i></li>
+<li><span style='color:red'>*internal* </span>public static class <i>DateTimePatternGenerator.FormatParser</i></li>
+<li>(draft) public static final class <i>DateTimePatternGenerator.PatternInfo</i></li>
+<li><span style='color:red'>*internal* </span>public static class <i>DateTimePatternGenerator.VariableField</i></li>
+<li>(stable) public static class <i>NumberFormat.Field</i></li>
+<li><span style='color:red'>*internal* </span>public class <i>RuleBasedTransliterator</i></li>
+<li><span style='color:red'>*internal* </span>public static abstract class <i>UnicodeSet.XSymbolTable</i></li>
+ArabicShapingException
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>ArabicShapingException</i>(java.lang.String)</li>
+</ul>
+BreakIterator
+<ul>
+<li><span style='color:red'>*internal* </span>public static BreakIterator <i>getBreakInstance</i>(ULocale, int)</li>
+</ul>
+CanonicalIterator
+<ul>
+<li><span style='color:red'>*internal* </span>public static void <i>permute</i>(java.lang.String, boolean, java.util.Set)</li>
+</ul>
+ChineseDateFormat
+<ul>
+<li><span style='color:red'>*internal* </span>protected void <i>subFormat</i>(java.lang.StringBuffer, char, int, int, java.text.FieldPosition, Calendar)</li>
+</ul>
+DateFormat
+<ul>
+<li>(draft) public static final int QUARTER_FIELD</li>
+<li>(draft) public static final int STANDALONE_QUARTER_FIELD</li>
+</ul>
+DateFormatSymbols
+<ul>
+<li><span style='color:red'>*internal* </span>public static final int DT_CONTEXT_COUNT</li>
+<li><span style='color:red'>*internal* </span>public static final int DT_WIDTH_COUNT</li>
+<li>(draft) public java.lang.String[] <i>getQuarters</i>(int, int)</li>
+<li><span style='color:red'>*internal* </span>public void <i>setEraNames</i>(java.lang.String[])</li>
+<li><span style='color:red'>*internal* </span>public void <i>setMonths</i>(java.lang.String[], int, int)</li>
+<li><span style='color:red'>*internal* </span>public void <i>setQuarters</i>(java.lang.String[], int, int)</li>
+<li><span style='color:red'>*internal* </span>public void <i>setWeekdays</i>(java.lang.String[], int, int)</li>
+</ul>
+DecimalFormat
+<ul>
+<li>(stable) public java.text.AttributedCharacterIterator <i>formatToCharacterIterator</i>(java.lang.Object)</li>
+<li><span style='color:red'>*internal* </span>protected Currency <i>getEffectiveCurrency</i>()</li>
+<li>(stable) public boolean <i>isParseBigDecimal</i>()</li>
+<li>(stable) public void <i>setParseBigDecimal</i>(boolean)</li>
+<li>(draft) public void <i>setRoundingIncrement</i>(BigDecimal)</li>
+</ul>
+DecimalFormatSymbols
+<ul>
+<li>(draft) public char <i>getMonetaryGroupingSeparator</i>()</li>
+<li>(draft) public void <i>setMonetaryGroupingSeparator</i>(char)</li>
+</ul>
+DictionaryBasedBreakIterator
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>DictionaryBasedBreakIterator</i>(java.io.InputStream, java.io.InputStream)</li>
+<li>(draft) public int <i>getRuleStatus</i>()</li>
+<li>(draft) public int <i>getRuleStatusVec</i>(int[])</li>
+<li><span style='color:red'>*internal* </span>protected int <i>handleNext</i>()</li>
+</ul>
+MeasureFormat
+<ul>
+<li><span style='color:red'>*internal* </span>protected <i>MeasureFormat</i>()</li>
+</ul>
+Normalizer
+<ul>
+<li><span style='color:red'>*internal* </span>public static int <i>getFC_NFKC_Closure</i>(int, char[])</li>
+<li><span style='color:red'>*internal* </span>public static java.lang.String <i>getFC_NFKC_Closure</i>(int)</li>
+<li><span style='color:red'>*internal* </span>public static boolean <i>isNFSkippable</i>(int, Normalizer.Mode)</li>
+</ul>
+NumberFormat
+<ul>
+<li><span style='color:red'>*internal* </span>protected Currency <i>getEffectiveCurrency</i>()</li>
+<li>(draft) public boolean <i>isParseStrict</i>()</li>
+<li>(draft) public void <i>setParseStrict</i>(boolean)</li>
+</ul>
+RuleBasedBreakIterator
+<ul>
+<li><span style='color:red'>*internal* </span>protected static java.lang.String fDebugEnv</li>
+<li><span style='color:red'>*internal* </span>protected int fDictionaryCharCount</li>
+<li><span style='color:red'>*internal* </span>protected RBBIDataWrapper fRData</li>
+<li><span style='color:red'>*internal* </span>public static boolean fTrace</li>
+<li><span style='color:red'>*internal* </span>public <i>RuleBasedBreakIterator</i>()</li>
+<li>(stable) protected static final void <i>checkOffset</i>(int, java.text.CharacterIterator)</li>
+<li><span style='color:red'>*internal* </span>public void <i>dump</i>()</li>
+</ul>
+SimpleDateFormat
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>SimpleDateFormat</i>(java.lang.String, DateFormatSymbols, ULocale)</li>
+<li>(stable) protected int <i>matchQuarterString</i>(java.lang.String, int, int, java.lang.String[], Calendar)</li>
+<li>(stable) public void <i>setNumberFormat</i>(NumberFormat)</li>
+<li><span style='color:red'>*internal* </span>protected void <i>subFormat</i>(java.lang.StringBuffer, char, int, int, java.text.FieldPosition, Calendar)</li>
+<li><span style='color:red'>*internal* </span>protected void <i>zeroPaddingNumber</i>(java.lang.StringBuffer, int, int, int)</li>
+</ul>
+Transliterator
+<ul>
+<li>(draft) public static void <i>registerAlias</i>(java.lang.String, java.lang.String)</li>
+</ul>
+UTF16
+<ul>
+<li>(stable) public static int <i>charAt</i>(java.lang.CharSequence, int)</li>
+</ul>
+UnicodeFilter
+<ul>
+<li><span style='color:red'>*internal* </span>protected <i>UnicodeFilter</i>()</li>
+</ul>
+UnicodeSet
+<ul>
+<li><span style='color:red'>*internal* </span>public static final int CASE</li>
+<li><span style='color:red'>*internal* </span>public static final int IGNORE_SPACE</li>
+<li><span style='color:red'>*internal* </span>public <i>UnicodeSet</i>(java.lang.String, int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.StringBuffer <i>_generatePattern</i>(java.lang.StringBuffer, boolean, boolean)</li>
+<li><span style='color:red'>*internal* </span>public UnicodeSet <i>applyPattern</i>(java.lang.String, int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.Object <i>cloneAsThawed</i>()</li>
+<li><span style='color:red'>*internal* </span>public UnicodeSet <i>closeOver</i>(int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.Object <i>freeze</i>()</li>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getRegexEquivalent</i>()</li>
+<li><span style='color:red'>*internal* </span>public boolean <i>isFrozen</i>()</li>
+<li><span style='color:red'>*internal* </span>public int <i>matchesAt</i>(java.lang.CharSequence, int)</li>
+</ul>
+UnicodeSetIterator
+<ul>
+<li><span style='color:red'>*internal* </span>protected int endElement</li>
+<li><span style='color:red'>*internal* </span>protected int nextElement</li>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getString</i>()</li>
+<li><span style='color:red'>*internal* </span>protected void <i>loadRange</i>(int)</li>
+</ul>
+</ul>
+
+<h3>Package com.ibm.icu.util</h3>
+<ul>
+<li><span style='color:red'>*internal* </span>public final class <i>CompactByteArray</i></li>
+<li><span style='color:red'>*internal* </span>public final class <i>CompactCharArray</i></li>
+<li><span style='color:red'>*internal* </span>public interface <i>Freezable</i></li>
+<li>(draft) public class <i>GlobalizationPreferences</i></li>
+<li><span style='color:red'>*internal* </span>public class <i>OverlayBundle</i></li>
+BuddhistCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+Calendar
+<ul>
+<li>(draft) protected int <i>getDefaultDayInMonth</i>(int, int)</li>
+<li>(draft) protected int <i>getDefaultMonthInYear</i>(int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+CaseInsensitiveString
+<ul>
+<li>(stable) public java.lang.String <i>toString</i>()</li>
+</ul>
+ChineseCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+CopticCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public static java.lang.Integer[] <i>getDateFromJD</i>(int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+Currency
+<ul>
+<li><span style='color:red'>*internal* </span>public static java.lang.String <i>parse</i>(ULocale, java.lang.String, java.text.ParsePosition)</li>
+</ul>
+EthiopicCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public static java.lang.Integer[] <i>getDateFromJD</i>(int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+GregorianCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+HebrewCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+IslamicCalendar
+<ul>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+JapaneseCalendar
+<ul>
+<li>(draft) protected int <i>getDefaultDayInMonth</i>(int, int)</li>
+<li>(draft) protected int <i>getDefaultMonthInYear</i>(int)</li>
+<li><span style='color:red'>*internal* </span>public java.lang.String <i>getType</i>()</li>
+</ul>
+MeasureUnit
+<ul>
+<li><span style='color:red'>*internal* </span>protected <i>MeasureUnit</i>()</li>
+</ul>
+SimpleTimeZone
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>SimpleTimeZone</i>(int, java.lang.String, int, int, int, int, int, int, int, int, int, int, int)</li>
+<li><span style='color:red'>*internal* </span>public <i>SimpleTimeZone</i>(java.util.SimpleTimeZone, java.lang.String)</li>
+<li>(draft) public java.lang.Object <i>clone</i>()</li>
+<li>(draft) public boolean <i>equals</i>(java.lang.Object)</li>
+<li><span style='color:red'>*internal* </span>public int <i>getOffset</i>(int, int, int, int, int, int)</li>
+<li><span style='color:red'>*internal* </span>public int <i>getOffset</i>(int, int, int, int, int, int, int)</li>
+<li><span style='color:red'>*internal* </span>public int <i>getRawOffset</i>()</li>
+<li><span style='color:red'>*internal* </span>public boolean <i>hasSameRules</i>(TimeZone)</li>
+<li>(draft) public int <i>hashCode</i>()</li>
+<li><span style='color:red'>*internal* </span>public boolean <i>inDaylightTime</i>(java.util.Date)</li>
+<li>(draft) public java.lang.String <i>toString</i>()</li>
+<li><span style='color:red'>*internal* </span>public boolean <i>useDaylightTime</i>()</li>
+</ul>
+StringTokenizer
+<ul>
+<li><span style='color:red'>*internal* </span>public <i>StringTokenizer</i>(java.lang.String, UnicodeSet, boolean, boolean)</li>
+<li><span style='color:red'>*internal* </span>public <i>StringTokenizer</i>(java.lang.String, java.lang.String, boolean, boolean)</li>
+</ul>
+TimeZone
+<ul>
+<li><span style='color:red'>*internal* </span>protected static final int MILLIS_PER_DAY</li>
+<li><span style='color:red'>*internal* </span>protected static final int MILLIS_PER_HOUR</li>
+<li>(draft) public boolean <i>equals</i>(java.lang.Object)</li>
+<li>(draft) public int <i>hashCode</i>()</li>
+</ul>
+UResourceBundle
+<ul>
+<li><span style='color:red'>*internal* </span>protected static synchronized void <i>addToCache</i>(java.lang.ClassLoader, java.lang.String, ULocale, UResourceBundle)</li>
+<li><span style='color:red'>*internal* </span>public static UResourceBundle <i>getBundleInstance</i>(java.lang.String, ULocale, java.lang.ClassLoader)</li>
+<li><span style='color:red'>*internal* </span>public static UResourceBundle <i>getBundleInstance</i>(java.lang.String, java.util.Locale, java.lang.ClassLoader)</li>
+<li><span style='color:red'>*internal* </span>protected static synchronized UResourceBundle <i>loadFromCache</i>(java.lang.ClassLoader, java.lang.String, ULocale)</li>
+<li><span style='color:red'>*internal* </span>protected abstract void <i>setLoadingStatus</i>(int)</li>
+</ul>
+UniversalTimeScale
+<ul>
+<li><span style='color:red'>*internal* </span>public static final int EPOCH_OFFSET_MINUS_1_VALUE</li>
+<li><span style='color:red'>*internal* </span>public static final int MAX_ROUND_VALUE</li>
+<li><span style='color:red'>*internal* </span>public static final int MAX_SCALE_VALUE</li>
+<li><span style='color:red'>*internal* </span>public static final int MIN_ROUND_VALUE</li>
+<li><span style='color:red'>*internal* </span>public static final int UNITS_ROUND_VALUE</li>
+<li><span style='color:red'>*internal* </span>public static BigDecimal <i>toBigDecimalTrunc</i>(BigDecimal, int)</li>
+</ul>
+VersionInfo
+<ul>
+<li><span style='color:red'>*internal* </span>public static final java.lang.String ICU_DATA_VERSION</li>
+<li>(draft) public static final VersionInfo UNICODE_5_0</li>
+<li><span style='color:red'>*internal* </span>public static VersionInfo <i>javaVersion</i>()</li>
+</ul>
+</ul>
+
+<hr/>
+<p><i><font size="-1">Contents generated by ReportAPI tool on Mon Sep 18 18:07:27 EDT 2006<br/>Copyright (C) 2006, International Business Machines Corporation, All Rights Reserved.</font></i></p>
+</body>
+</html>
diff --git a/build.properties b/build.properties
new file mode 100644
index 0000000..db82f7e
--- /dev/null
+++ b/build.properties
@@ -0,0 +1,14 @@
+#*
+#*******************************************************************************
+#* Copyright (C) 2006, International Business Machines Corporation and *
+#* others. All Rights Reserved. *
+#*******************************************************************************
+#* This is the properties file for ICU4J builds.
+#*
+icu4j.version.string=3.6
+icu4j.version.number=36
+current.year=2006
+icu4j.previous.version.number=34
+api.report.out=${api.dir}/icu4j_compare_${icu4j.previous.version.number}_${icu4j.version.number}.html
+copyright=Copyright (c) 2000-2006, International Business Machines Corporation and others. All Rights Reserved.
+corp=IBM Corporation
\ No newline at end of file
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..889dbea
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,2455 @@
+<!--
+/*
+*******************************************************************************
+* Copyright (C) 1997-2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+* This is the ant build file for ICU4J. See readme.html for more information.
+*/
+-->
+
+<project name="ICU4J" default="core" basedir=".">
+
+ <!-- ### Begin Clover 1.2 setup ### -->
+ <typedef resource="clovertypes"/>
+ <taskdef resource="clovertasks"/>
+
+ <target name="with.clover">
+ <mkdir dir="tmp/"/>
+ <clover-setup initString="tmp/icu4j.db">
+ <files>
+ <exclude name="**/dev/**/*.java"/>
+ </files>
+ <methodContext name="API" regexp="(.* )?public .*"/>
+ </clover-setup>
+ </target>
+
+ <target name="clover.summary" depends="with.clover">
+ <clover-report >
+ <current outfile="icu4j_html" summary="true">
+ <format type="html" srcLevel="false"/>
+ </current>
+ </clover-report>
+ </target>
+ <target name="clover.report" depends="with.clover">
+ <clover-report >
+ <current outfile="icu4j_html">
+ <format type="html"/>
+ </current>
+ </clover-report>
+ </target>
+ <target name="clover.log" depends="with.clover">
+ <clover-log level="method" filter="private">
+ <package name="com.ibm.icu.text"/>
+ <package name="com.ibm.icu.lang"/>
+ <package name="com.ibm.icu.math"/>
+ <package name="com.ibm.icu.util"/>
+ </clover-log>
+ </target>
+
+ <!-- ### End Clover 1.2 setup ### -->
+
+ <target name="anthack1">
+ <!-- It's a real pain to set properties conditionally.
+ Ant 1.5 isn't really any better than 1.4, in fact 1.5 enforces that
+ properties are write-once so you can't even set a default and then
+ override it, which we used to do for Ant 1.4. -->
+ <condition property="version.1.4">
+ <or>
+ <equals arg1="${ant.java.version}" arg2="1.4"/>
+ <equals arg1="${ant.java.version}" arg2="1.5"/>
+ <equals arg1="${ant.java.version}" arg2="1.6"/>
+ </or>
+ </condition>
+
+ <condition property="doc.params"
+ value="-breakiterator -use -tagletpath ./classes -taglet com.ibm.icu.dev.tool.docs.ICUTaglet -group 'ICU Core' 'com.ibm.icu.lang*:com.ibm.icu.math*:com.ibm.icu.text*:com.ibm.icu.util*:com.ibm.icu.charset' -group 'ICU Tests' 'com.ibm.icu.dev.test*' -group 'Demos' 'com.ibm.icu.dev.demo*' -group 'ICU Tools' 'com.ibm.icu.dev*'">
+ <equals arg1="${version.1.4}" arg2="true"/>
+ </condition>
+
+ <condition property="richeditdoc.params"
+ value="-breakiterator -use -tagletpath ./classes -taglet com.ibm.icu.dev.tool.docs.ICUTaglet -group 'Rich Text Editing' 'com.ibm.richtext*'">
+ <equals arg1="${version.1.4}" arg2="true"/>
+ </condition>
+ </target>
+
+ <target name="anthack2" unless="doc.params">
+ <property name="doc.params" value=""/>
+ </target>
+
+ <target name="init">
+ <tstamp/>
+ <property name="src.dir" value="src"/>
+ <property name="build.dir" value="classes"/>
+ <property name="api.dir" value="${src.dir}/com/ibm/icu/dev/tool/docs"/>
+ <property name="doc.dir" value="doc"/>
+ <property name="testjar.file" value="icu4jtests.jar"/>
+ <property name="jar.file" value="icu4j.jar"/>
+ <property name="charsets.jar.file" value="icu4j-charsets.jar"/>
+ <property name="jarSrc.file" value="icu4jsrc.jar"/>
+ <property name="zipCoreSrc.file" value="icu4jcsrc.zip"/>
+ <property name="zipTestSrc.file" value="icu4jtsrc.zip"/>
+ <property name="jdk.wrapper.jar.file" value="icu4jwrapper.jar"/>
+ <property name="eclipse.dir" value="${src.dir}/com/ibm/icu/dev/eclipse"/>
+ <property name="eclipse.projects.dir" value="eclipseProjects"/>
+ <property name="wrapper.build.dir" value="classes.wrapper"/>
+ <property name="jarDocs.file" value="icu4jdocs.jar"/>
+ <property name="ime.translit.jar.file" value="icutransime.jar"/>
+ <property name="ime.indic.jar.file" value="icuindicime.jar"/>
+ <property name="ime.translit.manifest" value="${src.dir}/com/ibm/icu/dev/tool/ime/translit/manifest.stub"/>
+ <property name="ime.indic.manifest" value="${src.dir}/com/ibm/icu/dev/tool/ime/indic/manifest.stub"/>
+ <property name="zip.file" value="../icu4j${DSTAMP}.zip"/>
+ <property name="zipSrc.file" value="../icu4jSrc${DSTAMP}.zip"/>
+
+ <property file="build.properties" />
+ <!-- fix the data folder every time there is a version update-->
+
+ <property name="icu4j.data.path" value="com/ibm/icu/impl/data/icudt${icu4j.version.number}b/" />
+ <property name="icu4j.testdata.path" value="com/ibm/icu/dev/data/testdata/" />
+ <property name="icu4j.javac.source" value="1.3"/>
+ <property name="icu4j.javac.target" value="1.3"/>
+
+ <path id="build.classpath">
+ <pathelement path="${build.dir}"/>
+ </path>
+
+ <property name="richedit.dir" value="richedit"/>
+ <property name="richedit.doc.dir" value="${richedit.dir}/doc"/>
+ <property name="richedit.jar.file" value="${richedit.dir}/richedit.jar"/>
+ <property name="richedit.zip.file" value="${richedit.dir}/richedit.zip"/>
+ <property name="richedit.manifest" value="${src.dir}/com/ibm/richtext/manifest.stub"/>
+
+ <!-- Load environment variables -->
+ <property environment="env"/>
+
+ <!-- Capture the computer name in a cross-platform manner -->
+ <property name="env.COMPUTERNAME" value="${env.HOSTNAME}"/>
+
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${wrapper.build.dir}"/>
+
+ <echo message="java home: ${java.home}"/>
+ <echo message="java version: ${java.version}"/>
+ <echo message="ant java version: ${ant.java.version}"/>
+ <echo message="${ant.version}"/>
+ <echo message="${env.COMPUTERNAME} with ${os.name} ${os.version} on ${os.arch}"/>
+ <echo message="clover initstring = '${clover.initstring}'"/>
+
+ <uptodate property="icu4j.resources" targetfile="${src.dir}/com/ibm/icu/impl/data">
+ <!-- note must not have '/' before 'com' in srcfiles includes arg! -->
+ <srcfiles dir="${build.dir}" includes="${icu4j.data.path}/res_index.res"/>
+ </uptodate>
+
+ <uptodate property="icu4j.testdata.resources" targetfile="${src.dir}/com/ibm/icu/dev/data">
+ <!-- note must not have '/' before 'com' in srcfiles includes arg! (uses icu4j.testdata.path, defined earlier in this target) -->
+ <srcfiles dir="${build.dir}" includes="${icu4j.testdata.path}/root.res"/>
+ </uptodate>
+
+ <uptodate property="icu4j.module.resources" targetfile="${src.dir}/com/ibm/icu/impl/data/icudata.jar" >
+ <srcfiles dir="${build.dir}" includes="${icu4j.data.path}/*.icu"/>
+ </uptodate>
+ <!-- <echo message="icu4j.module.resources result: ${icu4j.module.resources}" /> -->
+ </target>
+
+ <!-- build everything but dist-related stuff -->
+ <target name="all" depends="core,tests,tools,richedit,demos,jar,docs" description="build all primary targets"/>
+
+ <target name="resources" depends="init, icudata, coreData, testdata" description="builds all the resources"/>
+
+ <target name="icudata" depends="init" if="icu4j.resources">
+ <!-- use this target to force resources to be rebuilt -->
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}"/>
+ <touch file="${build.dir}/${icu4j.data.path}/res_index.res"/>
+ <copy todir="${build.dir}/META-INF">
+ <fileset dir="${src.dir}/META-INF" includes="**/*"/>
+ </copy>
+ </target>
+
+<!--
+ use this target to conditionally build resources only if icu4j.resources is set
+ <target name="icu" depends="init" if="icu4j.resources">
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/ICULocaleData.jar" dest="${build.dir}"/>
+ <touch file="${build.dir}/com/ibm/icu/impl/data/LocaleElements_index.class"/>
+ </target>
+ -->
+
+ <!-- core does not build richedit or tests -->
+ <target name="core" depends="init,coreData,icudata" description="build core classes and data">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off"
+ encoding="ascii">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/charset/**/*.java"/>
+ <include name="com/ibm/icu/impl/**/*.java"/>
+ <include name="com/ibm/icu/lang/*.java"/>
+ <include name="com/ibm/icu/math/**/*.java"/>
+ <include name="com/ibm/icu/text/**/*.java"/>
+ <include name="com/ibm/icu/util/**/*.java"/>
+ </javac>
+ </target>
+
+ <target name="tests" depends="core,testdata" description="build tests">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/calendar/**/*.java"/>
+ </javac>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/collator/**/*.java"/>
+
+ </javac>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/format/**/*.java"/>
+ </javac>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/translit/**/*.java"/>
+ </javac>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/lang/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/normalizer/**/*.java"/>
+ </javac>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/serializable/**/*.java"/>
+ </javac>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/test/**/*.java"/>
+ </javac>
+
+ </target>
+
+ <target name="demos" depends="core,testdata" description="build demos">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/icu/dev/demo/**/*.java"/>
+ </javac>
+ </target>
+
+ <target name="buildmangle" depends="init,anthack1" if="version.1.4">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}">
+
+ <include name="com/ibm/icu/dev/tool/docs/CodeMangler.java"/>
+ </javac>
+ </target>
+
+ <target name="mangle" depends="buildmangle" if="version.1.4">
+ <!-- this is a hack for now, need a way to express dependency on the .jpp files
+ and to provide directories or trees of files to the code mangler tool. -->
+ <java classname="com.ibm.icu.dev.tool.docs.CodeMangler" classpath="${build.dir}" logError="true">
+ <arg value="-dVERSION_${ant.java.version}"/>
+ <arg value="${src.dir}/com/ibm/icu/dev/tool/docs/ICUTaglet.java"/>
+ </java>
+ </target>
+
+ <target name="indices" depends="icudata,build_indexgenerator">
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}" />
+ </java>
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}/rbnf" />
+ </java>
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}/coll" />
+ </java>
+ <java classname="com.ibm.icu.dev.tool.index.IndexGenerator" classpath="${build.dir}" logError="true">
+ <arg value="${build.dir}/${icu4j.data.path}/translit" />
+ </java>
+ </target>
+
+ <target name="one4tools" depends="anthack1,anthack2,mangle" if="version.1.4">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+
+ <include name="com/ibm/icu/dev/localeconverter/docs/*.java"/>
+ <include name="com/ibm/icu/dev/tool/docs/*.java"/>
+ </javac>
+ </target>
+
+ <target name="tools" depends="core,one4tools" description="build tools">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="com/ibm/icu/dev/tool/cldr/*"/>
+ <exclude name="com/ibm/icu/dev/tool/localeconverter/*"/>
+ <exclude name="com/ibm/icu/dev/tool/docs/*"/>
+ <exclude name="**/CVS/**/*"/>
+
+ <include name="com/ibm/icu/dev/tool/**/*.java"/>
+ </javac>
+ </target>
+
+ <target name="build_indexgenerator" depends="core">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <include name="com/ibm/icu/dev/tool/index/IndexGenerator.java"/>
+ </javac>
+ </target>
+
+ <!-- use excludefiles below when we move to ant 1.5 -->
+ <target name="docs" depends="anthack1,anthack2,tools" description="build user javadoc"> <!-- markup inside 'bottom' is entity-escaped so this file stays well-formed XML -->
+ <echo message="doc params: ${doc.params}"/>
+ <mkdir dir="${doc.dir}"/>
+ <javadoc
+ packagenames="com.ibm.icu.lang,com.ibm.icu.text,com.ibm.icu.util,com.ibm.icu.math,com.ibm.icu.charset,com.ibm.icu.dev"
+ sourcepath="${src.dir}"
+ destdir="${doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="icu4j"
+ doctitle="icu4j"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) ${current.year} IBM Corporation and others.&lt;/font&gt;"
+ additionalparam="${doc.params}"
+ link="http://java.sun.com/j2se/1.5/docs/api"
+ source="1.4"/>
+ </target>
+
+ <target name="fulldocs" depends="anthack1,anthack2,tools" description="build all javadoc"> <!-- 'bottom' markup is entity-escaped; the year comes from current.year in build.properties -->
+ <echo message="doc params: ${doc.params}"/>
+ <mkdir dir="${doc.dir}"/>
+ <javadoc
+ packagenames="com.ibm.icu.*"
+ sourcepath="${src.dir}"
+ destdir="${doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="icu4j"
+ doctitle="icu4j"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) ${current.year} IBM Corporation and others.&lt;/font&gt;"
+ additionalparam="${doc.params}"
+ link="http://java.sun.com/j2se/1.4/docs/api"
+ source="1.4"/>
+ </target>
+
+ <target name ="coreData" depends="init">
+ <copy todir="${build.dir}/com/ibm/icu/impl/data">
+ <fileset dir="${src.dir}/com/ibm/icu/impl/data"
+ includes="*.icu,*.spp,*.brk"
+ excludes="**/CVS/**/*,Transliterator_Han_Latin_*.txt"/>
+ </copy>
+ </target>
+
+ <target name ="testdata" depends="init">
+ <copy file="${src.dir}/com/ibm/icu/dev/data/rbbi/english.dict"
+ todir="${build.dir}/com/ibm/icu/dev/data/rbbi"/>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/rbbi/rbbitst.txt"
+ todir="${build.dir}/com/ibm/icu/dev/test/rbbi"/>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/charsetdet/CharsetDetectionTests.xml"
+ todir="${build.dir}/com/ibm/icu/dev/test/charsetdet"/>
+ <copy todir="${build.dir}/com/ibm/icu/dev/test/serializable/data">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/test/serializable/data">
+ <include name="**/*.dat"/>
+ </fileset>
+ </copy>
+ <copy file="${src.dir}/com/ibm/icu/dev/data/riwords.txt"
+ todir="${build.dir}/com/ibm/icu/dev/data"/>
+ <copy file="${src.dir}/com/ibm/icu/dev/data/IDNATestInput.txt"
+ todir="${build.dir}/com/ibm/icu/dev/data"/>
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+
+ <include name="com/ibm/icu/dev/data/*.java"/>
+ <include name="com/ibm/icu/dev/data/resources/*.java"/>
+ </javac>
+ <copy todir="${build.dir}/com/ibm/icu/dev/data">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/data"
+ includes="*.spp,*.txt"/>
+ </copy>
+ <copy todir="${build.dir}/com/ibm/icu/dev/data/unicode/">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/data/unicode/"
+ includes="*.txt"/>
+ </copy>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/format/NumberFormatTestCases.txt"
+ todir="${build.dir}/com/ibm/icu/dev/test/format"/>
+ <copy todir="${build.dir}/com/ibm/icu/dev/data/resources/">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/data/resources/"
+ includes="*.properties"/>
+ </copy>
+ <!-- use this target to force resources to be rebuilt -->
+ <unjar src="${src.dir}/com/ibm/icu/dev/data/testdata.jar" dest="${build.dir}"/>
+ <touch file="${build.dir}/${icu4j.testdata.path}/root.res"/>
+ </target>
+
+ <!-- builds richedit and richedit tests -->
+ <target name="richedit" depends="init" description="build richedit classes and tests">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="com/ibm/richtext/**/*.java"/>
+ </javac>
+ <copy todir="${build.dir}/com/ibm/richtext/textapps/resources" overwrite="yes" includeEmptyDirs="no">
+ <fileset dir="${src.dir}/com/ibm/richtext/textapps/resources" includes="*.red"/>
+ </copy>
+ </target>
+
+ <!-- Creates the 3 release jar archives for distribution -->
+ <target name="jarRelease" depends="jar,jarSrc,jarDocs"/>
+
+ <target name="jar" depends="core,indices,charsetsJar" description="build 'icu4j.jar' jar file">
+ <jar jarfile="${jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html, unicode-license.txt" />
+ <fileset dir="${build.dir}"
+ includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*"
+ excludes="META-INF/services/**/*,com/ibm/icu/charset/**/*,${icu4j.data.path}/*.cnv, ${icu4j.data.path}/cnvalias.icu"/>
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="charsetsJar" depends="core,indices" description="build 'icu4j-charsets.jar' jar file">
+ <jar jarfile="${charsets.jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html, unicode-license.txt" />
+ <fileset dir="${build.dir}"
+ includes="META-INF/services/**/*,com/ibm/icu/charset/**/*,${icu4j.data.path}/*.cnv, ${icu4j.data.path}/cnvalias.icu"/>
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Charsets"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java Charsets"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="jarTests" depends="tests,jar" description="build runtime 'icu4jtests.jar' jar file">
+ <jar jarfile="${testjar.file}" compress="true">
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/test/**/*"/>
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/data/**/*"/>
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Tests"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java Tests"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="jarSrc" depends="init" description="build source 'icu4jsrc.jar' jar file">
+ <!--Create a jar archive of just the source for distribution. The
+ jar file will be created in the directory above the root ICU4J
+ directory. The exclude pattern ${src.dir}/com/ibm/icu/dev/data/unicode/UnicodeData-*.txt
+ and ${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing-*.txt
+ eliminates old archive copies like SpecialCasing-2.txt and
+ UnicodeData-2.1.8.txt -->
+ <jar jarfile="${jarSrc.file}"
+ compress="true"
+ basedir="."
+ excludes="${richedit.dir}/**/*,test_*,${src.dir}/com/ibm/icu/dev/data/unicode//UnicodeData-*.txt,${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing-*.txt,${src.dir}/com/ibm/icu/dev/data/unicode/CompositionExclusions-*.txt,${obsolete.dir}/**,**/*~,${src.dir}/**/*.class,${build.dir}/**,**/CVS/**/*,${doc.dir}/**,*.jar,*.zip, *.gz">
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Source Files"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="jarDocs" depends="docs" description="build documentation 'icu4jdocs.jar' jar file">
+ <jar jarfile="${jarDocs.file}"
+ compress="true"
+ basedir="${doc.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Documentation"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!--
+ Builds the transliterator input method jar. Compiles the IME sources into ${build.dir},
+ copies Transliterator.properties next to the compiled classes, then jars the
+ com/ibm/icu/dev/tool/ime/translit tree with the manifest named by ${ime.translit.manifest}.
+ -->
+ <target name="translitIMEJar" depends="collator, transliterator" description="build transliterator IME 'icutransime.jar' jar file">
+ <javac includes="com/ibm/icu/dev/tool/ime/translit/*.java"
+ excludes="**/CVS/**/*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off"/>
+ <copy file="${src.dir}/com/ibm/icu/dev/tool/ime/translit/Transliterator.properties"
+ todir="${build.dir}/com/ibm/icu/dev/tool/ime/translit"/>
+ <jar jarfile="${ime.translit.jar.file}"
+ compress="true"
+ basedir="${build.dir}"
+ includes="com/ibm/icu/dev/tool/ime/translit/**/*"
+ manifest="${ime.translit.manifest}">
+ <!-- the services/ files from the source tree are placed under the jar's META-INF -->
+ <metainf dir="${src.dir}/com/ibm/icu/dev/tool/ime/translit" includes="services/*"/>
+ </jar>
+ </target>
+
+ <!--
+ Builds the Indic input method jar. Compiles the IME sources into ${build.dir},
+ copies DisplayNames.properties next to the compiled classes, then jars the
+ com/ibm/icu/dev/tool/ime/indic tree with the manifest named by ${ime.indic.manifest}.
+ -->
+ <target name="indicIMEJar" depends="init" description="build indic IME 'icuindicime.jar' jar file">
+ <javac includes="com/ibm/icu/dev/tool/ime/indic/*.java"
+ excludes="**/CVS/**/*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off"/>
+ <copy file="${src.dir}/com/ibm/icu/dev/tool/ime/indic/DisplayNames.properties"
+ todir="${build.dir}/com/ibm/icu/dev/tool/ime/indic"/>
+ <jar jarfile="${ime.indic.jar.file}"
+ compress="true"
+ basedir="${build.dir}"
+ includes="com/ibm/icu/dev/tool/ime/indic/**/*"
+ manifest="${ime.indic.manifest}">
+ <!-- the services/ files from the source tree are placed under the jar's META-INF -->
+ <metainf dir="${src.dir}/com/ibm/icu/dev/tool/ime/indic" includes="services/*"/>
+ </jar>
+ </target>
+
+<!--
+ <target name="translitimeStandaloneJar" depends="collator, transliterator">
+ <javac includes="com/ibm/icu/dev/tool/ime/translit/*.java"
+ excludes="**/CVS/**/*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off"/>
+ <copy toDir="${build.dir}/com/ibm/icu/dev/tool/ime/translit">
+ <fileset dir="${src.dir}/com/ibm/icu/dev/tool/ime/translit" includes="*.properties"/>
+ </copy>
+ <jar jarfile="${ime.jar.file}"
+ compress="true"
+ basedir="${build.dir}"
+ includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*,com/ibm/icu/dev/tool/ime/translit/**/*"
+ manifest="${ime.translit.manifest}">
+ <metainf dir="${src.dir}/com/ibm/icu/dev/tool/ime/translit" includes="services/*"/>
+ </jar>
+ </target>
+-->
+
+ <!--
+ Jars the compiled com/ibm/richtext classes from ${build.dir} into ${richedit.jar.file},
+ leaving out everything under com/ibm/richtext/test.
+ ${richedit.dir} is created first (presumably because the jar is written there; confirm
+ against the property definitions).
+ -->
+ <target name="richeditJar" depends="richedit" description="build richedit runtime 'richedit.jar' jar file">
+ <mkdir dir="${richedit.dir}"/>
+ <jar jarfile="${richedit.jar.file}"
+ compress="true"
+ includes="com/ibm/richtext/**/*"
+ excludes="com/ibm/richtext/test/**/*"
+ basedir="${build.dir}"
+ manifest="${richedit.manifest}"/>
+ </target>
+
+ <!--
+ Note: I used to use -linkoffline http://java.sun.com/products/jdk/1.2/docs/api/ C:\jdk1.2.2\docs\api
+ This links to core docs on Sun's site - a very nice feature. But it requires the 1.2 docs to be
+ on the build machine at a known location.
+ -->
+
+ <!--
+ The 'public' docs for richedit: runs javadoc over the listed com.ibm.richtext packages
+ and writes the result to ${richedit.doc.dir}.
+ The footer markup in 'bottom' is entity-escaped because a literal '<' is not allowed
+ inside an XML attribute value; javadoc still receives the unescaped HTML.
+ -->
+ <target name="richeditDocs" depends="init" description="build richedit javadoc">
+ <mkdir dir="${richedit.doc.dir}"/>
+ <javadoc packagenames="com.ibm.richtext.demo,com.ibm.richtext.awtui,com.ibm.richtext.swingui,com.ibm.richtext.textpanel,com.ibm.richtext.styledtext,com.ibm.richtext.textlayout.attributes,com.ibm.richtext.print"
+ sourcepath="${src.dir}"
+ destdir="${richedit.doc.dir}"
+ nodeprecatedlist="true"
+ windowtitle="RichEdit Control"
+ doctitle="RichEdit Control"
+ encoding="iso-8859-1"
+ docencoding="iso-8859-1"
+ bottom="&lt;font size=-1&gt;Copyright (c) 1998-2004 IBM Corporation and others.&lt;/font&gt;"/>
+ </target>
+
+ <!-- richedit alphaworks distribution - jar and docs, but no source -->
+ <target name="richeditZip" depends="richeditJar,richeditDocs" description="build richedit zip file">
+ <!-- Copy the license into ${richedit.dir}, then zip doc/**, richedit.jar and license.html from there -->
+ <copy file="license.html" todir="${richedit.dir}"/>
+ <zip zipfile="${richedit.zip.file}"
+ basedir="${richedit.dir}"
+ includes="doc/**/*,richedit.jar,license.html"/>
+ </target>
+ <!--
+ Removes the files under the core com/ibm/icu packages (text, impl, math, lang, util,
+ charset) from ${build.dir}. failonerror="no" keeps the build going when one of
+ these directories is missing.
+ -->
+ <target name="deleteCore" depends="init" >
+ <delete failonerror="no">
+ <fileset dir="${build.dir}/com/ibm/icu/text"/>
+ <fileset dir="${build.dir}/com/ibm/icu/impl"/>
+ <fileset dir="${build.dir}/com/ibm/icu/math"/>
+ <fileset dir="${build.dir}/com/ibm/icu/lang"/>
+ <fileset dir="${build.dir}/com/ibm/icu/util"/>
+ <fileset dir="${build.dir}/com/ibm/icu/charset"/>
+ </delete>
+ </target>
+ <!--
+ Runs com.ibm.icu.dev.test.TestAll in a forked JVM with assertions enabled; a failing
+ run fails the build. deleteCore is the last dependency, so the core classes are no
+ longer in ${build.dir} when the tests start (presumably so that the copies packaged
+ in ${jar.file} are the ones exercised).
+ -->
+ <target name="check" depends="tests, jar, deleteCore" description="run standard icu4j test suite">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg value="-ea"/> <!-- enable assertion checks -->
+ <arg value="-n"/>
+ <classpath>
+ <pathelement path="${java.class.path}/"/>
+ <pathelement location="${jar.file}"/>
+ <pathelement location="${charsets.jar.file}"/>
+ <pathelement location="clover.jar"/>
+ <pathelement path="${build.dir}"/>
+ </classpath>
+ </java>
+ </target>
+
+ <!--
+ Same setup as 'check' (forked TestAll, assertions on, identical classpath), with a
+ 128 MB maximum heap and the extra test argument -e10.
+ -->
+ <target name="exhaustiveCheck" depends="tests, jar, deleteCore" description="run standard icu4j test suite in exhaustive mode">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <jvmarg value="-ea"/> <!-- enable assertion checks -->
+ <jvmarg value="-Xmx128m"/> <!-- set the max heap size to 128m -->
+ <arg value="-n"/>
+ <arg value="-e10"/>
+ <classpath>
+ <pathelement path="${java.class.path}/"/>
+ <pathelement location="${jar.file}"/>
+ <pathelement location="${charsets.jar.file}"/>
+ <pathelement location="clover.jar"/>
+ <pathelement path="${build.dir}"/>
+ </classpath>
+ </java>
+ </target>
+
+ <!-- Has no tasks of its own; it only triggers its dependency 'jarTests'. -->
+ <target name="secure" depends="jarTests" description="builds icu4j.jar and icu4jtests.jar"/>
+
+ <!--
+ Runs TestAll in a forked JVM with the Java security manager installed and the policy
+ file com/ibm/icu/dev/test/security.policy from the source tree. The classpath holds
+ only jars (clover, ICU4J, charsets, tests); ${build.dir} is not on it.
+ The commented-out jvmarg lines are optional debugging switches.
+ -->
+ <target name="secureCheck" depends="secure" description="run secure (applet-like) icu4j test suite">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <!-- jvmarg value="-verbose"/ -->
+ <!-- (use for debugging, LOTS of output) <jvmarg value="-Djava.security.debug=access:trace"/ -->
+ <!-- <jvmarg value="-Djava.security.debug=access:failure,domain"/> -->
+ <jvmarg value="-Djava.security.manager"/>
+ <jvmarg value="-Djava.security.policy=${src.dir}/com/ibm/icu/dev/test/security.policy"/>
+ <!--<jvmarg value="-Djava.security.debug=access:failure"/>-->
+ <arg value="-w"/>
+ <arg value="-nothrow"/>
+ <classpath>
+ <pathelement location="clover.jar"/>
+ <pathelement location="${jar.file}"/>
+ <pathelement location="${charsets.jar.file}"/>
+ <pathelement location="${testjar.file}"/>
+ </classpath>
+ </java>
+ </target>
+
+ <!--
+ Builds core and tests, then strips data from ${build.dir}: the Holiday*.class and
+ BreakIterator*.class files in the parent of the ICU data directory, the ICU data
+ directory itself, and the test data directory. failonerror="no" makes a missing
+ directory non-fatal.
+ -->
+ <target name="noData" depends="core, tests" description="builds ICU4J without any data">
+ <delete failonerror="no">
+ <fileset dir="${build.dir}/${icu4j.data.path}/../" includes="Holiday*.class"/>
+ <fileset dir="${build.dir}/${icu4j.data.path}/../" includes="BreakIterator*.class"/>
+ <fileset dir="${build.dir}/${icu4j.data.path}"/>
+ <fileset dir="${build.dir}/${icu4j.testdata.path}"/>
+ <!-- NOTE(review): unlike the two filesets at the top there is no '/' before '..' here.
+ Unless ${icu4j.testdata.path} ends in '/', this names a directory that presumably
+ does not exist and the fileset is silently skipped. Confirm which was intended. -->
+ <fileset dir="${build.dir}/${icu4j.testdata.path}../"/>
+ </delete>
+ </target>
+ <!--
+ Runs TestAll in a forked JVM against the data-less build left by 'noData', passing
+ -nothrow and -nodata. Only clover.jar and ${build.dir} are on the classpath.
+ -->
+ <target name="noDataCheck" depends="noData" description="runs the tests when no data is present">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <arg value="-nothrow"/>
+ <arg value="-nodata"/>
+ <classpath>
+ <pathelement location="clover.jar"/>
+ <pathelement location="${build.dir}"/>
+ </classpath>
+ </java>
+ </target>
+ <!-- Helper: prints a hint when the property version.1.4 is not set; does nothing otherwise. -->
+ <target name="_requires14" unless="version.1.4">
+ <echo message="One of your targets requires JAVA_HOME to be set to JDK version 1.4"/>
+ </target>
+
+ <!--
+ Helper for 'checktags'; runs only when version.1.4 is set. Invokes javadoc with the
+ com.ibm.icu.dev.tool.docs.CheckTags doclet (loaded from ${build.dir}) over the
+ com.ibm.icu packages, leaving out com.ibm.icu.dev.* and com.ibm.icu.impl.*.
+ -->
+ <target name="_checktags" depends="anthack1,anthack2,tools" if="version.1.4">
+ <echo message="doc params: ${doc.params}"/>
+ <javadoc sourcepath="${src.dir}"
+ packagenames="com.ibm.icu.*"
+ excludepackagenames="com.ibm.icu.dev.*,com.ibm.icu.impl.*"
+ classpath="${build.dir}"
+ source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.CheckTags"
+ path="${build.dir}">
+ <!-- <param name="-short"/> -->
+ </doclet>
+ </javadoc>
+ </target>
+
+ <!-- Public entry point: _requires14 prints the JDK hint if needed, then _checktags does the work. -->
+ <target name="checktags" depends="anthack1,anthack2,_requires14,_checktags" description="check API tags before release"/>
+
+ <!--
+ Runs only when version.1.4 is set. Invokes javadoc with the GatherAPIData doclet over
+ the lang, math, text and util packages. The doclet is given the output path
+ ${api.dir}/icu4j${icu4j.version.number}.api together with -internal and -gzip.
+ -->
+ <target name="gatherapi" depends="tools" if="version.1.4" description="run API database generator tool">
+ <javadoc classpath="${build.dir}"
+ sourcepath="${src.dir}"
+ packagenames="com.ibm.icu.lang,com.ibm.icu.math,com.ibm.icu.text,com.ibm.icu.util"
+ source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData"
+ path="${build.dir}">
+ <param name="-name" value="ICU4J ${icu4j.version.string}"/>
+ <param name="-output" value="${api.dir}/icu4j${icu4j.version.number}.api"/>
+ <param name="-internal"/>
+ <param name="-gzip"/>
+ </doclet>
+ </javadoc>
+ </target>
+
+ <!--
+ Runs only when version.1.4 is set. Invokes com.ibm.icu.dev.tool.docs.ReportAPI with the
+ previous release's .api.gz file as -old: and the current one as -new:, asking for
+ HTML output written to ${api.report.out}.
+ -->
+ <target name="apireport" depends="tools, gatherapi" if="version.1.4" description="run API report generator tool">
+ <java classname="com.ibm.icu.dev.tool.docs.ReportAPI" classpath="${build.dir}" failonerror="true">
+ <arg value="-old:"/>
+ <arg value="${api.dir}/icu4j${icu4j.previous.version.number}.api.gz"/>
+ <arg value="-new:"/>
+ <arg value="${api.dir}/icu4j${icu4j.version.number}.api.gz"/>
+ <arg value="-html"/>
+ <arg value="-internal"/>
+ <arg value="-out:"/>
+ <arg value="${api.report.out}"/>
+ </java>
+ </target>
+
+ <!--
+ Removes build output: the class and doc directories, the generated jars and zips,
+ and the eclipse/wrapper build directories.
+ -->
+ <target name="clean" depends="init" description="remove all build targets">
+ <delete dir="${build.dir}"/>
+ <delete dir="${doc.dir}"/>
+ <delete file="${jar.file}"/>
+ <delete file="${charsets.jar.file}"/>
+ <delete dir="${richedit.dir}"/>
+ <delete file="${testjar.file}"/>
+ <delete file="${jarSrc.file}"/>
+
+ <!-- Archives written by jarDocs, translitIMEJar, indicIMEJar, richeditJar and richeditZip.
+ Deleting a file that does not exist is harmless, so these are safe even when the
+ corresponding target was never run or the file was already removed above. -->
+ <delete file="${jarDocs.file}"/>
+ <delete file="${ime.translit.jar.file}"/>
+ <delete file="${ime.indic.jar.file}"/>
+ <delete file="${richedit.jar.file}"/>
+ <delete file="${richedit.zip.file}"/>
+
+ <delete dir="${eclipse.projects.dir}"/>
+ <delete dir="${wrapper.build.dir}"/>
+ <delete file="${zipCoreSrc.file}"/>
+ <delete file="${zipTestSrc.file}"/>
+ <delete file="${jdk.wrapper.jar.file}"/>
+ </target>
+
+ <!--
+ Targets to copy the required *.icu files
+ -->
+ <!--
+ Extracts unorm.icu, uprops.icu, pnames.icu and unames.icu from icudata.jar into
+ ${build.dir}, then touches uprops.icu so that it carries a fresh timestamp.
+ -->
+ <target name="normalizerData" depends="init" >
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/unorm.icu"/>
+ <include name="**/uprops.icu"/>
+ <include name="**/pnames.icu"/>
+ <include name="**/unames.icu"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+
+ <!--
+ Extracts the character property .icu files plus all .res and .brk files from
+ icudata.jar into ${build.dir}, skipping the resource bundles under coll/, translit/
+ and rbnf/. Touches uprops.icu afterwards.
+ -->
+ <target name="copyFullPropsData" depends="init" >
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/unorm.icu"/>
+ <include name="**/uprops.icu"/>
+ <include name="**/ubidi.icu"/>
+ <include name="**/ucase.icu"/>
+ <include name="**/unames.icu"/>
+ <include name="**/pnames.icu"/>
+ <include name="**/*.res"/>
+ <include name="**/*.brk"/>
+ <exclude name="**/coll/*.res"/>
+ <exclude name="**/translit/*.res"/>
+ <exclude name="**/rbnf/*.res"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+
+ <!--
+ Extracts only the six character property .icu files (unorm, uprops, ubidi, ucase,
+ pnames, unames) from icudata.jar into ${build.dir}; no resource bundles or break
+ data. Touches uprops.icu afterwards.
+ -->
+ <target name="copyBasicPropsData" depends="init" >
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/unorm.icu"/>
+ <include name="**/uprops.icu"/>
+ <include name="**/ubidi.icu"/>
+ <include name="**/ucase.icu"/>
+ <include name="**/pnames.icu"/>
+ <include name="**/unames.icu"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+
+ <!--
+ Extracts every .icu file, the resource bundles under coll/, and every .brk file
+ from icudata.jar into ${build.dir}. Touches uprops.icu afterwards.
+ -->
+ <target name="copyCollatorData" depends="init">
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/*.icu"/>
+ <include name="**/coll/*.res"/>
+ <include name="**/*.brk"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+
+ <!--
+ Extracts unorm, uprops, pnames and unames .icu files plus the .res bundles from
+ icudata.jar into ${build.dir}, skipping the bundles under coll/, translit/ and
+ rbnf/. No .brk files are extracted. Touches uprops.icu afterwards.
+ -->
+ <target name="copyCalendarData" depends="init">
+ <!-- Calendar does not require Collation data, BreakIterator data -->
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/unorm.icu"/>
+ <include name="**/uprops.icu"/>
+ <include name="**/pnames.icu"/>
+ <include name="**/unames.icu"/>
+ <include name="**/*.res"/>
+ <exclude name="**/coll/*.res"/>
+ <exclude name="**/translit/*.res"/>
+ <exclude name="**/rbnf/*.res"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+ <!--
+ Extracts the data used by the transliterator build from icudata.jar into
+ ${build.dir}, then touches uprops.icu.
+ -->
+ <target name="copyTranslitData" depends="init">
+ <!-- Copies the character property .icu files, the BreakIterator (.brk) data and the translit/ resource bundles; no other .res files are extracted -->
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/unorm.icu"/>
+ <include name="**/uprops.icu"/>
+ <include name="**/ubidi.icu"/>
+ <include name="**/ucase.icu"/>
+ <include name="**/unames.icu"/>
+ <include name="**/pnames.icu"/>
+ <include name="**/*.brk"/>
+ <include name="**/translit/*.res"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+
+ <!--
+ Runs only when the property icu4j.module.resources is set. Extracts every .icu file
+ and every .res bundle except those under translit/ from icudata.jar into
+ ${build.dir}. Touches uprops.icu afterwards.
+ -->
+ <target name="copyFormatData" depends="init" if="icu4j.module.resources">
+ <!-- Format does not require BreakIterator data -->
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}">
+ <patternset>
+ <!-- arrgh, includes attributes of patternset does not work here in ant 1.5.3 -->
+ <include name="**/*.icu"/>
+ <include name="**/*.res"/>
+ <exclude name="**/translit/*.res"/>
+ </patternset>
+ </unjar>
+ <touch file="${build.dir}/${icu4j.data.path}/uprops.icu"/>
+ </target>
+
+ <!-- Extracts the complete contents of icudata.jar into ${build.dir}; no filtering, and no touch of uprops.icu. -->
+ <target name="copyAllICUData" depends="init">
+ <unjar src="${src.dir}/com/ibm/icu/impl/data/icudata.jar" dest="${build.dir}"/>
+ </target>
+
+ <!--
+ Jars whatever is currently present under the util, text, math, impl and lang
+ packages in ${build.dir} into ${jar.file}. It depends only on 'init', so the
+ content of the jar is decided by the modular targets that were run beforehand.
+ -->
+ <target name="moduleJar" depends="init" description="modular build of 'icu4j.jar' jar file">
+ <jar jarfile="${jar.file}"
+ compress="true"
+ includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*"
+ basedir="${build.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Modularized Build"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java Module"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!--
+ Targets to build only Normalization API and Normalization tests
+ -->
+
+ <!--
+ Modular build of the normalization service: compiles exactly the listed source files
+ into ${build.dir} after 'normalizerData' has extracted the data files.
+ sourcepath="" is set explicitly, so only files matched by the include patterns
+ below (minus the excludes) are handed to the compiler.
+ -->
+ <target name="normalizer" depends="init,normalizerData" description="modular build of normalizer services">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+
+ <!-- Excludes always win over includes. ICUListResourceBundle.java used to be listed
+ in both groups; the include could never take effect and has been dropped, so
+ the set of compiled files is unchanged. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+ <exclude name="com/ibm/icu/impl/ICUListResourceBundle.java"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <exclude name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <exclude name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <exclude name="com/ibm/icu/impl/BOCU.java"/>
+ <exclude name="com/ibm/icu/impl/UnicodeCharacterIterator.java"/>
+
+ <include name="com/ibm/icu/impl/Assert.java"/>
+ <include name="com/ibm/icu/impl/CharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/CharTrie.java"/>
+ <include name="com/ibm/icu/impl/ICUBinary.java"/>
+ <include name="com/ibm/icu/impl/ICUBinaryStream.java"/>
+ <include name="com/ibm/icu/impl/ICUData.java"/>
+ <include name="com/ibm/icu/impl/ICUDebug.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleImpl.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleIterator.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/IntTrie.java"/>
+ <include name="com/ibm/icu/impl/LocaleUtility.java"/>
+ <include name="com/ibm/icu/impl/NormalizerImpl.java"/>
+ <include name="com/ibm/icu/impl/NormalizerDataReader.java"/>
+ <include name="com/ibm/icu/impl/ReplaceableUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleWrapper.java"/>
+ <include name="com/ibm/icu/impl/RuleCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/SortedSetRelation.java"/>
+ <include name="com/ibm/icu/impl/Trie.java"/>
+ <include name="com/ibm/icu/impl/TrieIterator.java"/>
+ <include name="com/ibm/icu/impl/UBiDiProps.java"/>
+ <include name="com/ibm/icu/impl/UCaseProps.java"/>
+ <include name="com/ibm/icu/impl/UCharacter*.java"/>
+ <include name="com/ibm/icu/impl/UCharArrayIterator.java"/>
+ <include name="com/ibm/icu/impl/UPropertyAliases.java"/>
+ <include name="com/ibm/icu/impl/URLHandler.java"/>
+ <include name="com/ibm/icu/impl/USerializedSet.java"/>
+ <include name="com/ibm/icu/impl/Utility.java"/>
+ <include name="com/ibm/icu/lang/**"/>
+ <include name="com/ibm/icu/text/BreakIterator.java"/>
+ <include name="com/ibm/icu/text/CanonicalIterator.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/SymbolTable.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeMatcher.java"/>
+ <include name="com/ibm/icu/text/UnicodeSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/util/Freezable.java"/>
+ <include name="com/ibm/icu/util/RangeValueIterator.java"/>
+ <include name="com/ibm/icu/util/StringTokenizer.java"/>
+ <include name="com/ibm/icu/util/ULocale.java"/>
+ <include name="com/ibm/icu/util/UResourceBundle.java"/>
+ <include name="com/ibm/icu/util/UResourceTypeMismatchException.java"/>
+ <include name="com/ibm/icu/util/ValueIterator.java"/>
+ <include name="com/ibm/icu/util/VersionInfo.java"/>
+
+ </javac>
+
+ </target>
+
+ <!--
+ Compiles the normalizer tests and the shared test framework classes into
+ ${build.dir}, leaving out TestDeprecatedNormalizerAPI.java.
+ -->
+ <target name="normalizerTests" depends="normalizer, testdata">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/dev/test/normalizer/TestDeprecatedNormalizerAPI.java"/>
+
+ <include name="com/ibm/icu/dev/test/normalizer/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+
+ </javac>
+ </target>
+
+ <!--
+ Targets to build only Collation API and Collation tests
+ -->
+
+ <!--
+ Modular build of the collation service: compiles exactly the listed source files
+ into ${build.dir} after 'copyCollatorData' has extracted the data files.
+ sourcepath="" is set explicitly, so only files matched by the include patterns
+ below (minus the excludes) are handed to the compiler.
+ -->
+ <target name="collator" depends="init,copyCollatorData" description="modular build of collator services">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <!-- UScriptRun.java is excluded here because the lang/*.java include below would otherwise pick it up -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/UnicodeCharacterIterator.java"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+
+ <include name="com/ibm/icu/impl/Assert.java"/>
+ <include name="com/ibm/icu/impl/BOCU.java"/>
+ <include name="com/ibm/icu/impl/CharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/CharTrie.java"/>
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/impl/ICUBinary.java"/>
+ <include name="com/ibm/icu/impl/ICUBinaryStream.java"/>
+ <include name="com/ibm/icu/impl/ICUData.java"/>
+ <include name="com/ibm/icu/impl/ICUDebug.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleService.java"/>
+ <include name="com/ibm/icu/impl/ICUNotifier.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleImpl.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleIterator.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/ICURWLock.java"/>
+ <include name="com/ibm/icu/impl/ICUService.java"/>
+ <include name="com/ibm/icu/impl/ImplicitCEGenerator.java"/>
+ <include name="com/ibm/icu/impl/IntTrie.java"/>
+ <include name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/LocaleUtility.java"/>
+ <include name="com/ibm/icu/impl/NormalizerDataReader.java"/>
+ <include name="com/ibm/icu/impl/NormalizerImpl.java"/>
+ <include name="com/ibm/icu/impl/ReplaceableUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleWrapper.java"/>
+ <include name="com/ibm/icu/impl/RuleCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/SortedSetRelation.java"/>
+ <include name="com/ibm/icu/impl/StringUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/Trie.java"/>
+ <include name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/TrieIterator.java"/>
+ <include name="com/ibm/icu/impl/UBiDiProps.java"/>
+ <include name="com/ibm/icu/impl/UCaseProps.java"/>
+ <include name="com/ibm/icu/impl/UCharArrayIterator.java"/>
+ <include name="com/ibm/icu/impl/UCharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/UCharacterName.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameChoice.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterProperty.java"/>
+ <include name="com/ibm/icu/impl/UCharacterPropertyReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterUtility.java"/>
+ <include name="com/ibm/icu/impl/URLHandler.java"/>
+ <include name="com/ibm/icu/impl/UPropertyAliases.java"/>
+ <include name="com/ibm/icu/impl/USerializedSet.java"/>
+ <include name="com/ibm/icu/impl/Utility.java"/>
+ <include name="com/ibm/icu/lang/*.java"/>
+ <include name="com/ibm/icu/text/BreakDictionary.java"/>
+ <include name="com/ibm/icu/text/BreakIterator.java"/>
+ <include name="com/ibm/icu/text/BreakIteratorFactory.java"/>
+ <include name="com/ibm/icu/text/CanonicalIterator.java"/>
+ <include name="com/ibm/icu/text/Collation.java"/>
+ <include name="com/ibm/icu/text/CollationElementIterator.java"/>
+ <include name="com/ibm/icu/text/CollationKey.java"/>
+ <include name="com/ibm/icu/text/CollationParsedRuleBuilder.java"/>
+ <include name="com/ibm/icu/text/CollationRuleParser.java"/>
+ <include name="com/ibm/icu/text/Collator.java"/>
+ <include name="com/ibm/icu/text/CollatorReader.java"/>
+ <include name="com/ibm/icu/text/CollatorServiceShim.java"/>
+ <include name="com/ibm/icu/text/DictionaryBasedBreakIterator.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/RawCollationKey.java"/>
+ <include name="com/ibm/icu/text/RBBIDataWrapper.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator.java"/>
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator_Old.java"/>
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator_New.java"/>
+ <include name="com/ibm/icu/text/RuleBasedCollator.java"/>
+ <include name="com/ibm/icu/text/SymbolTable.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeMatcher.java"/>
+ <include name="com/ibm/icu/text/UnicodeSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ <include name="com/ibm/icu/util/ByteArrayWrapper.java"/>
+ <include name="com/ibm/icu/util/CompactByteArray.java"/>
+ <include name="com/ibm/icu/util/RangeValueIterator.java"/>
+ <include name="com/ibm/icu/util/StringTokenizer.java"/>
+ <include name="com/ibm/icu/util/ULocale.java"/>
+ <include name="com/ibm/icu/util/UResourceBundle.java"/>
+ <include name="com/ibm/icu/util/UResourceTypeMismatchException.java"/>
+ <include name="com/ibm/icu/util/ValueIterator.java"/>
+ <include name="com/ibm/icu/util/VersionInfo.java"/>
+
+ </javac>
+ </target>
+
+ <!--
+ Compiles the collator and search tests plus the shared test framework classes into
+ ${build.dir}, leaving out RandomCollator.java.
+ -->
+ <target name="collatorTests" depends="collator, testdata" description="test collator modular build target">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/dev/test/collator/RandomCollator.java"/>
+
+ <include name="com/ibm/icu/dev/test/collator/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/search/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ </javac>
+ </target>
+
+ <!--
+ Targets to build only Character properties API and tests
+ -->
+
+ <!-- Has no tasks of its own: compiles via 'propertiesClasses', then extracts the basic property data via 'copyBasicPropsData'. -->
+ <target name="propertiesBasic" depends="init, propertiesClasses, copyBasicPropsData" description="modular build of basic character properties">
+ </target>
+
+ <!-- Has no tasks of its own: compiles via 'propertiesClasses', then extracts the full property data via 'copyFullPropsData'. -->
+ <target name="propertiesFull" depends="init,propertiesClasses,copyFullPropsData" description="modular build of full character properties">
+ </target>
+
+ <!--
+ Compiles the classes shared by 'propertiesBasic' and 'propertiesFull' into
+ ${build.dir}. This target declares no dependencies of its own; both callers list
+ 'init' ahead of it. sourcepath="" is set explicitly, so only files matched by the
+ include patterns below (minus the excludes) are handed to the compiler.
+ -->
+ <target name="propertiesClasses">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+
+ <include name="com/ibm/icu/lang/**/*"/>
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/impl/Assert.java"/>
+ <include name="com/ibm/icu/impl/CharTrie.java"/>
+ <include name="com/ibm/icu/impl/CharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/ICUBinary.java"/>
+ <include name="com/ibm/icu/impl/ICUBinaryStream.java"/>
+ <include name="com/ibm/icu/impl/ICUData.java"/>
+ <include name="com/ibm/icu/impl/ICUDebug.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleImpl.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleIterator.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/IntTrie.java"/>
+ <include name="com/ibm/icu/impl/LocaleUtility.java"/>
+ <include name="com/ibm/icu/impl/NormalizerDataReader.java"/>
+ <include name="com/ibm/icu/impl/NormalizerImpl.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleWrapper.java"/>
+ <include name="com/ibm/icu/impl/ReplaceableUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/RuleCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/SortedSetRelation.java"/>
+ <include name="com/ibm/icu/impl/Trie.java"/>
+ <include name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/TrieIterator.java"/>
+ <include name="com/ibm/icu/impl/UBiDiProps.java"/>
+ <include name="com/ibm/icu/impl/UCaseProps.java"/>
+ <include name="com/ibm/icu/impl/UCharArrayIterator.java"/>
+ <include name="com/ibm/icu/impl/UCharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/UCharacterName.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameChoice.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterProperty.java"/>
+ <include name="com/ibm/icu/impl/UCharacterPropertyReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterUtility.java"/>
+ <include name="com/ibm/icu/impl/UPropertyAliases.java"/>
+ <include name="com/ibm/icu/impl/URLHandler.java"/>
+ <include name="com/ibm/icu/impl/USerializedSet.java"/>
+ <include name="com/ibm/icu/impl/Utility.java"/>
+ <include name="com/ibm/icu/text/BreakIterator.java"/>
+ <include name="com/ibm/icu/text/BreakDictionary.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/SymbolTable.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeMatcher.java"/>
+ <include name="com/ibm/icu/text/UnicodeSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ <include name="com/ibm/icu/util/CompactByteArray.java"/>
+ <include name="com/ibm/icu/util/RangeValueIterator.java"/>
+ <include name="com/ibm/icu/util/StringTokenizer.java"/>
+ <include name="com/ibm/icu/util/ULocale.java"/>
+ <include name="com/ibm/icu/util/UResourceBundle.java"/>
+ <include name="com/ibm/icu/util/UResourceTypeMismatchException.java"/>
+ <include name="com/ibm/icu/util/ValueIterator.java"/>
+ <include name="com/ibm/icu/util/VersionInfo.java"/>
+ </javac>
+ </target>
+
+ <!--
+ Copies SpecialCasing.txt into ${build.dir}, then compiles the lang tests (except
+ TestUScriptRun.java) and the shared test framework classes.
+ -->
+ <target name="propertiesBasicTests" depends="propertiesBasic" description="test propertiesBasic modular build target">
+ <copy file="${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing.txt"
+ todir="${build.dir}/com/ibm/icu/dev/data/unicode/"/>
+
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/dev/test/lang/TestUScriptRun.java"/>
+
+ <include name="com/ibm/icu/dev/test/lang/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ </javac>
+ </target>
+
+ <!--
+ Copies SpecialCasing.txt into ${build.dir}, then compiles the lang tests (except
+ TestUScriptRun.java) and the shared test framework classes. Unlike
+ 'propertiesBasicTests', it also compiles the Break* data classes and the
+ BreakDictionary/BreakIterator classes from the library itself.
+ -->
+ <target name="propertiesFullTests" depends="propertiesFull" description="test propertiesFull modular build target">
+ <copy file="${src.dir}/com/ibm/icu/dev/data/unicode/SpecialCasing.txt"
+ todir="${build.dir}/com/ibm/icu/dev/data/unicode/"/>
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/dev/test/lang/TestUScriptRun.java"/>
+
+ <include name="com/ibm/icu/dev/test/lang/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/text/*BreakDictionary*.java"/>
+ <include name="com/ibm/icu/text/*BreakIterator*.java"/>
+ </javac>
+ </target>
+
+ <!--
+ Targets to build only Compression API and tests
+ -->
+ <target name="compression" description="modular build of compression services" depends="init">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ sourcepath=""
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- The three sources that make up this module: SCSU and the Unicode compressor/decompressor. -->
+ <include name="com/ibm/icu/text/SCSU.java"/>
+ <include name="com/ibm/icu/text/UnicodeCompressor.java"/>
+ <include name="com/ibm/icu/text/UnicodeDecompressor.java"/>
+ <!-- CVS metadata is never compiled. -->
+ <exclude name="**/CVS/**/*"/>
+ </javac>
+ </target>
+
+ <target name="compressionTests" description="test compression modular build target" depends="compression">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- Every source under the compression test package plus TestAll/TestFmwk/TestLog/TestUtil/UTF16Util. -->
+ <include name="com/ibm/icu/dev/test/compression/**/*.java"/>
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <!-- CVS metadata is never compiled. -->
+ <exclude name="**/CVS/**/*"/>
+ </javac>
+ </target>
+
+ <!--
+ Targets to build only Calendar API and Calendar tests
+ -->
+ <target name="calendar" depends="init,copyCalendarData" description="modular build of calendar services">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+ <!-- sourcepath="" unsets javac's source path search, so only files matched by the patterns below are compiled. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+ <!-- Explicit source list for the calendar module: calendar/holiday data and classes, then the impl, lang, math, text and util sources named one by one. -->
+ <include name="com/ibm/icu/impl/data/*Calendar*.java"/>
+ <include name="com/ibm/icu/impl/data/*Holiday*.java"/>
+ <include name="com/ibm/icu/impl/Assert.java"/>
+ <include name="com/ibm/icu/impl/BOCU.java"/>
+ <include name="com/ibm/icu/impl/CalendarAstronomer.java"/>
+ <include name="com/ibm/icu/impl/CalendarCache.java"/>
+ <include name="com/ibm/icu/impl/CalendarData.java"/>
+ <include name="com/ibm/icu/impl/CharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/CharTrie.java"/>
+ <include name="com/ibm/icu/impl/ICUBinary.java"/>
+ <include name="com/ibm/icu/impl/ICUBinaryStream.java"/>
+ <include name="com/ibm/icu/impl/ICUData.java"/>
+ <include name="com/ibm/icu/impl/ICUDebug.java"/>
+ <include name="com/ibm/icu/impl/ICUListResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleData.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleService.java"/>
+ <include name="com/ibm/icu/impl/ICUNotifier.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleImpl.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleIterator.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/ICURWLock.java"/>
+ <include name="com/ibm/icu/impl/ICUService.java"/>
+ <include name="com/ibm/icu/impl/ImplicitCEGenerator.java"/>
+ <include name="com/ibm/icu/impl/IntTrie.java"/>
+ <include name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/JDKTimeZone.java"/>
+ <include name="com/ibm/icu/impl/LocaleUtility.java"/>
+ <include name="com/ibm/icu/impl/NormalizerDataReader.java"/>
+ <include name="com/ibm/icu/impl/NormalizerImpl.java"/>
+ <include name="com/ibm/icu/impl/OlsonTimeZone.java"/>
+ <include name="com/ibm/icu/impl/ReplaceableUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleWrapper.java"/>
+ <include name="com/ibm/icu/impl/RuleCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/SortedSetRelation.java"/>
+ <include name="com/ibm/icu/impl/StringUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/TimeZoneAdapter.java"/>
+ <include name="com/ibm/icu/impl/Trie.java"/>
+ <include name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/TrieIterator.java"/>
+ <include name="com/ibm/icu/impl/UBiDiProps.java"/>
+ <include name="com/ibm/icu/impl/UCaseProps.java"/>
+ <include name="com/ibm/icu/impl/UCharArrayIterator.java"/>
+ <include name="com/ibm/icu/impl/UCharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/UCharacterName.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameChoice.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterProperty.java"/>
+ <include name="com/ibm/icu/impl/UCharacterPropertyReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterUtility.java"/>
+ <include name="com/ibm/icu/impl/URLHandler.java"/>
+ <include name="com/ibm/icu/impl/USerializedSet.java"/>
+ <include name="com/ibm/icu/impl/Utility.java"/>
+ <include name="com/ibm/icu/impl/UPropertyAliases.java"/>
+ <include name="com/ibm/icu/impl/ZoneInfoExt.java"/>
+ <include name="com/ibm/icu/impl/ZoneMeta.java"/>
+ <include name="com/ibm/icu/lang/*"/>
+ <include name="com/ibm/icu/math/BigDecimal.java"/>
+ <include name="com/ibm/icu/math/MathContext.java"/>
+ <include name="com/ibm/icu/text/BreakIterator.java"/>
+ <include name="com/ibm/icu/text/CanonicalIterator.java"/>
+ <include name="com/ibm/icu/text/ChineseDateFormat.java"/>
+ <include name="com/ibm/icu/text/ChineseDateFormatSymbols.java"/>
+ <include name="com/ibm/icu/text/CollationElementIterator.java"/>
+ <include name="com/ibm/icu/text/CollationKey.java"/>
+ <include name="com/ibm/icu/text/CollationRuleParser.java"/>
+ <include name="com/ibm/icu/text/CollationParsedRuleBuilder.java"/>
+ <include name="com/ibm/icu/text/Collator.java"/>
+ <include name="com/ibm/icu/text/CollatorReader.java"/>
+ <include name="com/ibm/icu/text/DateFormat.java"/>
+ <include name="com/ibm/icu/text/DateFormatSymbols.java"/>
+ <include name="com/ibm/icu/text/DecimalFormat.java"/>
+ <include name="com/ibm/icu/text/DecimalFormatSymbols.java"/>
+ <include name="com/ibm/icu/text/DigitList.java"/>
+ <include name="com/ibm/icu/text/MessageFormat.java"/>
+ <include name="com/ibm/icu/text/NFRule.java"/>
+ <include name="com/ibm/icu/text/NFRuleSet.java"/>
+ <include name="com/ibm/icu/text/NFSubstitution.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/NumberFormat.java"/>
+ <include name="com/ibm/icu/text/NumberFormat*Shim.java"/>
+ <include name="com/ibm/icu/text/RawCollationKey.java"/>
+ <include name="com/ibm/icu/text/RBNFPostProcessor.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/RuleBasedCollator.java"/>
+ <include name="com/ibm/icu/text/RuleBasedNumberFormat.java"/>
+ <include name="com/ibm/icu/text/SimpleDateFormat.java"/>
+ <include name="com/ibm/icu/text/SymbolTable.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UFormat.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeMatcher.java"/>
+ <include name="com/ibm/icu/text/UnicodeSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ <include name="com/ibm/icu/util/ByteArrayWrapper.java"/>
+ <include name="com/ibm/icu/util/Currency.java"/>
+ <include name="com/ibm/icu/util/CurrencyAmount.java"/>
+ <include name="com/ibm/icu/util/*Calendar*.java"/>
+ <include name="com/ibm/icu/util/*Holiday*.java"/>
+ <include name="com/ibm/icu/util/DateRule.java"/>
+ <include name="com/ibm/icu/util/Measure.java"/>
+ <include name="com/ibm/icu/util/MeasureUnit.java"/>
+ <include name="com/ibm/icu/util/RangeDateRule.java"/>
+ <include name="com/ibm/icu/util/RangeValueIterator.java"/>
+ <include name="com/ibm/icu/util/SimpleDateRule.java"/>
+ <include name="com/ibm/icu/util/SimpleTimeZone.java"/>
+ <include name="com/ibm/icu/util/StringTokenizer.java"/>
+ <include name="com/ibm/icu/util/STZInfo.java"/>
+ <include name="com/ibm/icu/util/TimeZone.java"/>
+ <include name="com/ibm/icu/util/ULocale.java"/>
+ <include name="com/ibm/icu/util/UResourceBundle.java"/>
+ <include name="com/ibm/icu/util/UResourceTypeMismatchException.java"/>
+ <include name="com/ibm/icu/util/ValueIterator.java"/>
+ <include name="com/ibm/icu/util/VersionInfo.java"/>
+
+ </javac>
+ </target>
+
+ <target name="calendarTests" description="test calendar modular build target" depends="calendar">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- TestAll/TestFmwk/TestLog/TestUtil/UTF16Util plus everything under the calendar test package. -->
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <include name="com/ibm/icu/dev/test/calendar/**/*"/>
+ <!-- CVS metadata is never compiled. -->
+ <exclude name="**/CVS/**/*"/>
+ </javac>
+ </target>
+
+ <!--
+ Targets to build only Transliterator API and tests
+ -->
+
+ <target name="transliterator" description="modular build of unicode transform services" depends="init,propertiesClasses,copyTranslitData">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ sourcepath=""
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- Transliterator sources together with the impl, text and util classes listed alongside them. -->
+ <include name="com/ibm/icu/impl/data/ResourceReader.java"/>
+ <include name="com/ibm/icu/impl/*Iterator*.java"/>
+ <include name="com/ibm/icu/impl/*Property*.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleService.java"/>
+ <include name="com/ibm/icu/impl/ICUNotifier.java"/>
+ <include name="com/ibm/icu/impl/ICURWLock.java"/>
+ <include name="com/ibm/icu/impl/ICUService.java"/>
+ <include name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <include name="com/ibm/icu/text/BreakIteratorFactory.java"/>
+ <include name="com/ibm/icu/text/*Transliterator*.java"/>
+ <include name="com/ibm/icu/text/DictionaryBasedBreakIterator.java"/>
+ <include name="com/ibm/icu/text/FunctionReplacer.java"/>
+ <include name="com/ibm/icu/text/Quantifier.java"/>
+ <include name="com/ibm/icu/text/RBBIDataWrapper.java"/>
+ <include name="com/ibm/icu/text/ReplaceableContextIterator.java"/>
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator.java"/>
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator_New.java"/>
+ <include name="com/ibm/icu/text/RuleBasedBreakIterator_Old.java"/>
+ <include name="com/ibm/icu/text/StringMatcher.java"/>
+ <include name="com/ibm/icu/text/StringReplacer.java"/>
+ <include name="com/ibm/icu/text/TransliterationRule.java"/>
+ <include name="com/ibm/icu/text/TransliterationRuleSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeReplacer.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/util/CaseInsensitiveString.java"/>
+ <!-- Patterns kept out of this module's compile set; excludes take precedence over includes. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/BOCU.java"/>
+ <exclude name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <exclude name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+ <exclude name="com/ibm/icu/text/UnicodeFilterLogic*.java"/>
+ </javac>
+ </target>
+
+ <target name="transliteratorTests" description="test transliterator modular build target" depends="transliterator">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- TestAll/TestFmwk/TestLog/TestUtil/UTF16Util plus everything under the translit test package. -->
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <include name="com/ibm/icu/dev/test/translit/**/*"/>
+ <!-- Excluded from the set above: CVS metadata and the UnicodeFilterLogic tests. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/dev/test/translit/UnicodeFilterLogic*.java"/>
+ </javac>
+ </target>
+
+ <!--
+ Targets to build only Format API
+ -->
+
+ <target name="format" depends="init,copyFormatData" description="modular build of formatting services">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+ <!-- sourcepath="" unsets javac's source path search, so only files matched by the patterns below are compiled. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+ <!-- Explicit source list for the format module; several later text entries are already matched by the *Format* and *Collator* wildcards. -->
+ <include name="com/ibm/icu/impl/data/CalendarData*.java"/>
+ <include name="com/ibm/icu/impl/Assert.java"/>
+ <include name="com/ibm/icu/impl/BOCU.java"/>
+ <include name="com/ibm/icu/impl/CalendarAstronomer.java"/>
+ <include name="com/ibm/icu/impl/CalendarCache.java"/>
+ <include name="com/ibm/icu/impl/CalendarData.java"/>
+ <include name="com/ibm/icu/impl/CharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/CharTrie.java"/>
+ <include name="com/ibm/icu/impl/ICUBinary.java"/>
+ <include name="com/ibm/icu/impl/ICUBinaryStream.java"/>
+ <include name="com/ibm/icu/impl/ICUData.java"/>
+ <include name="com/ibm/icu/impl/ICUDebug.java"/>
+ <include name="com/ibm/icu/impl/ICUListResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleData.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleService.java"/>
+ <include name="com/ibm/icu/impl/ICUNotifier.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleImpl.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleIterator.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/ICURWLock.java"/>
+ <include name="com/ibm/icu/impl/ICUService.java"/>
+ <include name="com/ibm/icu/impl/ImplicitCEGenerator.java"/>
+ <include name="com/ibm/icu/impl/IntTrie.java"/>
+ <include name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/JDKTimeZone.java"/>
+ <include name="com/ibm/icu/impl/LocaleUtility.java"/>
+ <include name="com/ibm/icu/impl/NormalizerDataReader.java"/>
+ <include name="com/ibm/icu/impl/NormalizerImpl.java"/>
+ <include name="com/ibm/icu/impl/OlsonTimeZone.java"/>
+ <include name="com/ibm/icu/impl/ReplaceableUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleWrapper.java"/>
+ <include name="com/ibm/icu/impl/RuleCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/SortedSetRelation.java"/>
+ <include name="com/ibm/icu/impl/StringUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/TimeZoneAdapter.java"/>
+ <include name="com/ibm/icu/impl/Trie.java"/>
+ <include name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/TrieIterator.java"/>
+ <include name="com/ibm/icu/impl/UBiDiProps.java"/>
+ <include name="com/ibm/icu/impl/UCaseProps.java"/>
+ <include name="com/ibm/icu/impl/UCharArrayIterator.java"/>
+ <include name="com/ibm/icu/impl/UCharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/UCharacterName.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameChoice.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterProperty.java"/>
+ <include name="com/ibm/icu/impl/UCharacterPropertyReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterUtility.java"/>
+ <include name="com/ibm/icu/impl/UPropertyAliases.java"/>
+ <include name="com/ibm/icu/impl/URLHandler.java"/>
+ <include name="com/ibm/icu/impl/UResourceTypeMismatchException.java"/> <!-- NOTE(review): the same class name is also included from com/ibm/icu/util further down; confirm an impl copy exists, otherwise this pattern matches nothing -->
+ <include name="com/ibm/icu/impl/USerializedSet.java"/>
+ <include name="com/ibm/icu/impl/Utility.java"/>
+ <include name="com/ibm/icu/impl/ZoneInfoExt.java"/>
+ <include name="com/ibm/icu/impl/ZoneMeta.java"/>
+ <include name="com/ibm/icu/lang/*"/>
+ <include name="com/ibm/icu/math/BigDecimal.java"/>
+ <include name="com/ibm/icu/math/MathContext.java"/>
+ <include name="com/ibm/icu/text/BreakIterator.java"/>
+ <include name="com/ibm/icu/text/*Format*.java"/>
+ <include name="com/ibm/icu/text/*Collator*.java"/>
+ <include name="com/ibm/icu/text/CanonicalIterator.java"/>
+ <include name="com/ibm/icu/text/ChineseDateFormat.java"/>
+ <include name="com/ibm/icu/text/CollationElementIterator.java"/>
+ <include name="com/ibm/icu/text/CollationKey.java"/>
+ <include name="com/ibm/icu/text/CollationParsedRuleBuilder.java"/>
+ <include name="com/ibm/icu/text/CollationRuleParser.java"/>
+ <include name="com/ibm/icu/text/CollatorReader.java"/>
+ <include name="com/ibm/icu/text/NFRule.java"/>
+ <include name="com/ibm/icu/text/NFRuleSet.java"/>
+ <include name="com/ibm/icu/text/NFSubstitution.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/DigitList.java"/>
+ <include name="com/ibm/icu/text/RawCollationKey.java"/>
+ <include name="com/ibm/icu/text/RBNFPostProcessor.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/SymbolTable.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeMatcher.java"/>
+ <include name="com/ibm/icu/text/UnicodeSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ <include name="com/ibm/icu/util/ByteArrayWrapper.java"/>
+ <include name="com/ibm/icu/util/BuddhistCalendar.java"/>
+ <include name="com/ibm/icu/util/Calendar.java"/>
+ <include name="com/ibm/icu/util/CalendarData.java"/> <!-- NOTE(review): CalendarData.java is also included from com/ibm/icu/impl above; confirm a util copy exists -->
+ <include name="com/ibm/icu/util/CalendarFactory.java"/>
+ <include name="com/ibm/icu/util/CECalendar.java"/>
+ <include name="com/ibm/icu/util/ChineseCalendar.java"/>
+ <include name="com/ibm/icu/util/CopticCalendar.java"/>
+ <include name="com/ibm/icu/util/Currency.java"/>
+ <include name="com/ibm/icu/util/CurrencyAmount.java"/>
+ <include name="com/ibm/icu/util/EthiopicCalendar.java"/>
+ <include name="com/ibm/icu/util/GregorianCalendar.java"/>
+ <include name="com/ibm/icu/util/HebrewCalendar.java"/>
+ <include name="com/ibm/icu/util/IslamicCalendar.java"/>
+ <include name="com/ibm/icu/util/JapaneseCalendar.java"/>
+ <include name="com/ibm/icu/util/Measure.java"/>
+ <include name="com/ibm/icu/util/MeasureUnit.java"/>
+ <include name="com/ibm/icu/util/RangeValueIterator.java"/>
+ <include name="com/ibm/icu/util/SimpleTimeZone.java"/>
+ <include name="com/ibm/icu/util/STZInfo.java"/>
+ <include name="com/ibm/icu/util/StringTokenizer.java"/>
+ <include name="com/ibm/icu/util/TimeZone.java"/>
+ <include name="com/ibm/icu/util/ULocale.java"/>
+ <include name="com/ibm/icu/util/UResourceBundle.java"/>
+ <include name="com/ibm/icu/util/UResourceTypeMismatchException.java"/>
+ <include name="com/ibm/icu/util/ValueIterator.java"/>
+ <include name="com/ibm/icu/util/VersionInfo.java"/>
+ </javac>
+ </target>
+
+ <target name="formatTests" description="test format modular build target" depends="format">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- TestAll/TestFmwk/TestLog/TestUtil/UTF16Util plus everything under the format test package; CVS metadata is excluded. -->
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <include name="com/ibm/icu/dev/test/format/**/*"/>
+ <exclude name="**/CVS/**/*"/>
+ </javac>
+ <!-- After compiling, place NumberFormatTestCases.txt next to the format test classes in the build tree. -->
+ <copy todir="${build.dir}/com/ibm/icu/dev/test/format"
+ file="${src.dir}/com/ibm/icu/dev/test/format/NumberFormatTestCases.txt"/>
+ </target>
+
+ <!--
+ Targets to build only BreakIterator API
+ -->
+
+ <target name="breakIterator" depends="init,copyFullPropsData" description="modular build of break iterator services">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+ <!-- sourcepath="" unsets javac's source path search, so only files matched by the patterns below are compiled. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/BOCU.java"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+ <!-- Explicit source list for the break iterator module: Break* data classes, then impl, lang, text and util sources. -->
+ <include name="com/ibm/icu/impl/data/*Break*.java"/>
+ <include name="com/ibm/icu/impl/Assert.java"/>
+ <include name="com/ibm/icu/impl/CharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/CharTrie.java"/>
+ <include name="com/ibm/icu/impl/ICUBinary.java"/>
+ <include name="com/ibm/icu/impl/ICUBinaryStream.java"/>
+ <include name="com/ibm/icu/impl/ICUDebug.java"/>
+ <include name="com/ibm/icu/impl/ICUData.java"/>
+ <include name="com/ibm/icu/impl/ICUListResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUListResourceBundleImpl.java"/> <!-- NOTE(review): the neighbouring calendar and format targets list no ICUListResourceBundleImpl; confirm this file exists -->
+ <include name="com/ibm/icu/impl/ICULocaleData.java"/>
+ <include name="com/ibm/icu/impl/ICULocaleService.java"/>
+ <include name="com/ibm/icu/impl/ICUNotifier.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundle.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleImpl.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleIterator.java"/>
+ <include name="com/ibm/icu/impl/ICUResourceBundleReader.java"/>
+ <include name="com/ibm/icu/impl/ICURWLock.java"/>
+ <include name="com/ibm/icu/impl/ICUService.java"/>
+ <include name="com/ibm/icu/impl/IntTrie.java"/>
+ <include name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/LocaleUtility.java"/>
+ <include name="com/ibm/icu/impl/NormalizerDataReader.java"/>
+ <include name="com/ibm/icu/impl/NormalizerImpl.java"/>
+ <include name="com/ibm/icu/impl/ReplaceableUCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/ResourceBundleWrapper.java"/>
+ <include name="com/ibm/icu/impl/RuleCharacterIterator.java"/>
+ <include name="com/ibm/icu/impl/SortedSetRelation.java"/>
+ <include name="com/ibm/icu/impl/Trie.java"/>
+ <include name="com/ibm/icu/impl/TrieIterator.java"/>
+ <include name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <include name="com/ibm/icu/impl/UBiDiProps.java"/>
+ <include name="com/ibm/icu/impl/UCaseProps.java"/>
+ <include name="com/ibm/icu/impl/UCharacterIteratorWrapper.java"/>
+ <include name="com/ibm/icu/impl/UCharacterName.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameChoice.java"/>
+ <include name="com/ibm/icu/impl/UCharacterNameReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterProperty.java"/>
+ <include name="com/ibm/icu/impl/UCharacterPropertyReader.java"/>
+ <include name="com/ibm/icu/impl/UCharacterUtility.java"/>
+ <include name="com/ibm/icu/impl/UCharArrayIterator.java"/>
+ <include name="com/ibm/icu/impl/UPropertyAliases.java"/>
+ <include name="com/ibm/icu/impl/URLHandler.java"/>
+ <include name="com/ibm/icu/impl/USerializedSet.java"/>
+ <include name="com/ibm/icu/impl/Utility.java"/>
+ <include name="com/ibm/icu/lang/**/*"/>
+ <include name="com/ibm/icu/text/*BreakIterator*.java"/>
+ <include name="com/ibm/icu/text/BreakDictionary.java"/>
+ <include name="com/ibm/icu/text/Normalizer.java"/>
+ <include name="com/ibm/icu/text/RBBIDataWrapper.java"/>
+ <include name="com/ibm/icu/text/Replaceable.java"/>
+ <include name="com/ibm/icu/text/ReplaceableString.java"/>
+ <include name="com/ibm/icu/text/SymbolTable.java"/>
+ <include name="com/ibm/icu/text/UCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UForwardCharacterIterator.java"/>
+ <include name="com/ibm/icu/text/UnicodeFilter.java"/>
+ <include name="com/ibm/icu/text/UnicodeMatcher.java"/>
+ <include name="com/ibm/icu/text/UnicodeSet.java"/>
+ <include name="com/ibm/icu/text/UnicodeSetIterator.java"/>
+ <include name="com/ibm/icu/text/UTF16.java"/>
+ <include name="com/ibm/icu/util/CompactByteArray.java"/>
+ <include name="com/ibm/icu/util/RangeValueIterator.java"/>
+ <include name="com/ibm/icu/util/StringTokenizer.java"/>
+ <include name="com/ibm/icu/util/UResourceBundle.java"/>
+ <include name="com/ibm/icu/util/UResourceTypeMismatchException.java"/>
+ <include name="com/ibm/icu/util/ULocale.java"/>
+ <include name="com/ibm/icu/util/ValueIterator.java"/>
+ <include name="com/ibm/icu/util/VersionInfo.java"/>
+ </javac>
+ <!-- th.brk is copied as-is from the source data directory into the matching build directory. -->
+ <copy file="${src.dir}/com/ibm/icu/impl/data/th.brk"
+ todir="${build.dir}/com/ibm/icu/impl/data/"/>
+ </target>
+
+ <target name="breakIteratorTests" description="test breakIterator modular build target" depends="breakIterator">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- TestAll/TestFmwk/TestLog/TestUtil/UTF16Util plus everything under the rbbi test package; CVS metadata is excluded. -->
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <include name="com/ibm/icu/dev/test/rbbi/**/*"/>
+ <exclude name="**/CVS/**/*"/>
+ </javac>
+ <!-- After compiling, place rbbitst.txt next to the rbbi test classes in the build tree. -->
+ <copy todir="${build.dir}/com/ibm/icu/dev/test/rbbi"
+ file="${src.dir}/com/ibm/icu/dev/test/rbbi/rbbitst.txt"/>
+ </target>
+
+ <!--
+ Targets to build only StringPrep and IDNA API
+ -->
+ <target depends="init" name="copyStringPrepData">
+ <unjar dest="${build.dir}" src="${src.dir}/com/ibm/icu/impl/data/icudata.jar">
+ <patternset>
+ <!-- Nested include elements are used because the includes attribute of patternset did not work here in ant 1.5.3. -->
+ <include name="**/*.spp"/>
+ <include name="**/pnames.icu"/>
+ <include name="**/ubidi.icu"/>
+ <include name="**/ucase.icu"/>
+ <include name="**/unames.icu"/>
+ <include name="**/unorm.icu"/>
+ <include name="**/uprops.icu"/>
+ </patternset>
+ </unjar>
+ </target>
+
+ <target name="stringPrep" description="modular build of stringprep services" depends="init,normalizer,copyStringPrepData">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ sourcepath=""
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- StringPrep (impl and text), IDNA and Punycode sources. -->
+ <include name="com/ibm/icu/impl/*StringPrep*.java"/>
+ <include name="com/ibm/icu/text/*StringPrep*.java"/>
+ <include name="com/ibm/icu/text/*IDNA*.java"/>
+ <include name="com/ibm/icu/text/Punycode.java"/>
+ <!-- Patterns kept out of this module's compile set; excludes take precedence over includes. -->
+ <exclude name="**/CVS/**/*"/>
+ <exclude name="com/ibm/icu/impl/BOCU.java"/>
+ <exclude name="com/ibm/icu/impl/Calendar*.java"/>
+ <exclude name="com/ibm/icu/impl/IntTrieBuilder.java"/>
+ <exclude name="com/ibm/icu/impl/TrieBuilder.java"/>
+ <exclude name="com/ibm/icu/impl/UtilityExtensions.java"/>
+ <exclude name="com/ibm/icu/lang/UScriptRun.java"/>
+
+ </javac>
+ </target>
+
+ <target name="stringPrepTests" description="test stringPrep modular build target" depends="stringPrep">
+ <javac destdir="${build.dir}"
+ srcdir="${src.dir}"
+ classpathref="build.classpath"
+ target="${icu4j.javac.target}"
+ source="${icu4j.javac.source}"
+ deprecation="off" debug="on">
+ <!-- Compile the stringprep tests with TestAll/TestFmwk/TestLog/TestUtil/UTF16Util; the unjar below pulls .spp files and idna_rules.res out of testdata.jar. -->
+ <include name="com/ibm/icu/dev/test/TestAll.java"/>
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/TestLog.java"/>
+ <include name="com/ibm/icu/dev/test/TestUtil.java"/>
+ <include name="com/ibm/icu/dev/test/UTF16Util.java"/>
+ <include name="com/ibm/icu/dev/test/stringprep/**/*"/>
+ <exclude name="**/CVS/**/*"/>
+ </javac>
+ <unjar dest="${build.dir}" src="${src.dir}/com/ibm/icu/dev/data/testdata.jar">
+ <patternset>
+ <include name="**/idna_rules.res"/>
+ <include name="**/*.spp"/>
+ </patternset>
+ </unjar>
+ </target>
+
+ <target name="excludeCharset" description="excludes charset module from ICU4J" depends="core">
+ <!-- Runs after core and deletes charset-related output from the build tree. -->
+ <delete failonerror="yes">
+ <!-- The *.cnv files and cnvalias.icu in the versioned data directory. -->
+ <fileset dir="${build.dir}/com/ibm/icu/impl/data/icudt${icu4j.version.number}b/" includes="*.cnv,cnvalias.icu"/>
+ <!-- Everything under the charset package output and under META-INF. -->
+ <fileset dir="${build.dir}/com/ibm/icu/charset"/>
+ <fileset dir="${build.dir}/META-INF/"/>
+ </delete>
+ </target>
+ <target description="excludes charset test from ICU4J" name="excludeCharsetTests" depends="tests">
+ <delete failonerror="yes"> <!-- removes the files under the charset test output directory -->
+ <fileset dir="${build.dir}/com/ibm/icu/dev/test/charset"/>
+ </delete>
+ </target>
+
+ <target name="moduleCheck" description="run tests for module jar" depends="init">
+ <java failonerror="true" fork="yes" classname="com.ibm.icu.dev.test.TestAll"> <!-- forked JVM; a failing run fails the build -->
+ <classpath>
+ <pathelement location="${build.dir}"/>
+ </classpath>
+ <arg value="-nothrow"/>
+ <arg value="-nodata"/>
+ </java>
+ </target>
+
+ <!-- Copies the Transliterator_Han_Latin_*.txt data files from the source data directory into the build tree. -->
+ <target name="extraTransliteratorData" description="build han-to-latin transliterator data (normally omitted)" depends="init">
+ <copy todir="${build.dir}/com/ibm/icu/impl/data">
+ <fileset includes="Transliterator_Han_Latin_*.txt" dir="${src.dir}/com/ibm/icu/impl/data"/>
+ </copy>
+ </target>
+
+ <!-- Target for building XLIFF2ICUConverter Jar -->
+
+ <target name="xliff" depends="init" description="build xliff converter tool">
+ <javac sourcepath=""
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+ <!-- NOTE(review): XMLValidator.java is compiled here, but its class is not named in the includes list of xliff.jar below; confirm that is intended. -->
+ <include name="com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.java"/>
+ <include name="com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter.java"/>
+ <include name="com/ibm/icu/dev/tool/UOption.java"/>
+ <include name="com/ibm/icu/dev/tool/xmlcomparator/XMLValidator.java"/>
+ </javac>
+ <jar jarfile="xliff.jar"
+ compress="true"
+ includes="com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter*.class,com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.class,com/ibm/icu/dev/tool/UOption.class"
+ basedir="${build.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="XLIFF To ICU Converter"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value="XLIFF2ICUConverter"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ <!-- Source jar: packages the converter's .java files straight from ${src.dir}, with a parallel manifest. -->
+ <jar jarfile="xliff-src.jar"
+ compress="true"
+ includes="com/ibm/icu/dev/tool/localeconverter/XLIFF2ICUConverter.java,com/ibm/icu/dev/tool/localeconverter/CalculateCRC32.java,com/ibm/icu/dev/tool/UOption.java"
+ basedir="${src.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="XLIFF To ICU Converter Sources"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value="XLIFF2ICUConverter Sources"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!-- Target for building Utilities Jar for CLDR -->
+
+ <target name="cldrUtil" depends="init" description="build Utilities for CLDR">
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ classpathref="build.classpath"
+ debug="on" deprecation="off">
+ <!-- No sourcepath="" on this javac. NOTE(review): presumably javac therefore also compiles classes these sources reference (the jar below expects AbstractTest* and TestLog* classes); confirm. -->
+ <include name="com/ibm/icu/dev/test/TestFmwk.java"/>
+ <include name="com/ibm/icu/dev/test/util/*.java"/>
+ <include name="com/ibm/icu/dev/tool/UOption.java"/>
+ </javac>
+ <jar jarfile="utilities.jar"
+ compress="true"
+ includes="com/ibm/icu/dev/test/util/*.class,com/ibm/icu/dev/test/TestFmwk*.class,com/ibm/icu/dev/test/AbstractTest*.class,com/ibm/icu/dev/test/TestLog*.class,com/ibm/icu/dev/tool/UOption*.class"
+ basedir="${build.dir}">
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="Utilities for CLDR Tools"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value="CLDR Utilities"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <!--Target for building jars -->
+ <target name="distJars" depends="jarSrc, jar, jarDocs" />
+
+ <!-- Target for checking the loading of bundles from the default package.
+ This is a hack to get around Eclipse's build problems.
+ -->
+ <target name="defaultPackage" depends="tests" description="Tests for loading resources in the default package">
+ <copy file="${src.dir}/com/ibm/icu/dev/test/util/TestDefaultPackageLoading.jpp"
+ tofile="${src.dir}/TestDefaultPackageLoading.java"/>
+ <copy file="${src.dir}/com/ibm/icu/dev/test/util/TestData_en.jpp"
+ tofile="${src.dir}/TestData_en.java"/>
+ <javac srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off">
+
+ <exclude name="**/CVS/**/*"/>
+ <include name="TestData_en.java"/>
+ <include name="TestDefaultPackageLoading.java"/>
+ </javac>
+ <!-- copy the file to default package and test if we can load it -->
+ <copy file="${build.dir}/com/ibm/icu/dev/data/testdata/te.res"
+ todir="${build.dir}/"/>
+ <!-- copy the file up one level and test if java style base name loading works -->
+ <copy file="${build.dir}/com/ibm/icu/dev/data/testdata/te.res"
+ tofile="${build.dir}/com/ibm/icu/dev/data/TestData_bge.res"/>
+ </target>
+
+ <target name="defaultPackageCheck" depends="tests, defaultPackage" description="Run Tests for loading resources in the default package">
+ <!--run the tests -->
+ <java classname="TestDefaultPackageLoading" fork="yes" failonerror="true">
+ <jvmarg value="-ea"/> <!-- enable assertion checks -->
+ <arg value="-n"/>
+ <classpath>
+ <pathelement path="${java.class.path}/"/>
+ <pathelement location="clover.jar"/>
+ <pathelement path="${build.dir}"/>
+ </classpath>
+ </java>
+ </target>
+
+ <!-- convert @deprecated @draft tags to @provisional -->
+ <target name="swatDeprecated" depends="init, tools" if="version.1.4">
+ <java classname="com.ibm.icu.dev.tool.docs.SwatDeprecated" classpath="${build.dir}" failonerror="true">
+ <arg value="-src"/>
+ <arg value="${src.dir}"/>
+ <arg value="-dst"/>
+ <arg value="${src.dir}"/>
+ <arg value="-overwrite"/>
+ <arg value="-verbose"/>
+ </java>
+ </target>
+
+ <!-- convert @provisional tags to @deprecated -->
+ <target name="restoreDeprecated" depends="init, tools" if="version.1.4">
+ <java classname="com.ibm.icu.dev.tool.docs.SwatDeprecated" classpath="${build.dir}" failonerror="true">
+ <arg value="-prov"/>
+ <arg value="-src"/>
+ <arg value="${src.dir}"/>
+ <arg value="-dst"/>
+ <arg value="${src.dir}"/>
+ <arg value="-overwrite"/>
+ <arg value="-verbose"/>
+ </java>
+ </target>
+
+ <!-- for building eclipse distribution -->
+ <target name="eclipseProjects"
+ depends="eclipseCoreProject,eclipseTestProject,eclipseWrapperProject"
+ description="create all eclipse icu projects"/>
+
+ <target name="undoEclipseMangle" depends="buildmangle" description="preprocess files back to default">
+ <java classname="com.ibm.icu.dev.tool.docs.CodeMangler" classpath="${build.dir}" logError="true">
+ <arg value="@eclipseCoreArgs.txt"/>
+ <arg value="@eclipseTestArgs.txt"/>
+ </java>
+ </target>
+
+ <target name="eclipseCoreMangle" depends="buildmangle">
+ <java classname="com.ibm.icu.dev.tool.docs.CodeMangler" classpath="${build.dir}" logError="true">
+ <arg value="-dFOUNDATION"/>
+ <arg value="-t"/>
+ <arg value="@eclipseCoreArgs.txt"/>
+ </java>
+ </target>
+
+ <target name="eclipseCore" depends="init,coreData,icudata,eclipseCoreMangle">
+ <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java,com/ibm/icu/math/**/*.java,com/ibm/icu/impl/**/*.java,com/ibm/icu/lang/*.java"
+ compiler="javac1.3"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ bootclasspath="ee.foundation.jar"
+ source="1.3"
+ target="1.1"
+ debug="on"
+ deprecation="off"
+ encoding="ascii"/>
+ <antcall target="indices"/>
+ <jar jarfile="${jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html, unicode-license.txt" />
+ <fileset dir="${build.dir}"
+ includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*"
+ excludes="META-INF/services/**/*,com/ibm/icu/charset/**/*,${icu4j.data.path}/*.cnv, ${icu4j.data.path}/cnvalias.icu"/>
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J Eclipse Build"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="eclipseCoreProject" depends="eclipseCore" description="gather eclipse icu core project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/com.ibm.icu"/>
+ <fileset dir="${eclipse.projects.dir}/com.ibm.icu-feature"/>
+ </delete>
+ <!-- collect source files -->
+ <copy toDir="${eclipse.projects.dir}/com.ibm.icu/src/com/ibm/icu">
+ <fileset dir="src/com/ibm/icu"
+ includes="impl/**/*,lang/**/*,math/**/*,text/**/*,util/**/*"
+ excludes="**/CVS/**/*,**/*.jar,**/Transliterator_Han_Latin*.txt"/>
+ </copy>
+ <!-- collect icu data files -->
+ <copy todir="${eclipse.projects.dir}/com.ibm.icu/src/com/ibm/icu/impl/data">
+ <fileset dir="${build.dir}/com/ibm/icu/impl/data"
+ includes="**/*.brk,**/*.icu,**/*.res,**/*.spp,**/*.txt"
+ excludes="**/Transliterator_Han_Latin*.txt"/>
+ </copy>
+ <!-- collect eclipse project files -->
+ <copy todir="${eclipse.projects.dir}">
+ <fileset dir="${eclipse.dir}"
+ includes="com.ibm.icu/**/*,com.ibm.icu-feature/**/*"
+ excludes="**/CVS/**/*"/>
+ </copy>
+ <!-- copy ICU license file -->
+ <copy file="license.html"
+ todir="${eclipse.projects.dir}/com.ibm.icu/about_files"/>
+ <!-- ucd terms -->
+ <copy file="src/com/ibm/icu/dev/data/unicode/ucdterms.txt"
+ todir="${eclipse.projects.dir}/com.ibm.icu/about_files"/>
+ <!-- copy ICU about file -->
+ <copy file="${eclipse.dir}/misc/about_icu.html"
+ tofile="${eclipse.projects.dir}/com.ibm.icu/about.html"/>
+ <!-- common eclipse about file -->
+ <copy file="${eclipse.dir}/misc/about.html"
+ todir="${eclipse.projects.dir}/com.ibm.icu-feature/sourceTemplatePlugin"/>
+ </target>
+
+
+ <!-- ensure eclipse wrapper code builds with foundation1.0/java1.3 -->
+ <target name="eclipseWrapper" depends="init">
+ <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java"
+ compiler="javac1.3"
+ srcdir="${src.dir}/com/ibm/icu/dev/eclipse/com.ibm.icu.base/src"
+ destdir="${wrapper.build.dir}"
+ classpathref="build.classpath"
+ bootclasspath="ee.foundation.jar"
+ source="1.3"
+ target="1.1"
+ debug="on"
+ deprecation="off"
+ encoding="ascii"/>
+ <jar jarfile="${jdk.wrapper.jar.file}"
+ compress="true"
+ basedir="${wrapper.build.dir}"/>
+ </target>
+
+ <target name="eclipseWrapperTest" depends="init">
+ <!-- TODO: build the wrapper test code in this ant target -->
+ </target>
+
+ <target name="eclipseWrapperProject" depends="eclipseWrapper,eclipseWrapperTest" description="gather eclipse icu wrapper project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/com.ibm.icu.base"/>
+ <fileset dir="${eclipse.projects.dir}/com.ibm.icu.base-feature"/>
+ <fileset dir="${eclipse.projects.dir}/com.ibm.icu.base.tests"/>
+ </delete>
+ <!-- collect eclipse project files -->
+ <copy todir="${eclipse.projects.dir}">
+ <fileset dir="${eclipse.dir}"
+ includes="com.ibm.icu.base/**/*,com.ibm.icu.base-feature/**/*,com.ibm.icu.base.tests/**/*"
+ excludes="**/CVS/**/*"/>
+ </copy>
+ <!-- copy ICU license file -->
+ <copy file="license.html"
+ todir="${eclipse.projects.dir}/com.ibm.icu.base/about_files"/>
+ <!-- copy ICU about file -->
+ <copy file="${eclipse.dir}/misc/about_icu.html"
+ tofile="${eclipse.projects.dir}/com.ibm.icu.base/about.html"/>
+ <!-- common eclipse about file -->
+ <copy file="${eclipse.dir}/misc/about.html"
+ todir="${eclipse.projects.dir}/com.ibm.icu.base-feature/sourceTemplatePlugin"/>
+ </target>
+
+ <target name="eclipseTestMangle" depends="buildmangle">
+ <java classname="com.ibm.icu.dev.tool.docs.CodeMangler" classpath="${build.dir}" logError="true">
+ <arg value="-dFOUNDATION"/>
+ <arg value="-t"/>
+ <arg value="@eclipseTestArgs.txt"/>
+ </java>
+ </target>
+
+ <target name="eclipseTests" depends="eclipseCore,testdata,eclipseTestMangle" description="build tests trimmed for eclipse API">
+ <!-- TODO: we should actually use ee.foundation.jar for eclipse env -->
+ <javac includes="com/ibm/icu/dev/test/**/*.java"
+ excludes="**/CVS/**/*,com/ibm/icu/dev/test/charset/**/*.*,com/ibm/icu/dev/test/perf/**/*.*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="1.3"
+ target="1.3"
+ classpathref="build.classpath"
+ debug="on"
+ deprecation="off"/>
+ <antcall target="indices"/>
+ <jar jarfile="${testjar.file}" compress="true" >
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/test/**/*"/>
+ <fileset dir="${build.dir}" includes="com/ibm/icu/dev/data/**/*"/>
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="Tests for ICU4J "/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value="Tests for ICU4J"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="eclipseTestProject" depends="eclipseTests,zipTestSrc" description="gather eclipse icu test project files">
+ <delete failonerror="no">
+ <fileset dir="${eclipse.projects.dir}/com.ibm.icu.tests"/>
+ </delete>
+ <!-- copy eclipse test source and project files -->
+ <copy toDir="${eclipse.projects.dir}/com.ibm.icu.tests">
+ <fileset dir="${eclipse.dir}/com.ibm.icu.tests"
+ includes="**/*"
+ excludes="**/CVS/**/"/>
+ </copy>
+ <!-- copy icu test jar -->
+ <copy toDir="${eclipse.projects.dir}/com.ibm.icu.tests" file="${testjar.file}"/>
+ <!-- copy icu test source zip -->
+ <copy toDir="${eclipse.projects.dir}/com.ibm.icu.tests" file="${zipTestSrc.file}"/>
+ </target>
+
+ <target name="zipCoreSrc" depends="init" description="build zip of core sources for debugging">
+ <!-- for eclipse -->
+ <zip destfile="${zipCoreSrc.file}"
+ compress="true"
+ basedir="${src.dir}"
+ includes="../license.html,../Copyright_Assignment.rtf,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*,com/ibm/icu/math/**/*,com/ibm/icu/text/**/*,com/ibm/icu/util/**/*"/>
+ </target>
+
+ <target name="zipTestSrc" depends="init" description="build zip of test sources for debugging">
+ <!-- for eclipse -->
+ <zip destfile="${zipTestSrc.file}"
+ compress="true"
+ basedir="${src.dir}"
+ includes="com/ibm/icu/dev/test/**/*"
+ excludes="com/ibm/icu/dev/test/cldr/**/*, com/ibm/icu/dev/test/perf/**/*"/>
+ </target>
+
+ <target name="gatherICU4JWrapperAPI" depends="tools" if="version.1.4">
+ <javadoc classpath="${build.dir}"
+ sourcepath="${src.dir}"
+ packagenames="com.ibm.icu.text,com.ibm.icu.util"
+ source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData"
+ path="${build.dir}">
+ <param name="-name" value="ICU4J 3.6"/>
+ <param name="-base" value="com.ibm.icu"/>
+ <param name="-output" value="${api.dir}/icu4j36w_i.api"/>
+ <param name="-filter" value="BreakIterator|CollationKey|Collator|DateFormat|DecimalFormat|MessageFormat|NumberFormat|SimpleDateFormat|StringTokenizer|Calendar|TimeZone|ULocale|DateFormatSymbols|DecimalFormatSymbols|Calendar|TimeZone"/>
+ <!-- param name="-gzip"/ -->
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="gatherEclipseWrapperAPI" depends="tools" if="version.1.4">
+ <javadoc classpath="${build.dir}"
+ sourcepath="${src.dir}/com/ibm/icu/dev/eclipse/com.ibm.icu.base/src"
+ packagenames="com.ibm.icu.text,com.ibm.icu.util"
+ source="1.4">
+ <doclet name="com.ibm.icu.dev.tool.docs.GatherAPIData"
+ path="${build.dir}">
+ <param name="-name" value="ICU4J 3.6 Wrapper"/>
+ <param name="-base" value="com.ibm.icu"/>
+ <param name="-output" value="${api.dir}/icu4j36w_e.api"/>
+ <!-- param name="-gzip"/ -->
+ </doclet>
+ </javadoc>
+ </target>
+
+ <target name="reportWrapperAPI" depends="tools" if="version.1.4">
+ <java classname="com.ibm.icu.dev.tool.docs.ReportAPI" classpath="${build.dir}" failonerror="true">
+ <arg value="-old:"/>
+ <arg value="${api.dir}/icu4j${icu4j.previous.version.number}w_i.api"/>
+ <arg value="-new:"/>
+ <arg value="${api.dir}/icu4j${icu4j.version.number}w_e.api"/>
+ <arg value="-html"/>
+ <arg value="-out:"/>
+ <arg value="${api.dir}/icu4j_compare_${icu4j.previous.version.number}_wrapper_java.html"/>
+ </java>
+ </target>
+
+ <target name="runCheck" depends="init,tests">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <arg value="-n"/>
+ <classpath>
+ <pathelement path="${java.class.path}/"/>
+ <pathelement location="${jar.file}"/>
+ <pathelement location="${charsets.jar.file}"/>
+ <pathelement location="clover.jar"/>
+ <pathelement path="${build.dir}"/>
+ </classpath>
+ </java>
+ </target>
+
+ <target name="eclipseRunCheck" depends="init,eclipseTests">
+ <java classname="com.ibm.icu.dev.test.TestAll" fork="yes" failonerror="true">
+ <arg value="-n"/>
+ <classpath>
+ <pathelement path="${java.class.path}/"/>
+ <pathelement location="icu4j.jar"/>
+ <pathelement location="clover.jar"/>
+ <pathelement path="${build.dir}"/>
+ </classpath>
+ </java>
+ </target>
+
+ <target name="eclipseCompat" depends="init, eclipseCore"/>
+ <target name="eclipseCompatTests" depends="init, eclipseTestMangle, eclipseTests" />
+ <target name="eclipseCompatCheck" depends="init, eclipseCompat, eclipseCompatTests, deleteCore, eclipseRunCheck"/>
+
+
+ <target name="core13" depends="init,coreData,icudata" description="build core classes and data">
+ <javac includes="com/ibm/icu/util/**/*.java,com/ibm/icu/text/**/*.java,com/ibm/icu/math/**/*.java,com/ibm/icu/impl/**/*.java,com/ibm/icu/lang/*.java,com/ibm/icu/charset/**/*.java"
+ excludes="**/CVS/**/*,com/ibm/icu/charset/**/*.*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ classpathref="build.classpath"
+ source="${icu4j.javac.source}"
+ target="${icu4j.javac.target}"
+ debug="on" deprecation="off"
+ encoding="ascii"/>
+ </target>
+
+ <target name="jar13" depends="core13,indices" description="build full 'icu4j.jar' jar file for jdk1.3">
+ <jar jarfile="${jar.file}" compress="true">
+ <fileset dir="${basedir}" includes="license.html, unicode-license.txt" />
+ <fileset dir="${build.dir}"
+ includes="com/ibm/icu/util/**/*,com/ibm/icu/text/**/*,com/ibm/icu/math/**/*,com/ibm/icu/impl/**/*,com/ibm/icu/lang/**/*"
+ excludes="META-INF/services/**/*,com/ibm/icu/charset/**/*,${icu4j.data.path}/*.cnv, ${icu4j.data.path}/cnvalias.icu"/>
+
+ <manifest>
+ <attribute name="Built-By" value="${corp}"/>
+ <section name="common">
+ <attribute name="Specification-Title" value="ICU4J JDK 1.3 Compatible Build"/>
+ <attribute name="Specification-Version" value="${icu4j.version.string}"/>
+ <attribute name="Specification-Vendor" value="ICU"/>
+ <attribute name="Implementation-Title" value=" ICU for Java"/>
+ <attribute name="Implementation-Version" value="${icu4j.version.string}"/>
+ <attribute name="Implementation-Vendor" value="${corp}"/>
+ <attribute name="Implementation-Vendor-Id" value="com.ibm"/>
+ <attribute name="Copyright-Info" value="${copyright}"/>
+ <attribute name="Sealed" value="false"/>
+ </section>
+ </manifest>
+ </jar>
+ </target>
+
+ <target name="jdk13Compat" depends="init, eclipseCoreMangle, core13" description="builds ICU4J core with JDK 1.3"/>
+ <target name="jdk13CompatTests" depends="init, jdk13Compat, eclipseTestMangle, eclipseTests" description="builds ICU4J tests with JDK 1.3"/>
+ <target name="jdk13CompatCheck" depends="init, jdk13CompatTests, jar13, deleteCore, eclipseRunCheck" description="runs the tests with JDK 1.3"/>
+</project>
diff --git a/eclipseCoreArgs.txt b/eclipseCoreArgs.txt
new file mode 100644
index 0000000..c37d8c2
--- /dev/null
+++ b/eclipseCoreArgs.txt
@@ -0,0 +1,22 @@
+# Copyright (C) 2005-2006, International Business Machines Corporation and
+# others. All Rights Reserved.
+src/com/ibm/icu/impl/CollectionUtilities.java
+src/com/ibm/icu/impl/ICUResourceBundle.java
+src/com/ibm/icu/impl/ICUResourceBundleImpl.java
+src/com/ibm/icu/impl/ICUResourceBundleReader.java
+src/com/ibm/icu/impl/LRUMap.java
+src/com/ibm/icu/impl/PatternTokenizer.java
+src/com/ibm/icu/impl/Utility.java
+src/com/ibm/icu/lang/UCharacter.java
+src/com/ibm/icu/math/BigDecimal.java
+src/com/ibm/icu/text/DateTimePatternGenerator.java
+src/com/ibm/icu/text/DecimalFormat.java
+src/com/ibm/icu/text/DigitList.java
+src/com/ibm/icu/text/NumberFormat.java
+src/com/ibm/icu/text/RuleBasedCollator.java
+src/com/ibm/icu/text/RuleBasedBreakIterator.java
+src/com/ibm/icu/text/RuleBasedNumberFormat.java
+src/com/ibm/icu/text/UnicodeSet.java
+src/com/ibm/icu/text/UTF16.java
+src/com/ibm/icu/util/ByteArrayWrapper.java
+src/com/ibm/icu/util/GlobalizationPreferences.java
diff --git a/eclipseTestArgs.txt b/eclipseTestArgs.txt
new file mode 100644
index 0000000..3d22331
--- /dev/null
+++ b/eclipseTestArgs.txt
@@ -0,0 +1,40 @@
+# Copyright (C) 2005-2006, International Business Machines Corporation and
+# others. All Rights Reserved.
+src/com/ibm/icu/dev/test/bigdec/DiagBigDecimal.java
+src/com/ibm/icu/dev/test/cldr/TestCLDRVsICU.java
+src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
+src/com/ibm/icu/dev/test/collator/RandomCollator.java
+src/com/ibm/icu/dev/test/collator/TestAll.java
+src/com/ibm/icu/dev/test/format/BigNumberFormatTest.java
+src/com/ibm/icu/dev/test/format/DateTimeGeneratorTest.java
+src/com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java
+src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatAPIC.java
+src/com/ibm/icu/dev/test/format/NumberFormatRegressionTest.java
+src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+src/com/ibm/icu/dev/test/format/NumberRegression.java
+src/com/ibm/icu/dev/test/format/RbnfTest.java
+src/com/ibm/icu/dev/test/timezone/TimeZoneAliasTest.java
+src/com/ibm/icu/dev/test/timezone/TimeZoneRegression.java
+src/com/ibm/icu/dev/test/util/ICUResourceBundleTest.java
+src/com/ibm/icu/dev/test/util/UtilityTest.java
+src/com/ibm/icu/dev/test/util/UnicodeMap.java
+src/com/ibm/icu/dev/test/util/UnicodeProperty.java
+src/com/ibm/icu/dev/test/util/BagFormatter.java
+src/com/ibm/icu/dev/test/util/ICUPropertyFactory.java
+src/com/ibm/icu/dev/test/util/TestBagFormatter.java
+src/com/ibm/icu/dev/test/util/CollectionUtilities.java
+src/com/ibm/icu/dev/test/util/FileUtilities.java
+src/com/ibm/icu/dev/test/util/TransliteratorUtilities.java
+src/com/ibm/icu/dev/test/util/DataInputCompressor.java
+src/com/ibm/icu/dev/test/util/DataOutputCompressor.java
+src/com/ibm/icu/dev/test/util/TestUtilities.java
+src/com/ibm/icu/dev/test/util/Tokenizer.java
+src/com/ibm/icu/dev/test/util/BNF.java
+src/com/ibm/icu/dev/test/util/TestBNF.java
+src/com/ibm/icu/dev/test/serializable/FormatTests.java
+src/com/ibm/icu/dev/test/serializable/SerializableTest.java
+src/com/ibm/icu/dev/test/ResourceModule.java
+src/com/ibm/icu/dev/test/TestAll.java
+src/com/ibm/icu/dev/test/TestDataModule.java
+src/com/ibm/icu/dev/test/TestFmwk.java
+src/com/ibm/icu/dev/test/TestUtil.java
\ No newline at end of file
diff --git a/ee.foundation.jar b/ee.foundation.jar
new file mode 100644
index 0000000..60168e8
--- /dev/null
+++ b/ee.foundation.jar
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f730fd4a8ca71fcddc61a4f62490cea7805efffc7eb2b8669942f6d123f7249
+size 1059077
diff --git a/license.html b/license.html
new file mode 100644
index 0000000..b905ddf
--- /dev/null
+++ b/license.html
@@ -0,0 +1,51 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></meta>
+<title>ICU License - ICU 1.8.1 and later</title>
+</head>
+
+<body BGCOLOR="#ffffff">
+<h2>ICU License - ICU 1.8.1 and later</h2>
+
+<p>COPYRIGHT AND PERMISSION NOTICE</p>
+
+<p>
+Copyright (c) 1995-2006 International Business Machines Corporation and others
+</p>
+<p>
+All rights reserved.
+</p>
+<p>
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Software, and to permit persons
+to whom the Software is furnished to do so, provided that the above
+copyright notice(s) and this permission notice appear in all copies
+of the Software and that both the above copyright notice(s) and this
+permission notice appear in supporting documentation.
+</p>
+<p>
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
+OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
+RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
+USE OR PERFORMANCE OF THIS SOFTWARE.
+</p>
+<p>
+Except as contained in this notice, the name of a copyright holder shall not be
+used in advertising or otherwise to promote the sale, use or other dealings in
+this Software without prior written authorization of the copyright holder.
+</p>
+
+<hr>
+<p><small>
+All trademarks and registered trademarks mentioned herein are the property of their respective owners.
+</small></p>
+</body>
+</html>
diff --git a/readme.html b/readme.html
new file mode 100644
index 0000000..695f572
--- /dev/null
+++ b/readme.html
@@ -0,0 +1,1776 @@
+<!DOCTYPE html PUBLIC "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+ <meta http-equiv="Content-Type"
+ content="text/html; charset=iso-8859-1">
+ <meta http-equiv="Content-Style-Type" content="text/css2">
+ <title>ReadMe for ICU4J</title>
+<!--
+*******************************************************************************
+* Copyright (C) 2000-2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+-->
+ <style type="text/css">
+h3.doc { background: #CCCCFF }
+ </style>
+</head>
+<body style="background-color: rgb(255, 255, 255);" lang="EN-US"
+ link="#0000ff" vlink="#800080">
+<h2>International Components for Unicode for Java (ICU4J)</h2>
+<h3>Read Me for ICU4J 3.6</h3>
+<hr size="2" width="100%">
+<p><b>Release Date</b><br>
+October 1, 2006<br>
+</p>
+<p><b>Note:</b> This is a major release of ICU4J. It contains bug fixes,
+adds implementations of inherited API, and introduces new API
+or functionality.
+</p>
+<p>For the most recent release, see the <a
+ href="http://icu.sourceforge.net/download/"> ICU4J
+download site</a>. </p>
+<h3 class="doc">Contents</h3>
+<ul type="disc">
+ <li><a href="#introduction">Introduction to ICU4J</a></li>
+ <li><a href="#news">What Is New In This Release?</a></li>
+ <li><a href="#license">License Information</a></li>
+ <li><a href="#PlatformDependencies">Platform Dependencies</a></li>
+ <li><a href="#download">How to Download ICU4J</a></li>
+ <li><a href="#WhatContain">The Structure and Contents of ICU4J</a></li>
+ <li><a href="#API">Where to Get Documentation</a></li>
+ <li><a href="#HowToInstallJavac">How to Install and Build</a></li>
+ <li><a href="#HowToModularize">How to modularize ICU4J</a></li>
+ <li><a href="#tryingout">Trying Out ICU4J</a></li>
+ <li><a href="#resources">ICU4J Resource Information</a></li>
+ <li><a href="#WhereToFindMore">Where to Find More Information</a></li>
+ <li><a href="#SubmittingComments">Submitting Comments, Requesting
+Features and Reporting Bugs</a></li>
+</ul>
+<h3 class="doc"><a name="introduction"></a>Introduction to ICU4J</h3>
+<p>The International Components for Unicode (ICU) library provides
+robust and
+full-featured Unicode services on a wide variety of platforms. ICU
+supports the
+most current version of the Unicode standard, including support for
+supplementary characters (needed for GB 18030 repertoire support).</p>
+<p>Java provides a strong foundation for global programs, and IBM and
+the
+ICU team played a key role in providing globalization technology to
+Java. But because of its long release schedule, Java cannot always keep
+up with evolving standards. The ICU team continues to extend Java's
+Unicode and internationalization support, focusing on improving
+performance,
+keeping current with the Unicode standard, and providing richer APIs,
+while
+remaining as compatible as possible with the original Java text and
+internationalization API design.</p>
+<p>ICU4J is an add-on to the regular JVM that provides:
+</p>
+<ul>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/Collate_Intro.html"><b>Collation</b></a>
+– rule-based, up-to-date Unicode Collation Algorithm (UCA) sorting order<br>
+ For fast multilingual string comparison; faster
+and more complete than
+the JDK</li>
+ <li><a href="http://icu.sourceforge.net/userguide/strings.html"><b>Supplementary
+Characters</b></a> – String manipulation and character properties<br>
+ Required for proper GB 18030 and JIS 213
+repertoire support</li>
+ <li><a href="http://icu.sourceforge.net/userguide/charsetDetection.html"><b>Charset
+Detection</b></a> – Recognition of various single and multibyte charsets<br>
+ Useful for recognizing untagged text data</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/unicodeSet.html"><b>UnicodeSet</b></a>
+– standard set operations optimized for sets of Unicode characters<br>
+ UnicodeSets can be built from string patterns
+using any Unicode properties.</li>
+ <li><a href="http://icu.sourceforge.net/userguide/Transform.html"><b>Transforms</b></a>
+– a flexible mechanism for Unicode text conversions<br>
+ Including Full/Halfwidth conversions,
+Normalization, Case conversions, Hex
+conversions, and transliterations between scripts (50+ pairs)</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/normalization.html"><b>Unicode
+Normalization</b></a> – NFC, NFD, NFKD, NFKC<br>
+ For canonical text representations, needed for
+XML and the net</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/dateCalendar.html"><b>International
+Calendars</b></a> – Arabic, Buddhist, Hebrew, Japanese, Ethiopic, Islamic, Coptic and Chinese calendars<br>
+ Required for correct presentation of dates in
+certain countries</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/formatNumbers.html"><b>Number
+Format
+Enhancements</b></a> – Scientific Notation, Spelled-out, etc.<br>
+ Enhancements to the normal Java number
+formatting. The spell-out format is
+used for checks and similar documents</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/boundaryAnalysis.html"><b>Enhanced
+Word-Break Detection</b></a> – Rule-based, supports Thai<br>
+ Required for correct support of Thai</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/compression.html"><b>Unicode
+Text
+Compression</b></a> – Standard compression of Unicode text<br>
+ Suitable for large numbers of small fields,
+where LZW and similar schemes
+do not apply</li>
+ <li><a
+ href="http://icu.sourceforge.net/userguide/conversion.html"><b>Charset Conversion</b></a> – Conversion to and from different charsets.<br>
+ Plugs into JDK's CharsetProvider Service Provider Interface (SPI)</li>
+
+</ul>
+<blockquote>
+ <p><b>Note:</b> We continue to provide assistance to Sun, and in some
+cases, ICU4J support has been rolled into a later release of Java. For
+example, the Thai word-break is now in Java 1.4. However, the most
+current and complete version is always found in ICU4J.</p>
+</blockquote>
+<!--
+
+<p><b>ICU4J is an add-on library that extends Java's globalization
+
+ technology.</b> Java provides a strong foundation for global
+
+ programs, but Java does not yet provide all the globalization
+
+ features that some products require. IBM played a key role in
+
+ providing globalization technology to Sun for use in Java. Over
+
+ the past seven years, the ICU team has continued to enhance and
+
+ extend this technology. IBM makes this technology available
+
+ in Java through the ICU4J open-source project.</p>
+
+
+
+<p>ICU4J provides the following tools:
+
+ <ul>
+
+ <li><b>Unicode Normalization</b> – NFC, NFD, NFKD, NFKC<br>
+
+ Produces canonical text representations, needed for XML and the net.
+
+ <li><b>International Calendars</b> – Arabic, Buddhist, Hebrew, and Japanese<br>
+
+ Required for correct presentation of dates in some countries.
+
+ <li><b>Number Format Enhancements</b> – Scientific Notation, Spelled-out Numbers<br>
+
+ Enhances standard Java number formatting. The spelled-out format is used
+
+for checks and similar documents.
+
+ <li><b>Enhanced word-break detection</b> – Rule-based, supports Thai<br>
+
+ Required for correct support of Thai.
+
+ <li><b>Unicode Text Compression</b> – Standard compression of Unicode text<br>
+
+ Suitable for large numbers of small fields, where LZW and similar schemes
+
+do not apply.
+
+ <li><b>Collation</b> – Rule-based, up-to-date Unicode Collation Algorithm (UCA) sorting order<br>
+
+ For fast multilingual string comparison
+
+ <li><b>Transforms</b> – Rule-based transformations of unicode data<br>
+
+ Useful for analysis and transliteration
+
+ </ul>
+
+
+
+In some cases, the above support has been rolled into a later release of
+
+Java. For example, the Thai word-break is now in Java 1.4. However, the most
+
+current and complete version of this support is always found in ICU4J.
+
+-->
+<h3 class="doc"><a name="news"></a>What Is New In This Release?</h3>
+<ul>
+ <li><strong>Unicode</strong>: ICU uses and supports Unicode 5.0, which is the
+ latest major release of Unicode. Unicode 5.0 will be used in many
+ operating systems and applications, and this version of ICU is important
+ to maintain interoperability with these new operating systems and applications.
+ More information about Unicode 5.0 can be found in the <a href="http://www.unicode.org/press/pr-ucd5.0.html">Unicode press
+ release</a>.</li>
+ <li><strong>Locale Data</strong>: ICU uses and supports data from <a href="http://www.unicode.org/press/pr-cldr1.4.html">Common Locale Data
+ Repository (CLDR) 1.4</a>, which includes many improvements in quality
+ and quantity of data. There is 25% more CLDR locale data in 245 locales in ICU.</li>
+ <li><strong>Globalization Preferences</strong>: A new flexible container for locale data was added.</li>
+ <li><strong>Formatting</strong>: A <strong>preview</strong> of the flexible date/time format generator has been added. This allows multiple date and time format patterns to be generated that are valid for specific locales.</li>
+ <li><strong>Charset Conversion</strong>: A <strong>preview</strong> of the ICU4J implementation of the java.nio.charset.Charset API was added.</li>
+</ul>
+<p><i><font color="red"><strong>Note</strong>: Do not use preview APIs in production code. They may change drastically in subsequent releases.</font></i></p>
+
+<h3 class="doc"><a name="license"></a>License Information</h3>
+<p>
+The ICU projects (ICU4C and ICU4J) use the X license. The X
+license is <b>suitable for commercial use</b> and is a recommended free software license
+that is compatible with the GNU GPL license. This became
+effective with release 1.8.1 of ICU4C and release 1.3.1 of ICU4J in
+mid-2001. All new ICU releases will adopt the X license; previous ICU
+releases continue to utilize the IPL (IBM Public License). Users
+of previous releases of ICU who want to adopt new ICU releases will
+need to accept the terms and conditions of the X license.
+</p>
+<p>
+The main effect of the change is to provide GPL compatibility.
+The X license is listed as GPL compatible, see the GNU page at
+<a href="http://www.gnu.org/philosophy/license-list.html#GPLCompatibleLicenses">
+http://www.gnu.org/philosophy/license-list.html#GPLCompatibleLicenses</a>.
+This means that GPL projects can now use ICU code; it does <b>not</b>
+mean that projects using ICU become subject to GPL.
+</p>
+<p>
+ The IBM version contains the essential text of the license, omitting the
+X-specific trademarks and copyright notices. The full copy of <a
+ href="license.html">ICU's license</a> is included in the download
+package.
+</p>
+<h3 class="doc"><a name="PlatformDependencies"></a>Platform Dependencies</h3>
+<p> By default ICU4J depends on functionality that is only available
+in JDK 1.4. We provide the ability to build a variant of ICU4J
+that will run on JDK 1.3, but not all build targets work on that
+platform. Currently 1.1.x and 1.2.x JVMs are unsupported and untested,
+and you use the components on these JVMs at your own risk.</p>
+<p> The reference platforms which we support and test ICU4J on are:</p>
+<ul>
+ <li> WinXP, IBM JDK 1.5.0</li>
+ <li> Solaris 5.9, Sun JDK 1.5.0</li>
+ <li> AIX 5.2, IBM JDK 1.5.0</li>
+</ul>
+<p>Please use the most recent updates of the supported JDK versions.</p>
+<p>Additionally, we have built and tested ICU4J on the following <b>unsupported</b>
+platforms (note, only some build/test targets work on 1.3 platforms):
+</p>
+<ul>
+<li> WinXP / IBM JDK 1.4.2, 1.4.1, Sun JDK 1.5.0, 1.4.2</li>
+<li> Solaris 5.6, Solaris 5.7 / Sun JDK 1.4.2, 1.4.1, 1.4.0</li>
+<li> Solaris 5.9 / Sun JDK 1.4.2, 1.4.1, 1.4.0</li>
+<li> AIX 5.2 / IBM JDK 1.4.2, 1.4.1</li>
+<li> AIX 5.1 / IBM JDK 1.4.2, 1.4.1</li>
+<li> RedHat Enterprise Linux 3 / IBM JDK 1.5.0, 1.4.2, Sun JDK 1.5.0, 1.4.2, 1.4.1, 1.4.0</li>
+<li> HP UX / Sun JDK 1.5.0, 1.4.2</li>
+</ul>
+<h3 class="doc"><a name="download"></a>How to Download ICU4J</h3>
+<p>There are two ways to download the ICU4J releases.
+</p>
+<ul type="disc">
+ <li><b>Official Release Snapshot:</b><br>
+If you want to use ICU4J (as opposed to developing it), your best bet
+is to download an official, packaged version of the ICU4J source
+code. These versions are tested more thoroughly than day-to-day
+development builds, and they are packaged in jar files for convenient
+download. These packaged files can be found at the <a
+ href="http://icu.sourceforge.net/download/">ICU Downloads page</a>.
+A packaged snapshot is named <b>icu4j-XXX-src.jar</b>, where XXX
+is the release version number. Please unjar this file. It
+will reconstruct the source directory.</li>
+</ul>
+<ul type="disc">
+ <li><b>CVS Source Repository:</b><br>
+If you are interested in developing features, patches, or bug fixes for
+ICU4J, you should probably be working with the latest version of the
+ICU4J source code. You will need to check the code out of our CVS
+repository to ensure that you have the most recent version of all of
+the files. There are several ways to do this. Please follow the
+directions that are contained on the <a
+ href="http://www.ibm.com/software/globalization/icu/repository.jsp">Source
+ Repository page</a> for details.
+ </li>
+</ul>
+<p>For more details on how to download ICU4J directly from the web
+site, please see the ICU downloads page at <a
+ href="http://icu.sourceforge.net/download/">http://icu.sourceforge.net/download/</a>
+</p>
+<h3 class="doc"><a name="WhatContain"></a>The Structure and Contents of
+ICU4J</h3>
+<p>Below, <b>$Root</b> is the location of the icu4j directory in your
+file system, like
+"drive:\...\icu4j" in your environment. "drive:\..." stands for any
+drive and any directory on that drive that you chose to install icu4j
+into. </p>
+<p><b>Information and build files:</b></p>
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="623">
+ <tbody>
+ <tr>
+ <td align="right" bgcolor="#ffffff" valign="baseline"><b>readme.html</b><br>
+(this file)</td>
+ <td bgcolor="#ffffff" valign="baseline">A description of ICU4J
+(International Components for Unicode for Java)</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">license.html</th>
+ <td bgcolor="#ffffff" valign="baseline">The X license, used by
+ICU4J</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">build.xml</th>
+ <td bgcolor="#ffffff" valign="baseline">Ant build file. See <a
+ href="#HowToInstallJavac">How to Install and Build</a> for more
+information</td>
+ </tr>
+ </tbody>
+</table>
+<p><b>The source directories mirror the package structure of the code.</b><br>
+<font color="red">Core</font> packages become part of the ICU4J jar
+file.<br>
+<font color="red">API</font> packages contain classes with supported
+API. <br>
+<font color="red">RichText</font> classes are Core and API, but can be
+removed from icu4j.jar, and can be built into their own jar.</p>
+<table bgcolor="#ccccff" border="0" cellpadding="3" frame="void"
+ width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/charset<br>
+ <font color="red">Core, API (Preview)</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Packages that provide Charset conversion
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/dev<br>
+ <font color="red">Non-Core, Non-API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Packages used for
+internal development:
+ <ul>
+ <li>Data: data used by tests and in building ICU</li>
+ <li>Demos: Calendar, Holiday, Break Iterator, Rule-based Number
+Format, Transformations<br>
+(See <a href="#tryingout">below</a> for more information about the
+demos.)</li>
+ <li>Tests: API and coverage tests of all functionality.<br>
+For information about running the tests, see
+$Root/src/com/ibm/icu/dev/test/TestAll.java.</li>
+ <li>Tools: tools used to build data tables, etc.</li>
+ </ul>
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/impl<br>
+ <font color="red">Core, Non-API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">These are utility classes
+used from different ICU4J core packages.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/lang<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Character properties
+package.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/math<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Additional math classes.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/text<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Additional text classes.
+These add to, and in some cases replace, related core Java classes:
+ <ul>
+ <li>Arabic shaping </li>
+ <li>Break iteration </li>
+ <li>Date formatting </li>
+ <li>Number formatting </li>
+ <li>Transliteration </li>
+ <li>Normalization </li>
+ <li>String manipulation </li>
+ <li>Collation </li>
+ <li>String search </li>
+ <li>Unicode compression </li>
+ <li>Unicode sets </li>
+ </ul>
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/icu/util<br>
+ <font color="red">Core, API</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Additional utility
+classes:
+ <ul>
+ <li>Calendars - Gregorian, Buddhist, Coptic, Ethiopic, Hebrew, Islamic, Japanese, Chinese </li>
+ <li>Holiday</li>
+ <li>TimeZone</li>
+ <li>VersionInfo</li>
+ <li>Iteration</li>
+ </ul>
+ </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/src/com/ibm/richtext<br>
+ <font color="red">RichText</font></th>
+ <td bgcolor="#ffffff" valign="baseline">Styled text editing
+package. This includes demos, tests, and GUIs for editing and
+displaying styled text. The richtext package provides a scrollable
+display, typing, arrow-key support, tabs, alignment and justification,
+word- and sentence-selection (by double-clicking and triple-clicking,
+respectively), text styles, clipboard operations (cut, copy and paste)
+and a log of changes for undo-redo. Richtext uses Java's TextLayout and
+complex text support (provided to Sun by the ICU4J team).</td>
+ </tr>
+ </tbody>
+</table>
+<p><b>Building ICU4J creates and populates the following directories:</b></p>
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/classes</th>
+ <td bgcolor="#ffffff" valign="baseline">contains all class files</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">$Root/doc</th>
+ <td bgcolor="#ffffff" valign="baseline">contains JavaDoc for all
+packages</td>
+ </tr>
+ </tbody>
+</table>
+<br>
+<p><b>ICU4J data is stored in the following locations:</b></p>
+<table bgcolor="#ccccff" border="0" cellpadding="3" frame="void"
+ width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline"><code>com.ibm.icu.impl.data</code></th>
+ <td bgcolor="#ffffff" valign="baseline">Holds data used by the
+ICU4J core packages (<code>com.ibm.icu.lang</code>, <code>com.ibm.icu.text</code>,
+ <code>com.ibm.icu.util</code> and <code>com.ibm.icu.math</code>).
+ In particular, all resource
+information is stored here.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline"><code>com.ibm.icu.dev.data</code></th>
+ <td bgcolor="#ffffff" valign="baseline">Holds data that is not
+part of ICU4J core, but rather part of a test, sample, or demo.</td>
+ </tr>
+ </tbody>
+</table>
+<br>
+<h3 class="doc"><a name="API"></a>Where to get Documentation</h3>
+<p>The <a href="http://icu.sourceforge.net/userguide/">ICU user's
+guide</a> contains lots of general information about ICU, in its C,
+C++, and Java incarnations.</p>
+<p>The complete API documentation for ICU4J (javadoc) is available on
+the ICU4J web site, and can be built from the sources:
+</p>
+<ul>
+ <li><a href="http://icu.sourceforge.net/apiref/icu4j/">Index
+to all ICU4J API</a></li>
+ <li><a href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/CharsetDetector.html">Charset Detector</a> – Detection of charset from a byte stream</li>
+ <li>International Calendars –
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/BuddhistCalendar.html">Buddhist</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/ChineseCalendar.html">Chinese</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/CopticCalendar.html">Coptic</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/EthiopicCalendar.html">Ethiopic</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/GregorianCalendar.html">Gregorian</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/HebrewCalendar.html">Hebrew</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/IslamicCalendar.html">Islamic</a>,
+ <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/util/JapaneseCalendar.html">Japanese</a>.</li>
+ <li><a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/Normalizer.html">Unicode
+Normalization</a> – Canonical text representation for W3C.</li>
+ <li><a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/NumberFormat.html">Number
+Format Enhancements</a> – Scientific Notation, Spelled out.</li>
+ <li><a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/BreakIterator.html">Enhanced
+word-break detection</a> – Rule-based, supports Thai</li>
+ <li><a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/Transliterator.html">Transliteration</a>
+– A general framework for converting text from one format to another,
+e.g. Cyrillic to Latin, or Hex to Unicode. </li>
+ <li>Unicode Text <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/UnicodeCompressor.html">Compression</a>
+& <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/UnicodeDecompressor.html">Decompression</a>
+– 2:1 compression on English Unicode text.</li>
+ <li>Collation - <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/RuleBasedCollator.html">Rule-based
+sorting</a>, <a
+ href="http://icu.sourceforge.net/apiref/icu4j/com/ibm/icu/text/StringSearch.html">Efficient
+multi-lingual searching</a> </li>
+</ul>
+<h3 class="doc"><a name="HowToInstallJavac"></a>How to Install and Build</h3>
+<p>To install ICU4J, simply place the prebuilt jar file <strong>icu4j.jar</strong>
+on your
+Java CLASSPATH. If you need Charset API support please place <strong>icu4j-charsets.jar</strong> on your class path.
+No other files are needed.</p>
+<p><b>Eclipse users:</b> See the ICU4J site for information on <a
+ href="http://icu.sourceforge.net/docs/eclipse_howto/eclipse_howto.html">
+how to configure Eclipse</a> to build ICU4J.</p>
+<p>To build ICU4J, you will need a Java2 JDK and the Ant build system.
+We strongly recommend using the Ant build system to build ICU4J.
+It's recommended to install both the JDK and Ant somewhere <em>outside</em>
+the ICU4J directory. For example, on Linux you might install these in
+/usr/local.</p>
+<ul>
+ <li>Install a recent JDK; version 1.4.x will work.</li>
+ <li>Install the <a href="http://ant.apache.org/"><strong>Ant</strong></a>
+build system. Ant is a portable, Java-based build system similar to
+make. ICU4J uses Ant because it introduces no other dependencies, it's
+portable, and it's easier to manage than a collection of makefiles. We
+currently build ICU4J with Ant from a single build file on both Windows 9x and
+Linux. The build system requires Ant 1.6 or later.
+ <p>Installing Ant is straightforward. Download it (see <a
+ href="http://ant.apache.org/bindownload.cgi">http://ant.apache.org/bindownload.cgi</a>),
+extract it onto your system, set some environment variables, and add
+its bin directory to your path. For example: </p>
+ <pre> set JAVA_HOME=C:\jdk1.5.0<br> set ANT_HOME=C:\ant<br> set PATH=%PATH%;%ANT_HOME%\bin</pre>
+ <p>See the current Ant documentation for details.</p>
+ </li>
+</ul>
+<p>Once the JDK and Ant are installed, building is just a matter of
+typing <strong>ant</strong> in the ICU4J root directory. This causes
+the Ant build system to perform a build as specified by the file
+<strong>build.xml</strong>, located in the ICU4J root directory. You
+can give Ant options like -verbose, and you can specify targets. Ant
+will only build what's been changed and will resolve dependencies
+properly. For example:</p>
+<blockquote>
+ <pre>F:\icu4j>ant tests<br>Buildfile: build.xml<br>Project base dir set to: F:\icu4j<br>Executing Target: core<br>Compiling 71 source files to F:\icu4j\classes<br>Executing Target: tests<br>Compiling 24 source files to F:\icu4j\classes<br>Completed in 19 seconds</pre>
+</blockquote>
+<i>Note: The above output is an example. The numbers are likely to be different with the current version of ICU4J.</i>
+<p>The following are some targets that you can provide to <b>ant</b>.
+For more targets run <code>ant -projecthelp</code> or see the build.xml file.</p>
+<table bgcolor="#ccccff" border="0" cellpadding="3" frame="void"
+ width="623">
+ <tbody>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">all</th>
+ <td bgcolor="#ffffff" valign="baseline">Build all targets.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">core</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the main class
+files in the subdirectory <strong>classes</strong>. If no target is
+specified, core is assumed.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">tests</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the test class
+files.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">demos</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the demos.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">tools</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the tools.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">docs</th>
+ <td bgcolor="#ffffff" valign="baseline">Run javadoc over the main
+class files, generating an HTML documentation tree in the subdirectory <strong>doc</strong>.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">jar</th>
+ <td bgcolor="#ffffff" valign="baseline">Create a jar archive <strong>icu4j.jar</strong>
+in the root ICU4J directory containing the main class files.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">jarSrc</th>
+ <td bgcolor="#ffffff" valign="baseline">Like the <strong>jar</strong>
+target, but containing only the source files. </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">jarDocs</th>
+ <td bgcolor="#ffffff" valign="baseline">Like the <strong>jar</strong>
+target, but containing only the docs. </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">richedit</th>
+ <td bgcolor="#ffffff" valign="baseline">Build the richedit core
+class files and tests. </td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">richeditJar</th>
+ <td bgcolor="#ffffff" valign="baseline">Create the richedit jar
+file (which contains only the richedit core class files). The file <strong>richedit.jar</strong>
+will be created in the <strong>./richedit</strong> subdirectory. Any
+existing file of that name will be overwritten.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">richeditZip</th>
+ <td bgcolor="#ffffff" valign="baseline">Create a zip archive of
+the richedit docs and jar file for distribution. The zip file <strong>richedit.zip</strong>
+will be created in the <strong>./richedit</strong> subdirectory. Any
+existing file of that name will be overwritten.</td>
+ </tr>
+ <tr>
+ <th align="right" bgcolor="#ffffff" valign="baseline">clean</th>
+ <td bgcolor="#ffffff" valign="baseline">Remove all built targets,
+leaving the source.</td>
+ </tr>
+ </tbody>
+</table>
+<p>For more information, read the Ant documentation and the <strong>build.xml</strong>
+file.</p>
+<p>After doing a build it is a good idea to run all the icu4j tests by
+typing<br>
+<tt>"ant check"</tt> or
+"java
+-classpath $Root/classes
+com.ibm.icu.dev.test.TestAll -nothrow".</p>
+<p>(If you are allergic to build systems, as an alternative to using
+Ant you can build by running javac and javadoc directly. This
+is not recommended. You may have to manually create destination
+directories.)</p>
+<h3 class="doc"><a name="HowToModularize"></a>How to modularize ICU4J</h3>
+<p>Some clients may not wish to ship all of ICU4J with their
+application, since the application might only use a small part of
+ICU4J.
+ICU4J releases 2.6 and later provide build options to build individual
+ICU4J 'modules' for a more compact distribution.
+The modules are based on a service and the APIs that define it, e.g.,
+the normalizer module supports all the APIs of the Normalizer class
+(and some others). Tests can be run to verify that the APIs supported
+by the module function correctly.
+Because of internal code dependencies, a module contains extra classes
+that are not part of the module's core service API. Some or most of the
+APIs of these extra classes will not work. <b>Only the module's core
+service API is guaranteed.</b> Other APIs may work partially or not at
+all, so client code should avoid them.</p>
+<p>
+Individual modules are not built directly into their own separate jar
+files. Since their dependencies
+often overlap, using separate modules to 'add on' ICU4J functionality
+would result in
+unwanted duplication of class files. Instead, building a module causes
+a subset of ICU4J's
+classes to be built and put into ICU4J's standard build directory.
+After one or more module targets are built, the 'moduleJar' target can
+then be
+built, which packages the class files into a 'module jar.' Other than
+the fact that it
+contains fewer class files, little distinguishes this jar file from a
+full ICU4J jar file,
+and in fact they share the same name.</p>
+<p>
+Currently ICU4J can be divided into the following modules:
+</p>
+<p><b>Key:</b></p>
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="623">
+ <tbody>
+ <tr>
+
+ <th align="left" valign="baseline"><b>Module Name</b></th>
+ <th align="left" valign="baseline"><b>Ant Targets</b></th>
+ <th align="left" valign="baseline"><b>Test Package Supported</b></th>
+ <th align="right" valign="baseline"><b>Size‡</b></th>
+ </tr>
+ <tr bgcolor="#ffffff">
+
+ <td colspan="4">
+ <table>
+ <tbody>
+ <tr>
+ <td valign="baseline">Package*</td>
+ <td valign="baseline">Main Classes† </td>
+ </tr>
+ </tbody>
+
+ </table>
+ </td>
+ </tr>
+ </tbody>
+</table>
+<b><font size="2">* com.ibm. should be prepended to the package names
+listed.
+<br>
+† Class name in bold indicates core service API. Only APIs in these classes are
+fully supported.
+<br>
+‡ Sizes are of the compressed jar file containing only this module.
+These sizes are approximate for release 3.6.
+</font></b>
+<p><b>Modules:</b></p>
+
+<table bgcolor="#ccccff" cellpadding="3" frame="void" width="623">
+
+ <tbody>
+ <tr>
+ <th align="left" valign="baseline">Normalizer</th>
+ <td align="left" valign="baseline">normalizer, normalizerTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.normalizer</td>
+ <td align="right" valign="baseline">698 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakIterator,
+ CanonicalIterator,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>Normalizer</b>,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedCollator,
+ RuleBasedTransliterator,
+ SymbolTable,
+ Transliterator,
+ <b>UCharacterIterator</b>,
+ <b>UForwardCharacterIterator</b>,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Collator</th>
+ <td align="left" valign="baseline">collator, collatorTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.collator</td>
+ <td align="right" valign="baseline">2,031 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+
+ UCharacter,
+ UCharacterCategory,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ CanonicalIterator,
+ <b>CollationElementIterator</b>,
+ <b>CollationKey</b>,
+ <b>Collator</b>,
+ DictionaryBasedBreakIterator,
+ <b>Normalizer</b>,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedBreakIterator,
+ <b>RuleBasedCollator</b>,
+ SymbolTable,
+ Transliterator,
+ <b>UCharacterIterator</b>,
+ <b>UForwardCharacterIterator</b>,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ CompactByteArray,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Calendar</th>
+ <td align="left" valign="baseline">calendar, calendarTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.calendar</td>
+ <td align="right" valign="baseline">2,006 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+
+ UCharacter,
+ UCharacterCategory,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.math:</td>
+ <td valign="baseline">
+ <b>BigDecimal</b>,
+ MathContext
+ </td>
+ </tr>
+
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakIterator,
+ CanonicalIterator,
+ <b>ChineseDateFormat</b>,
+ <b>ChineseDateFormatSymbols</b>,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>DateFormat</b>,
+ <b>DateFormatSymbols</b>,
+ <b>DecimalFormat</b>,
+ <b>DecimalFormatSymbols</b>,
+ MessageFormat,
+ Normalizer,
+ NumberFormat,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedCollator,
+ RuleBasedNumberFormat,
+ RuleBasedTransliterator,
+ <b>SimpleDateFormat</b>,
+ SymbolTable,
+ Transliterator,
+ <b>UCharacterIterator</b>,
+ UFormat,
+ <b>UForwardCharacterIterator</b>,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ <b>BuddhistCalendar</b>,
+ ByteArrayWrapper,
+ <b>Calendar</b>,
+ CaseInsensitiveString,
+ <b>ChineseCalendar</b>,
+ <b>CopticCalendar</b>,
+ <b>Currency</b>,
+ CurrencyAmount,
+ <b>DateRule</b>,
+ <b>EasterHoliday</b>,
+ <b>EthiopicCalendar</b>,
+ Freezable,
+ <b>GregorianCalendar</b>,
+ <b>HebrewCalendar</b>,
+ <b>HebrewHoliday</b>,
+ <b>Holiday</b>,
+ <b>IslamicCalendar</b>,
+ <b>JapaneseCalendar</b>,
+ Measure,
+ MeasureUnit,
+ <b>RangeDateRule</b>,
+ RangeValueIterator,
+ <b>SimpleDateRule</b>,
+ <b>SimpleHoliday</b>,
+ <b>SimpleTimeZone</b>,
+ StringTokenizer,
+ <b>TimeZone</b>,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">BreakIterator</th>
+ <td align="left" valign="baseline">breakIterator,
+breakIteratorTests</td>
+
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.breakiterator</td>
+ <td align="right" valign="baseline">1,893 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+
+ <td valign="baseline">
+ <b>BreakDictionary</b>,
+ <b>BreakIterator</b>,
+ CanonicalIterator,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>DictionaryBasedBreakIterator</b>,
+ Normalizer,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ <b>RuleBasedBreakIterator</b>,
+ RuleBasedCollator,
+ RuleBasedTransliterator,
+ SymbolTable,
+ Transliterator,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16</td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ CompactByteArray,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+
+ <th align="left" valign="baseline">Basic Properties</th>
+ <td align="left" valign="baseline">propertiesBasic,
+propertiesBasicTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.lang</td>
+ <td align="right" valign="baseline">802 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ <b>UCharacter</b>,
+ <b>UCharacterCategory</b>,
+ <b>UCharacterDirection</b>,
+ <b>UCharacterEnums</b>,
+ <b>UProperty</b>,
+ <b>UScript</b>,
+ <b>UScriptRun</b>
+
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ CanonicalIterator,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ DictionaryBasedBreakIterator,
+ Normalizer,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedBreakIterator,
+ RuleBasedCollator,
+ RuleBasedTransliterator,
+ SymbolTable,
+ Transliterator,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ <b>UTF16</b>
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ CompactByteArray,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Full Properties</th>
+ <td align="left" valign="baseline">propertiesFull,
+propertiesFullTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.lang</td>
+ <td align="right" valign="baseline">1,837 KB</td>
+
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+
+ <b>UCharacter</b>,
+ <b>UCharacterCategory</b>,
+ <b>UCharacterDirection</b>,
+ <b>UCharacterEnums</b>,
+ <b>UProperty</b>,
+ <b>UScript</b>,
+ <b>UScriptRun</b>
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ BreakDictionary,
+ BreakIterator,
+ CanonicalIterator,
+ DictionaryBasedBreakIterator,
+ <b>Normalizer</b>,
+ RawCollationKey,
+ <b>Replaceable</b>,
+ <b>ReplaceableString</b>,
+ RuleBasedBreakIterator,
+ RuleBasedCollator,
+ RuleBasedTransliterator,
+ SymbolTable,
+ Transliterator,
+ <b>UCharacterIterator</b>,
+ <b>UForwardCharacterIterator</b>,
+ <b>UnicodeFilter</b>,
+ <b>UnicodeMatcher</b>,
+ <b>UnicodeSet</b>,
+ <b>UnicodeSetIterator</b>,
+ <b>UTF16</b>
+
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ CompactByteArray,
+ Freezable,
+ <b>RangeValueIterator</b>,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ <b>ValueIterator</b>,
+ <b>VersionInfo</b>
+
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">Formatting</th>
+
+ <td align="left" valign="baseline">format, formatTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.format</td>
+ <td align="right" valign="baseline">3,207 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.math:</td>
+
+ <td valign="baseline">
+ <b>BigDecimal</b>,
+ MathContext
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+
+ BreakIterator,
+ CanonicalIterator,
+ ChineseDateFormat,
+ ChineseDateFormatSymbols,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>DateFormat</b>,
+ <b>DateFormatSymbols</b>,
+ <b>DecimalFormat</b>,
+ <b>DecimalFormatSymbols</b>,
+ MeasureFormat,
+ MessageFormat,
+ Normalizer,
+ <b>NumberFormat</b>,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedCollator,
+ <b>RuleBasedNumberFormat</b>,
+ RuleBasedTransliterator,
+ <b>SimpleDateFormat</b>,
+ SymbolTable,
+ Transliterator,
+ <b>UCharacterIterator</b>,
+ UFormat,
+ <b>UForwardCharacterIterator</b>,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ <b>BuddhistCalendar</b>,
+ ByteArrayWrapper,
+ <b>Calendar</b>,
+ CaseInsensitiveString,
+ <b>ChineseCalendar</b>,
+ <b>CopticCalendar</b>,
+ <b>Currency</b>,
+ CurrencyAmount,
+ <b>EthiopicCalendar</b>,
+ Freezable,
+ <b>GregorianCalendar</b>,
+ <b>HebrewCalendar</b>,
+ <b>IslamicCalendar</b>,
+ <b>JapaneseCalendar</b>,
+ Measure,
+ MeasureUnit,
+ <b>RangeValueIterator</b>,
+ <b>SimpleTimeZone</b>,
+ StringTokenizer,
+ <b>TimeZone</b>,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+ <th align="left" valign="baseline">StringPrep, IDNA</th>
+ <td align="left" valign="baseline">stringPrep, stringPrepTests</td>
+
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.stringprep</td>
+ <td align="right" valign="baseline">725 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ UCharacter,
+ UCharacterCategory,
+ UCharacterDirection,
+ UCharacterEnums,
+ UProperty,
+ UScript
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+
+ <td valign="baseline">
+ BreakIterator,
+ CanonicalIterator,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>IDNA</b>,
+ Normalizer,
+ RawCollationKey,
+ Replaceable,
+ ReplaceableString,
+ RuleBasedCollator,
+ RuleBasedTransliterator,
+ <b>StringPrep</b>,
+ <b>StringParseException</b>,
+ SymbolTable,
+ Transliterator,
+ UCharacterIterator,
+ UForwardCharacterIterator,
+ UnicodeFilter,
+ UnicodeMatcher,
+ UnicodeSet,
+ UnicodeSetIterator,
+ UTF16
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ Freezable,
+ RangeValueIterator,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ ValueIterator,
+ VersionInfo
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+ <tr>
+
+ <th align="left" valign="baseline">Transforms</th>
+ <td align="left" valign="baseline">transliterator,
+transliteratorTests</td>
+ <td align="left" valign="baseline">com.ibm.icu.dev.test.translit</td>
+ <td align="right" valign="baseline">988 KB</td>
+ </tr>
+ <tr bgcolor="#ffffff">
+
+ <td colspan="4" valign="top">
+ <table border="0" cellpadding="5">
+ <tbody>
+ <tr>
+ <td valign="baseline">icu.lang:</td>
+ <td valign="baseline">
+ <b>UCharacter</b>,
+ <b>UCharacterCategory</b>,
+ <b>UCharacterDirection</b>,
+ <b>UCharacterEnums</b>,
+ <b>UProperty</b>,
+ <b>UScript</b>,
+ <b>UScriptRun</b>
+
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.text:</td>
+ <td valign="baseline">
+ <b>BreakDictionary</b>,
+ <b>BreakIterator</b>,
+ CanonicalIterator,
+ CollationElementIterator,
+ CollationKey,
+ Collator,
+ <b>DictionaryBasedBreakIterator</b>,
+ <b>Normalizer</b>,
+ RawCollationKey,
+ <b>Replaceable</b>,
+ <b>ReplaceableString</b>,
+ <b>RuleBasedBreakIterator</b>,
+ RuleBasedCollator,
+ RuleBasedTransliterator,
+ SymbolTable,
+ <b>Transliterator</b>,
+ <b>UCharacterIterator</b>,
+ <b>UForwardCharacterIterator</b>,
+ UnicodeFilter,
+ UnicodeMatcher,
+ <b>UnicodeSet</b>,
+ <b>UnicodeSetIterator</b>,
+ <b>UTF16</b>
+
+ </td>
+ <td> <br>
+ </td>
+ </tr>
+ <tr>
+ <td valign="baseline">icu.util:</td>
+ <td valign="baseline">
+ ByteArrayWrapper,
+ CaseInsensitiveString,
+ CompactByteArray,
+ Freezable,
+ <b>RangeValueIterator</b>,
+ StringTokenizer,
+ ULocale,
+ UResourceBundle,
+ UResourceTypeMismatchException,
+ <b>ValueIterator</b>,
+ <b>VersionInfo</b>
+
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </td>
+ </tr>
+<!-- tr>
+
+ <th align="left" valign="baseline"><b>Module Name</b></th>
+
+ <th align="left" valign="baseline"><b>Ant Targets</b></th>
+
+ <th align="left" valign="baseline"><b>Test Package Supported</b></th>
+
+ <th align="right" valign="baseline"><b>Size</b></th>
+
+</tr -->
+ </tbody>
+</table>
+<p>Building any of these modules is as easy as specifying a build
+target to the Ant build system, e.g:
+<br>
+To build a module that contains only the Normalizer API: </p>
+<ol>
+ <li> Build the module. <br>
+ <code> ant normalizer </code> </li>
+ <li> Build the jar containing the module. <br>
+ <code>ant moduleJar </code> </li>
+ <li> Build the tests for the module. <br>
+ <code> ant normalizerTests </code> </li>
+ <li> Run the tests and verify that the self tests pass. <br>
+ <code> java -classpath $icu4j_root/classes
+com.ibm.icu.dev.test.TestAll -nothrow -w </code> </li>
+</ol>
+If more than one module is required, the module build targets can be
+concatenated, e.g:
+<ol>
+ <li> Build the modules. <br>
+ <code> ant normalizer collator </code> </li>
+ <li> Build the jar containing the modules. <br>
+ <code>ant moduleJar </code> </li>
+ <li> Build the tests for the module. <br>
+ <code> ant normalizerTests collatorTests </code> </li>
+ <li> Run the tests and verify that they pass. <br>
+ <code> java -classpath $icu4j_root/classes
+com.ibm.icu.dev.test.TestAll -nothrow -w </code> </li>
+</ol>
+The jar should be built before the tests, since for some targets
+building the tests will cause additional classes to be compiled that
+are not strictly necessary for the module itself.
+<h5> Notes: </h5>
+<ul>
+ <li>Regardless of whether ICU4J is built as a whole or as modules,
+the jar file produced is named <em>icu4j.jar</em>.</li>
+ <li>To ascertain if an icu4j.jar contains all of ICU4J or not, please
+see the manifest file in the jar</li>
+ <li>The target moduleJar does not depend on any other target. It just
+creates a jar of all class files under
+$icu4j_root/classes/com/ibm/icu/, excluding the class files in the
+$icu4j_root/classes/com/ibm/icu/dev folder</li>
+ <li>The list of module build targets can be obtained by running the
+command: <code>ant -projecthelp</code></li>
+</ul>
+<h3 class="doc"><a name="tryingout"></a>Trying Out ICU4J</h3>
+<p><strong>Note:</strong> the demos provided with ICU4J are for the
+most part undocumented. This list can show you where to look, but
+you'll
+have to experiment a bit. The demos (with the
+exception of richedit) are <strong>unsupported</strong> and may change
+or disappear without notice.</p>
+<p>The icu4j.jar file contains only the core ICU4J classes, not the
+demo classes, so unless you build ICU4J there is little to try out.
+</p>
+<h4>Charset</h4>
+To try out the <strong>Charset</strong> package, build <strong>icu4j.jar</strong> and <strong>icu4j-charsets.jar</strong> using 'jar' target.
+You can use the charsets by placing these files on your classpath.
+<blockquote><tt>java -cp $Root/icu4j.jar:$Root/icu4j-charsets.jar &lt;your program&gt;</tt></blockquote>
+<h4>Rich Edit</h4>
+To try out the <strong>richedit</strong> package, first build the
+richeditJar target.
+This is a 'runnable' jar file. To run the richedit demo, type:
+<blockquote><tt>java -jar $Root/richedit/richedit.jar</tt></blockquote>
+This will present an empty edit pane with an awt interface.
+<p>With a fuller command line you can try out other options, for
+example:</p>
+<blockquote><tt>java -classpath $Root/richedit/richedit.jar
+com.ibm.richtext.demo.EditDemo [-swing][file]</tt></blockquote>
+<p>This will use an awt GUI, or a swing GUI if
+<tt>-swing</tt> is passed on the command line. It will open a text
+file if one is provided, otherwise it will open a blank page. Click
+to type.</p>
+<p>
+You can add tabs to the tab ruler by clicking in the ruler while
+holding down the control key.
+Clicking on an existing tab changes between left, right, center, and
+decimal tabs. Dragging
+a tab moves it, dragging it off the ruler removes it.</p>
+<p>
+You can experiment with complex text by using the keymap functions.
+Please note that these are mainly for demo purposes, for real work
+with Arabic or Hebrew you will want to use an input method. You will
+need to use a font that supports Arabic or Hebrew, 'Lucida Sans'
+(provided
+with Java) supports these languages.</p>
+<h4>Other demos</h4>
+<p>The other demo programs are <strong>not supported</strong> and
+exist only to let you
+experiment with the ICU4J classes. First, build ICU4J using <tt>ant all</tt>.
+Then try
+one of the following:
+</p>
+<ul>
+ <li><tt>java -classpath classes
+com.ibm.icu.dev.demo.calendar.CalendarApp</tt> </li>
+ <li><tt>java -classpath classes
+com.ibm.icu.dev.demo.holiday.HolidayCalendarDemo</tt> </li>
+ <li><tt>java -classpath classes
+com.ibm.icu.dev.demo.rbbi.TextBoundDemo</tt><br>
+(Click in the text, then use <tt>ctrl-N</tt> and <tt>ctrl-P</tt> to
+select the next or previous block of text.) </li>
+ <li><tt>java -classpath classes com.ibm.icu.dev.demo.rbnf.RbnfDemo</tt>
+ </li>
+ <li><tt>java -classpath classes com.ibm.icu.dev.demo.translit.Demo</tt>
+ </li>
+</ul>
+<h3 class="doc"><a name="resources">ICU4J Resource Information</a></h3>
+Starting with release 2.1, ICU4J includes its own
+resource information
+which is completely independent of the JDK resource information. (Note,
+in ICU4J 3.2 and 3.4, time zone information still depends on the
+underlying JDK). The
+new ICU4J information is equivalent to the information in ICU4C and
+many
+resources are, in fact, the same binary files that ICU4C uses.
+<p>
+By default the ICU4J distribution includes all of the standard resource
+information. It is located under the directory com/ibm/icu/impl/data.
+Depending on the service, the data is in different locations and in
+different formats. <strong>Note:</strong> This will continue to change
+from release to release, so clients should not depend on the exact
+organization
+of the data in ICU4J.</p>
+<ul>
+ <li>The primary <b>locale data</b> is under the directory <tt>icudt36b</tt>,
+as a set of <tt>".res"</tt> files whose names are the locale identifiers.
+Locale naming is documented in the <code>com.ibm.icu.util.ULocale</code>
+class, and the use of these names in searching for resources is documented
+in <code>java.util.ResourceBundle</code>.
+ </li>
+ <li>The <b>collation data</b> is under the directory <tt>icudt36b/coll</tt>,
+as a set of <tt>".res"</tt> files.</li>
+ <li>The <b>rule-based transliterator data</b> is under the directory
+<tt>icudt36b/translit</tt> as a set of <tt>".res"</tt> files. (<b>Note:</b> the
+Han transliterator test data is no longer included in the core icu4j.jar
+file by default.)</li>
+ <li>The <b>rule-based number format data</b> is under the directory
+<tt>icudt36b/rbnf</tt> as a set of <tt>".res"</tt> files.</li>
+ <li>The <b>break iterator data</b> is directly under the data
+directory, as a set of <tt>".brk"</tt> files, named according to the
+type of break and the locale where there are locale-specific versions.</li>
+ <li>The <b>holiday data</b> is under the <tt>data</tt> directory,
+as a set of <tt>".class"</tt> files, named <tt>"HolidayBundle_"</tt>
+followed by the locale ID.</li>
+ <li>The <b>character property data</b> as well as assorted <b>normalization
+data</b> and default <b>unicode collation algorithm (UCA) data</b>
+is found under the <tt>data</tt> directory as a set of <tt>".icu"</tt>
+files. </li>
+</ul>
+<p>
+Some of the data files alias or otherwise reference data from other
+data files. One reason for this is because some locale names have
+changed. For example, <tt>he_IL</tt> used to be <tt>iw_IL</tt>. In
+order to support both names but not duplicate the data, one of the
+resource files refers to the other file's data. In other cases, a
+file may alias a portion of another file's data in order to save
+space. Currently ICU4J provides no tool for revealing these
+dependencies.</p>
+<blockquote><strong>Note:</strong> Java's <code>Locale</code> class
+silently converts the language code <tt>"he"</tt> to <tt>"iw"</tt>
+when you construct the Locale (for versions of Java through Java 5). Thus
+Java cannot be used to locate resources that use the <tt>"he"</tt>
+language code. ICU, on the other hand, does not perform this
+conversion in ULocale, and instead uses aliasing in the locale data to
+represent the same set of data under different locale
+ids.</blockquote>
+<p>
+Resource files that use locale ids form a hierarchy, with up to four
+levels: a root, language, region (country), and variant. Searches for
+locale data attempt to match as far down the hierarchy as possible,
+for example, <tt>"he_IL"</tt> will match <tt>he_IL</tt>, but
+<tt>"he_US"</tt> will match <tt>he</tt> (since there is no <tt>US</tt>
+variant for he), and <tt>"xx_YY"</tt> will match root (the
+default fallback locale) since there is no <tt>xx</tt> language code
+in the locale hierarchy. Again, see
+<code>java.util.ResourceBundle</code> for more information.
+</p>
+<p>
+<strong>Currently ICU4J provides no tool for revealing these
+dependencies</strong> between data files, so trimming the data
+directly in the ICU4J project is a hit-or-miss affair. The key point
+when you remove data is to make sure to remove all dependencies on
+that data as well. For example, if you remove <tt>he.res</tt>, you
+need to remove <tt>he_IL.res</tt>, since it is lower in the hierarchy,
+and you must remove iw.res, since it references <tt>he.res</tt>, and
+<tt>iw_IL.res</tt>, since it depends on it (and also references
+<tt>he_IL.res</tt>).
+</p>
+<p>
+Unfortunately, the jar tool in the JDK provides no way to remove items
+from a jar file. Thus you have to extract the resources, remove the
+ones you don't want, and then create a new jar file with the remaining
+resources. See the jar tool information for how to do this. Before
+'rejaring' the files, be sure to thoroughly test your application with
+the remaining resources, making sure each required resource is
+present.
+</p>
+<h4>Using additional resource files with ICU4J</h4>
+<blockquote>
+ <table cellpadding="3" frame="border" rules="none" width="50%">
+ <tbody>
+ <tr>
+ <td><b><font color="red" size="+1">Warning:</font> Resource
+file formats can change across releases of ICU4J!</b></td>
+ </tr>
+ <tr>
+ <td>The format of ICU4J resources is not part of the API.
+Clients who develop their own resources for use with ICU4J should be
+prepared to
+regenerate them when they move to new releases of ICU4J.</td>
+ </tr>
+ </tbody>
+ </table>
+</blockquote>
+<p>
+We are still developing ICU4J's resource mechanism. Currently it
+is not possible to mix icu's new binary <tt>.res</tt>
+resources
+with traditional java-style <tt>.class</tt> or <tt>.txt</tt>
+resources. We might
+allow for this in a future release, but since the resource data and
+format is not formally
+supported, you run the risk of incompatibilities with future releases
+of ICU4J.
+</p>
+<p>
+Resource data in ICU4J is checked in to the repository as a jar file
+containing the resource binaries, <tt>icudata.jar</tt>. This
+means that inspecting the contents of these resources is difficult.
+They currently are compiled from ICU4C <tt>.txt</tt> file data. You
+can view the contents of the ICU4C text resource files to understand
+the contents of the ICU4J resources.
+</p>
+<p>
+The files in <tt>icudata.jar</tt> get extracted to <tt>com/ibm/icu/impl/data</tt>
+in
+the build directory when the 'core' target is built.
+Building the <tt>'resources'</tt> target will force the
+resources to once again be extracted. Extraction will
+overwrite any corresponding resource files already in that directory.
+</p>
+<h4><a name="resourcesICU4C">Building ICU4J Resources from ICU4C</a></h4>
+<h5>Requirements</h5>
+<ul>
+ <li><a
+ href="http://www.ibm.com/software/globalization/icu/downloads.jsp">ICU4C</a></li>
+ <li>Compilers and tools required for building <a
+ href="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/icu/readme.html?view=co#HowToBuild">ICU4C</a>.</li>
+ <li>Java SDK version 1.4.0 or above.</li>
+ <li>Perl version 5 or above.</li>
+</ul>
+<h5> Procedure</h5>
+<ol>
+ <li> Download and build ICU4C on a Windows machine. For instructions on
+downloading and building ICU4C, please click <a
+ href="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/icu/readme.html?view=co#HowToBuild">here</a>.</li>
+ <li> Change directory to <i>$icu_root</i>/source/tools/genrb </li>
+ <li> Launch gendtjar.pl from that directory itself with the command <br>
+gendtjar.pl --icu-root=<i>$icu_root</i> --jar=<i>$jdk_home/bin</i>
+--icu4j-root=<i>$icu4j_root</i> --version=<i>$icu_version</i> <br>
+e.g: gendtjar.pl --icu-root=\work\icu --jar=\jdk1.4.1\bin
+--icu4j-root=\work\icu4j --version=3.0 <br>
+Execution of gendtjar.pl script will create the required jar files in
+the $icu_root\source\tools\genrb\temp directory.</li>
+ <li> Move icudata.jar to <i>$icu4j_root</i>/src/com/ibm/icu/impl/data
+directory.</li>
+ <li> Move testdata.jar to <i>$icu4j_root</i>/src/com/ibm/icu/dev/data
+directory.</li>
+ <li> Build resources target of ant to unpack the jar files with the
+following command. <br>
+ <i>$ant_home</i>/bin/ant resources</li>
+</ol>
+<h5> Generating Data from CLDR </h5>
+<i> Note: This procedure assumes that all 3 sources are in sibling directories</i>
+<ol>
+ <li>Checkout CLDR</li>
+ <li>Update <i>$cldr_root</i>/common to 'release-1-4' tag</li>
+ <li>Update <i>$cldr_root</i>/tools to 'release-1-4' tag</li>
+ <li>Checkout ICU with tag 'release-3-6'</li>
+ <li>Checkout ICU4J with tag 'release-3-6'</li>
+ <li>Build ICU4J</li>
+ <li>Build ICU4C</li>
+ <li>Change to <i>$cldr_root</i>/tools/java directory</li>
+ <li>Build CLDR using ant after pointing ICU4J_CLASSES env var to the newly built ICU4J</li>
+ <li>cd to <i>$icu4c_root</i>/source/data directory</li>
+ <li>Follow the instructions in the cldr-icu-readme.txt</li>
+ <li>Build ICU data from CLDR</li>
+ <li>cd to <i>$icu4c_root/source/tools/genrb</i></li>
+ <li>run ./gendtjar.pl</li>
+ <li>cd to <i>$icu4j_root</i> dir</li>
+ <li>Build and test icu4j</li>
+</ol>
+
+<h3 class="doc"><a name="WhereToFindMore"></a>Where to Find More
+Information</h3>
+<p><a href="http://www.ibm.com/software/globalization/icu/">http://www.ibm.com/software/globalization/icu/</a>
+is a
+pointer to general information about the International Components for
+Unicode in Java </p>
+<p><a href="http://www.ibm.com/software/globalization/">http://www.ibm.com/software/globalization/</a>
+is a pointer to
+information on how to make applications global. </p>
+<h3 class="doc"><a name="SubmittingComments"></a>Submitting Comments,
+Requesting Features and
+Reporting Bugs</h3>
+<p>Your comments are important to making ICU4J successful. We are
+committed
+to fixing any bugs, and will use your feedback to help plan future
+releases.</p>
+<p>To submit comments, request features and report bugs, contact us
+through the <a
+ href="http://icu.sourceforge.net/contacts.html">ICU Support
+mailing list</a>.<br>
+While we are not able to respond individually to each comment, we do
+review all comments.</p>
+<br>
+<br>
+<h2>Thank you for your interest in ICU4J!</h2>
+<br>
+<hr align="center" size="2" width="100%">
+<p><i><font size="-1">Copyright © 2002-2006 International Business
+Machines Corporation and others. All Rights
+Reserved.<br>
+4400 North First Street, San José, CA 95193, USA
+</font></i></p>
+</body>
+</html>
diff --git a/src/META-INF/services/java.nio.charset.spi.CharsetProvider b/src/META-INF/services/java.nio.charset.spi.CharsetProvider
new file mode 100644
index 0000000..ca798e7
--- /dev/null
+++ b/src/META-INF/services/java.nio.charset.spi.CharsetProvider
@@ -0,0 +1,3 @@
+# Copyright (C) 2006, International Business Machines Corporation and others. All Rights Reserved.
+# icu4j converters
+com.ibm.icu.charset.CharsetProviderICU
diff --git a/src/com/ibm/icu/charset/Charset88591.java b/src/com/ibm/icu/charset/Charset88591.java
new file mode 100644
index 0000000..b90a043
--- /dev/null
+++ b/src/com/ibm/icu/charset/Charset88591.java
@@ -0,0 +1,210 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * Single-byte charset (the class name suggests ISO-8859-1) in which byte value N
+ * and UTF-16 code unit N (0x00..0xFF) convert to each other unchanged.
+ */
+class Charset88591 extends CharsetICU {
+    /** Substitution byte (0x1A) passed to the encoder's superclass constructor. */
+    protected byte[] fromUSubstitution = new byte[]{(byte)0x1a};
+
+    public Charset88591(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        maxBytesPerChar = 1;
+        minBytesPerChar = 1;
+        maxCharsPerByte = 1;
+    }
+
+    class CharsetDecoder88591 extends CharsetDecoderICU{
+
+        public CharsetDecoder88591(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Copies bytes to chars one-to-one until the source is exhausted or the
+         * target is full. Every byte value is valid, so malformed input is never
+         * reported.
+         *
+         * @param source  bytes to decode; its position is advanced past the bytes consumed
+         * @param target  receives one char per consumed byte
+         * @param offsets if non-null, receives one entry per char written, counting up from 0
+         * @param flush   not consulted by this converter
+         * @return UNDERFLOW once all input is consumed, OVERFLOW if the target filled up first
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            if(!source.hasRemaining() && toUnicodeStatus==0) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+
+            final int oldTarget = target.position();
+            int sourceArrayIndex = source.position();
+
+            /* conversion loop; the 0xFF mask keeps bytes >= 0x80 from sign-extending */
+            while(sourceArrayIndex<source.limit() && target.hasRemaining()) {
+                target.put((char)(source.get(sourceArrayIndex++)&0xFF));
+            }
+
+            /* the loop only stops early when the target is full */
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(sourceArrayIndex<source.limit()) {
+                cr = CoderResult.OVERFLOW;
+            }
+
+            /* set offsets since the start */
+            if(offsets!=null) {
+                int sourceIndex = 0;
+                for(int count=target.position()-oldTarget; count>0; --count) {
+                    offsets.put(sourceIndex++);
+                }
+            }
+
+            source.position(sourceArrayIndex);
+            return cr;
+        }
+    }
+
+    class CharsetEncoder88591 extends CharsetEncoderICU{
+
+        public CharsetEncoder88591(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        private final static int NEED_TO_WRITE_BOM = 1;
+
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+
+        /**
+         * Writes each char up to 0xFF as one byte and stops at the first char that
+         * cannot be written that way, recording it in fromUChar32.
+         *
+         * @param source  chars to encode; its position is advanced past the chars consumed
+         * @param target  receives one byte per encoded char
+         * @param offsets if non-null, receives one entry per byte written, counting up from 0
+         * @param flush   when true, a lead surrogate at the very end of the source is
+         *                reported as malformed instead of silently being kept pending
+         * @return UNDERFLOW, OVERFLOW, or a malformed/unmappable result for the offending input
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+            if(!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+
+            CoderResult cr = CoderResult.UNDERFLOW;
+            final int oldTarget = target.position();
+            int sourceArrayIndex = source.position();
+
+            if (fromUChar32 != 0) {
+                /* a code unit is pending from an earlier call: test the following code unit */
+                /* (the source was just checked to be non-empty, so one is available) */
+                int ch = fromUChar32;
+                fromUChar32 = 0;
+                char trail = source.get(sourceArrayIndex);
+                if(UTF16.isTrailSurrogate(trail)) {
+                    ++sourceArrayIndex;
+                    ch = UCharacter.getCodePoint((char)ch, trail);
+                    /* convert this supplementary code point */
+                    /* callback(unassigned) */
+                    cr = CoderResult.unmappableForLength(sourceArrayIndex);
+                } else {
+                    /* this is an unmatched lead code unit (1st surrogate) */
+                    /* callback(illegal) */
+                    /* NOTE(review): the length argument is a buffer index; CoderResult rejects 0 - confirm intent */
+                    fromUChar32 = ch;
+                    cr = CoderResult.malformedForLength(sourceArrayIndex);
+                }
+            } else {
+                /* conversion loop */
+                while(sourceArrayIndex<source.limit()){
+                    int ch = source.get(sourceArrayIndex);
+                    if(ch<=0xff) {
+                        if(!target.hasRemaining()){
+                            /* do not consume a char that could not be written: it is re-read on the next call */
+                            cr = CoderResult.OVERFLOW;
+                            break;
+                        }
+                        target.put((byte)ch);
+                        sourceArrayIndex++;
+                        continue;
+                    }
+                    /* not encodable in one byte: consume it and stop */
+                    sourceArrayIndex++;
+                    if (UTF16.isLeadSurrogate((char)ch)) {
+                        if (sourceArrayIndex >= source.limit()) {
+                            /* ran out of source: keep the lead pending, illegal only when flushing */
+                            fromUChar32 = ch;
+                            if (flush) {
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                            break;
+                        }
+                        int ch2 = source.get(sourceArrayIndex);
+                        if (!UTF16.isTrailSurrogate((char)ch2)) {
+                            /* this is an unmatched lead code unit (1st surrogate) */
+                            /* NOTE(review): OVERFLOW looks unintended for illegal input - confirm against CharsetEncoderICU */
+                            fromUChar32 = ch;
+                            cr = CoderResult.OVERFLOW;
+                            break;
+                        }
+                        /* combine the surrogate pair into one code point */
+                        ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
+                        sourceArrayIndex++;
+                    }
+                    /* NOTE(review): unencodable chars are reported as malformed, not unmappable - confirm */
+                    fromUChar32 = ch;
+                    cr = CoderResult.malformedForLength(sourceArrayIndex);
+                    break;
+                }
+            }
+
+            /* set offsets since the start */
+            if(offsets!=null) {
+                int sourceIndex = 0;
+                for(int count=target.position()-oldTarget; count>0; --count) {
+                    offsets.put(sourceIndex++);
+                }
+            }
+
+            source.position(sourceArrayIndex);
+            return cr;
+        }
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoder88591(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoder88591(this);
+    }
+}
diff --git a/src/com/ibm/icu/charset/CharsetASCII.java b/src/com/ibm/icu/charset/CharsetASCII.java
new file mode 100644
index 0000000..bb3e108
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetASCII.java
@@ -0,0 +1,211 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+
+/** Single-byte US-ASCII charset: only 0x00..0x7F is converted, one byte per char, value unchanged. */
+class CharsetASCII extends CharsetICU {
+    /** Substitution byte (0x1A) passed to the encoder's superclass constructor. */
+    protected byte[] fromUSubstitution = new byte[]{(byte)0x1a};
+
+    public CharsetASCII(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+        maxBytesPerChar = 1;
+        minBytesPerChar = 1;
+        maxCharsPerByte = 1;
+    }
+
+    class CharsetDecoderASCII extends CharsetDecoderICU{
+        public CharsetDecoderASCII(CharsetICU cs) {
+            super(cs);
+        }
+
+        /**
+         * Copies bytes 0x00..0x7F to chars one-to-one until the source is exhausted,
+         * the target is full, or a byte with the high bit set is met. Such a byte is
+         * copied to toUBytesArray but not consumed from the source.
+         *
+         * @param source  bytes to decode; its position is advanced past the bytes consumed
+         * @param target  receives one char per consumed byte
+         * @param offsets if non-null, receives one entry per char written, counting up from 0
+         * @param flush   not consulted by this converter
+         * @return UNDERFLOW once all input is consumed, OVERFLOW if the target filled up
+         *         first, or a malformed-input result of length 1 for a byte above 0x7F
+         */
+        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining() && toUnicodeStatus==0) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+
+            final int oldTarget = target.position();
+            int sourceArrayIndex = source.position();
+            char c = 0;
+
+            /* conversion loop; a negative byte sign-extends to a char above 0x7f and ends it */
+            while(sourceArrayIndex<source.limit() &&
+                    (c=(char)source.get(sourceArrayIndex))<=0x7f &&
+                    target.hasRemaining()){
+                target.put(c);
+                sourceArrayIndex++;
+            }
+
+            if(c>0x7f) {
+                /* callback(illegal); copy the current bytes to toUBytes[] */
+                toUBytesArray[0]=(byte)c;
+                toULength=1;
+                cr = CoderResult.malformedForLength(toULength);
+            } else if(sourceArrayIndex<source.limit() && !target.hasRemaining()) {
+                /* target is full */
+                cr = CoderResult.OVERFLOW;
+            }
+
+            /* set offsets since the start */
+            if(offsets!=null) {
+                int sourceIndex = 0;
+                for(int count=target.position()-oldTarget; count>0; --count) {
+                    offsets.put(sourceIndex++);
+                }
+            }
+
+            source.position(sourceArrayIndex);
+            return cr;
+        }
+    }
+
+    class CharsetEncoderASCII extends CharsetEncoderICU{
+        public CharsetEncoderASCII(CharsetICU cs) {
+            super(cs, fromUSubstitution);
+            implReset();
+        }
+
+        private final static int NEED_TO_WRITE_BOM = 1;
+
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+        }
+
+        /**
+         * Writes each char up to 0x7F as one byte and stops at the first char that
+         * cannot be written that way, recording it in fromUChar32.
+         *
+         * @param source  chars to encode; its position is advanced past the chars consumed
+         * @param target  receives one byte per encoded char
+         * @param offsets if non-null, receives one entry per byte written, counting up from 0
+         * @param flush   when true, a lead surrogate at the very end of the source is reported as malformed
+         * @return UNDERFLOW, OVERFLOW, or a malformed/unmappable result for the offending input
+         */
+        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+            if(!source.hasRemaining()) {
+                /* no input, nothing to do */
+                return CoderResult.UNDERFLOW;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
+
+            CoderResult cr = CoderResult.UNDERFLOW;
+            final int oldTarget = target.position();
+            int sourceArrayIndex = source.position();
+
+            if (fromUChar32 != 0) {
+                /* a code unit is pending from an earlier call; the source is non-empty, so test the following code unit */
+                int ch = fromUChar32;
+                fromUChar32 = 0;
+                char trail = source.get(sourceArrayIndex);
+                if(UTF16.isTrailSurrogate(trail)) {
+                    ++sourceArrayIndex;
+                    ch = UCharacter.getCodePoint((char)ch, trail);
+                    /* convert this supplementary code point: callback(unassigned) */
+                    cr = CoderResult.unmappableForLength(sourceArrayIndex);
+                } else {
+                    /* this is an unmatched lead code unit (1st surrogate): callback(illegal) */
+                    /* NOTE(review): the length argument is a buffer index; CoderResult rejects 0 - confirm intent */
+                    fromUChar32 = ch;
+                    cr = CoderResult.malformedForLength(sourceArrayIndex);
+                }
+            } else {
+                /* conversion loop */
+                while(sourceArrayIndex<source.limit()){
+                    int ch = source.get(sourceArrayIndex);
+                    if(ch<=0x7f) {
+                        if(!target.hasRemaining()){
+                            /* do not consume a char that could not be written: it is re-read on the next call */
+                            cr = CoderResult.OVERFLOW;
+                            break;
+                        }
+                        target.put((byte)ch);
+                        sourceArrayIndex++;
+                        continue;
+                    }
+                    /* outside US-ASCII (this includes 0x80..0xFF): consume it and stop */
+                    sourceArrayIndex++;
+                    if (UTF16.isLeadSurrogate((char)ch)) {
+                        if (sourceArrayIndex >= source.limit()) {
+                            /* ran out of source: keep the lead pending, illegal only when flushing */
+                            fromUChar32 = ch;
+                            if (flush) {
+                                cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            }
+                            break;
+                        }
+                        int ch2 = source.get(sourceArrayIndex);
+                        if (!UTF16.isTrailSurrogate((char)ch2)) {
+                            /* this is an unmatched lead code unit (1st surrogate) */
+                            /* NOTE(review): OVERFLOW looks unintended for illegal input - confirm against CharsetEncoderICU */
+                            fromUChar32 = ch;
+                            cr = CoderResult.OVERFLOW;
+                            break;
+                        }
+                        /* combine the surrogate pair into one code point */
+                        ch = ((ch - UConverterSharedData.SURROGATE_HIGH_START) << UConverterSharedData.HALF_SHIFT) + ch2 + UConverterSharedData.SURROGATE_LOW_BASE;
+                        sourceArrayIndex++;
+                    }
+                    /* NOTE(review): unencodable chars are reported as malformed, not unmappable - confirm */
+                    fromUChar32 = ch;
+                    cr = CoderResult.malformedForLength(sourceArrayIndex);
+                    break;
+                }
+            }
+
+            /* set offsets since the start */
+            if(offsets!=null) {
+                int sourceIndex = 0;
+                for(int count=target.position()-oldTarget; count>0; --count) {
+                    offsets.put(sourceIndex++);
+                }
+            }
+
+            source.position(sourceArrayIndex);
+            return cr;
+        }
+    }
+
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoderASCII(this);
+    }
+
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoderASCII(this);
+    }
+}
diff --git a/src/com/ibm/icu/charset/CharsetCallback.java b/src/com/ibm/icu/charset/CharsetCallback.java
new file mode 100644
index 0000000..3369243
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetCallback.java
@@ -0,0 +1,234 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CoderResult;
+
+/**
+ * <h2> Callback API for CharsetICU API </h2>
+ *
+ * CharsetCallback class defines some error behaviour functions called
+ * by CharsetDecoderICU and CharsetEncoderICU. The class also provides
+ * the facility by which clients can write their own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the onUnmappableCharacter() and
+ * onMalformedInput() methods, to set the behaviour of a converter
+ * when it encounters UNMAPPED/INVALID sequences.
+ * Currently the only way to set callbacks is by using CodingErrorAction.
+ * In the future we will provide set methods on CharsetEncoder and CharsetDecoder
+ * that will accept CharsetCallback fields.
+ *
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+
+/*public*/ class CharsetCallback {
+ /**
+ * FROM_U, TO_U context options for sub callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String SUB_STOP_ON_ILLEGAL = "i";
+
+ /**
+ * FROM_U, TO_U context options for skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String SKIP_STOP_ON_ILLEGAL = "i";
+
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @draft ICU 3.6
+ */
+ /*public*/ static final String ESCAPE_ICU = null;
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @draft ICU 3.6
+ */
+ /*public*/ static final String ESCAPE_JAVA = "J";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_C = "C";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_XML_DEC = "D";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_XML_HEX = "X";
+ /**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ /*public*/ static final String ESCAPE_UNICODE = "U";
+
+ /**
+ * Decoder Callback interface
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public interface Decoder {
+ /**
+ * This function is called when the bytes in the source cannot be handled,
+ * and this function is meant to handle or fix the error if possible.
+ * The decoder passes its invalidCharBuffer as buffer, the number of invalid bytes as length, and the pending error as cr.
+ * @return Result of decoding action. This returned object is set to an error
+ * if this function could not handle the conversion.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr);
+ }
+ /**
+ * Encoder Callback interface
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public interface Encoder {
+ /**
+ * This function is called when the Unicode characters in the source cannot be handled,
+ * and this function is meant to handle or fix the error if possible.
+ * @return Result of encoding action. This returned object is set to an error
+ * if this function could not handle the conversion.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr);
+ }
+ /**
+ * Skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ if(context==null){
+ return CoderResult.UNDERFLOW;
+ }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
+ if(!cr.isUnmappable()){
+ return cr;
+ }else{
+ return CoderResult.UNDERFLOW;
+ }
+ }
+ return cr;
+ }
+ };
+ /**
+ * Skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr){
+ if(context==null){
+ return CoderResult.UNDERFLOW;
+ }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
+ if(!cr.isUnmappable()){
+ return cr;
+ }else{
+ return CoderResult.UNDERFLOW;
+ }
+ }
+ return cr;
+ }
+ };
+ /**
+ * Skip callback
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ if(context==null){
+ return encoder.cbFromUWriteSub(encoder, source, target, offsets);
+ }else if(((String)context).equals(SUB_STOP_ON_ILLEGAL)){
+ if(!cr.isUnmappable()){
+ return cr;
+ }else{
+ return encoder.cbFromUWriteSub(encoder, source, target, offsets);
+ }
+ }
+ return cr;
+ }
+ };
+ /**
+ * Substitute callback: writes one replacement character to the target through CharsetDecoderICU.toUWriteUChars().
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr){
+ /* the context option is not consulted by this callback */
+ CharsetICU charset = (CharsetICU) decoder.charset();
+ /* U+001A answers one invalid byte when the charset has a subChar1, */
+ /* U+FFFD answers everything else */
+ boolean singleByteSub = decoder.invalidCharLength == 1 && charset.subChar1 != 0;
+ char[] replacement = new char[1];
+ replacement[0] = singleByteSub ? (char)0x1A : (char)0xFFFD;
+ /* one char, tagged with the current source position as its offset */
+ return CharsetDecoderICU.toUWriteUChars(decoder, replacement, 0, 1,
+ target, offsets, source.position());
+ }
+ };
+ /**
+ * Stop callback: returns the incoming CoderResult unchanged, leaving the error unresolved
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ char[] buffer, int length, int cp, CoderResult cr){
+ return cr;
+ }
+ };
+ /**
+ * Stop callback: returns the incoming CoderResult unchanged, leaving the error unresolved
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr){
+ return cr;
+ }
+ };
+}
diff --git a/src/com/ibm/icu/charset/CharsetDecoderICU.java b/src/com/ibm/icu/charset/CharsetDecoderICU.java
new file mode 100644
index 0000000..27b4426
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetDecoderICU.java
@@ -0,0 +1,661 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.ByteBuffer;
+
+import com.ibm.icu.charset.CharsetCallback;
+import com.ibm.icu.impl.Assert;
+
+/**
+ * An abstract class that provides framework methods of decoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter sematics of ICU4C.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+
+public abstract class CharsetDecoderICU extends CharsetDecoder{
+
+ int toUnicodeStatus;
+ byte[] toUBytesArray = new byte[128];
+ int toUBytesBegin = 0;
+ int toULength;
+ char[] charErrorBufferArray = new char[128];
+ int charErrorBufferLength;
+ int charErrorBufferBegin;
+ char[] invalidCharBuffer = new char[128];
+ int invalidCharLength;
+
+ /* store previous UChars/chars to continue partial matches */
+ byte[] preToUArray;
+ int preToUBegin;
+ int preToULength; /* negative: replay */
+ int preToUFirstLength; /* length of first character */
+ int mode;
+
+ Object toUContext = null;
+ private CharsetCallback.Decoder onUnmappableInput = CharsetCallback.TO_U_CALLBACK_STOP;
+ private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
+ CharsetCallback.Decoder toCharErrorBehaviour= new CharsetCallback.Decoder(){
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
+ ByteBuffer source, CharBuffer target, IntBuffer offsets,
+ char[] buffer, int length, CoderResult cr) {
+ /* route the error to the callback configured for its kind */
+ CharsetCallback.Decoder handler;
+ if(cr.isUnmappable()){
+ handler = onUnmappableInput;
+ }else if(cr.isMalformed()){
+ handler = onMalformedInput;
+ }else{
+ /* neither kind of coding error: the stop callback hands cr straight back */
+ handler = CharsetCallback.TO_U_CALLBACK_STOP;
+ }
+ return handler.call(decoder, context, source, target, offsets,
+ buffer, length, cr);
+ }
+ };
+
+ /**
+ * Construct a CharsetDecoderICU based on the information provided from a
+ * CharsetICU object.
+ * @param cs The CharsetICU object containing information about the
+ * charset to decode.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ CharsetDecoderICU(CharsetICU cs) {
+ super(cs, (float) (1/(float)cs.maxCharsPerByte), cs.maxCharsPerByte); /* arguments: charset, averageCharsPerByte, maxCharsPerByte */
+ }
+
+
+ /**
+ * Sets the callback used for malformed (illegal) input sequences, chosen from newAction by getCallback()
+ * @param newAction action to be taken
+ * @exception IllegalArgumentException
+ * @stable ICU 3.6
+ */
+ protected final void implOnMalformedInput(CodingErrorAction newAction) {
+ onMalformedInput = getCallback(newAction);
+ }
+
+ /**
+ * Sets the callback used for unmappable input, chosen from newAction by getCallback()
+ * @param newAction action to be taken
+ * @exception IllegalArgumentException
+ * @stable ICU 3.6
+ */
+ protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {
+ onUnmappableInput = getCallback(newAction);
+ }
+ private static CharsetCallback.Decoder getCallback(CodingErrorAction action){
+ /*
+ * Maps a java.nio coding-error action onto a decoder callback:
+ * REPLACE substitutes and IGNORE skips; REPORT and every other
+ * value (null included) stop the conversion.
+ */
+ return action==CodingErrorAction.REPLACE ? CharsetCallback.TO_U_CALLBACK_SUBSTITUTE
+ : action==CodingErrorAction.IGNORE ? CharsetCallback.TO_U_CALLBACK_SKIP
+ : CharsetCallback.TO_U_CALLBACK_STOP;
+ }
+ private final ByteBuffer EMPTY = ByteBuffer.allocate(0);
+ /**
+ * Flushes any characters saved in the converter's internal buffer and
+ * resets the converter.
+ * @param out buffer that receives the flushed characters
+ * @return result of flushing action and completes the decoding all input.
+ * Returns CoderResult.UNDERFLOW if the action succeeds.
+ * @stable ICU 3.6
+ */
+ protected final CoderResult implFlush(CharBuffer out) {
+ return decode(EMPTY, out, null, true); /* empty input, no offsets, flush=true */
+ }
+
+ /**
+ * Clears the to-Unicode conversion state of this converter.
+ * @stable ICU 3.6
+ */
+ protected void implReset() {
+ /* conversion status and the count of pending input bytes */
+ toUnicodeStatus = toULength = 0;
+
+ /* output overflow buffer: nothing buffered */
+ charErrorBufferBegin = charErrorBufferLength = 0;
+
+ /* partial-match bookkeeping: no stored units, no replay pending */
+ preToUFirstLength = preToULength = preToUBegin = 0;
+
+ /* converter mode back to zero */
+ mode = 0;
+ }
+
+ /**
+ * Decodes one or more bytes. The default behaviour of the converter
+ * is stop and report if an error in input stream is encountered.
+ * To set different behaviour use @see CharsetDecoder.onMalformedInput()
+ * This method allows a buffer by buffer conversion of a data stream.
+ * The state of the conversion is saved between calls to convert.
+ * Among other things, this means multibyte input sequences can be
+ * split between calls. If a call to convert results in an Error, the
+ * conversion may be continued by calling convert again with suitably
+ * modified parameters.All conversions should be finished with a call to
+ * the flush method.
+ * @param in buffer to decode
+ * @param out buffer to populate with decoded result
+ * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+ * action succeeds or more input is needed for completing the decoding action.
+ * @stable ICU 3.6
+ */
+ protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
+ if(!in.hasRemaining()){
+ return CoderResult.UNDERFLOW;
+ }
+ in.position(in.position()+toUCountPending()); /* step over as many bytes as the converter already holds as pending state */
+ /* do the conversion: no offsets requested, and never a flush from this entry point */
+ CoderResult ret = decode(in, out, null, false);
+
+ setSourcePosition(in); /* step back by the pending count as it stands after this conversion */
+ return ret;
+ }
+
+ /** Implements the ICU semantic for decode operation
+ * @param in The input byte buffer
+ * @param out The output character buffer
+ * @param offsets buffer for source offsets of the output; null when the caller wants none
+ * @param flush true if, and only if, the invoker can provide no additional input bytes beyond those in the given buffer
+ * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding action succeeds or more input is needed.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush);
+
+ /**
+ * Implements the ICU semantic for decode operation
+ * @param source The input byte buffer
+ * @param target The output character buffer
+ * @param offsets if non-null, receives -1 for each char drained from the overflow buffer and is passed on to the conversion
+ * @param flush true if, and only if, the invoker can provide no
+ * additional input bytes beyond those in the given buffer.
+ * @return Result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
+ * action succeeds or more input is needed for completing the decoding action.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+
+ /* check parameters */
+ if(target==null || source==null) {
+ throw new IllegalArgumentException();
+ }
+ /*
+ * Make sure that the buffer sizes do not exceed the number range for
+ * int32_t because some functions use the size (in units or bytes)
+ * rather than comparing pointers, and because offsets are int32_t values.
+ *
+ * size_t is guaranteed to be unsigned and large enough for the job.
+ *
+ * Return with an error instead of adjusting the limits because we would
+ * not be able to maintain the semantics that either the source must be
+ * consumed or the target filled (unless an error occurs).
+ * An adjustment would be sourceLimit=t+0x7fffffff; for example.
+ */
+ /*agljport:fix
+ if(
+ ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
+ ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)
+ ) {
+ *err=U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ */
+
+ /* flush the target overflow buffer */
+ if(charErrorBufferLength>0) {
+ char[] overflow = null;
+ int i, length;
+
+ overflow=charErrorBufferArray;
+ length=charErrorBufferLength;
+ i=0;
+ do {
+ if(target.remaining()<=0) {
+ /* the overflow buffer contains too much, keep the rest */
+ int j=0;
+
+ do {
+ overflow[j++]=overflow[i++];
+ } while(i<length);
+
+ charErrorBufferLength=j; /* no (byte) cast: 128 chars left in the buffer would wrap to -128 and be lost */
+ return CoderResult.OVERFLOW;
+ }
+
+ /* copy the overflow contents to the target */
+ target.put(overflow[i++]);
+ if(offsets!=null) {
+ offsets.put(-1); /* no source index available for old output */
+ }
+ } while(i<length);
+
+ /* the overflow buffer is completely copied to the target */
+ charErrorBufferLength=0;
+ }
+
+ if(!flush && source.remaining()==0 && preToULength>=0) {
+ /* the overflow buffer is emptied and there is no new input: we are done */
+ return CoderResult.UNDERFLOW;
+ }
+
+ /*
+ * Do not simply return with a buffer overflow error if
+ * !flush && t==targetLimit
+ * because it is possible that the source will not generate any output.
+ * For example, the skip callback may be called;
+ * it does not output anything.
+ */
+
+ return toUnicodeWithCallback(source, target, offsets, flush);
+ }
+
+ /* maximum number of indexed bytes */
+ private static final int EXT_MAX_BYTES = 0x1f;
+ private void updateOffsets(IntBuffer offsets,int length, int sourceIndex, int errorInputLength) {
+ /* Rebases the next `length` entries of offsets, starting at its current position, after a conversion step. */
+ /* NOTE(review): assumes the position still marks the first offset written by that step - confirm with callers */
+ int limit, delta, offset;
+ if(sourceIndex>=0) {
+ /*
+ * adjust each offset by adding the previous sourceIndex
+ * minus the length of the input sequence that caused an
+ * error, if any
+ */
+ delta=sourceIndex-errorInputLength;
+ } else {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ */
+ delta=-1;
+ }
+ limit=offsets.position()+length;
+ if(delta==0) {
+ /* most common case, nothing to do */
+ } else if(delta>0) {
+ /* add the delta to each offset (but not if the offset is <0) */
+ while(offsets.position()<limit) {
+ offset=offsets.get(offsets.position());
+ /* the relative put advances the position on every pass; */
+ /* a negative offset is rewritten unchanged, because skipping the put */
+ /* (as the old code did) left the position stuck and the loop endless */
+ offsets.put(offset>=0 ? offset+delta : offset);
+ }
+ } else /* delta<0 */ {
+ /*
+ * set each offset to -1 because this conversion function
+ * does not handle offsets
+ * or the error input sequence started in a previous buffer
+ */
+ while(offsets.position()<limit) {
+ offsets.put(-1);
+ }
+ }
+ }
+ final CoderResult toUnicodeWithCallback(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+
+ int sourceIndex;
+ int errorInputLength;
+ boolean converterSawEndOfInput, calledCallback;
+ int t=target.position();
+ int s=source.position();
+ /* variables for m:n conversion */
+ ByteBuffer replayArray = ByteBuffer.allocate(EXT_MAX_BYTES);
+ int replayArrayIndex = 0;
+
+ ByteBuffer realSource=null;
+ boolean realFlush=false;
+ int realSourceIndex=0;
+
+
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ /* get the converter implementation function */
+ sourceIndex=0;
+
+ if(preToULength>=0) {
+ /* normal mode */
+ } else {
+ /*
+ * Previous m:n conversion stored source units from a partial match
+ * and failed to consume all of them.
+ * We need to "replay" them from a temporary buffer and convert them first.
+ */
+ realSource=source;
+ realFlush=flush;
+ realSourceIndex=sourceIndex;
+ //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
+ replayArray.put(preToUArray,0, -preToULength);
+ source=replayArray;
+ source.position(0);
+ source.limit(replayArrayIndex-preToULength);
+ flush=false;
+ sourceIndex=-1;
+ preToULength=0;
+ }
+
+ /*
+ * loop for conversion and error handling
+ *
+ * loop {
+ * convert
+ * loop {
+ * update offsets
+ * handle end of input
+ * handle errors/call callback
+ * }
+ * }
+ */
+ for(;;) {
+ if(cr.isUnderflow()) {
+ /* convert */
+ cr = decodeLoop(source, target, offsets, flush);
+
+ /*
+ * set a flag for whether the converter
+ * successfully processed the end of the input
+ *
+ * need not check cnv->preToULength==0 because a replay (<0) will cause
+ * s<sourceLimit before converterSawEndOfInput is checked
+ */
+ converterSawEndOfInput= (cr.isUnderflow() && flush && source.remaining()==0 && toULength==0);
+ } else {
+ /* handle error from getNextUChar() */
+ converterSawEndOfInput=false;
+ }
+
+ /* no callback called yet for this iteration */
+ calledCallback=false;
+
+ /* no sourceIndex adjustment for conversion, only for callback output */
+ errorInputLength=0;
+
+ /*
+ * loop for offsets and error handling
+ *
+ * iterates at most 3 times:
+ * 1. to clean up after the conversion function
+ * 2. after the callback
+ * 3. after the callback again if there was truncated input
+ */
+ for(;;) {
+ /* update offsets if we write any */
+ if(offsets!=null) {
+
+ int length=(target.position()-t);
+ if(length>0) {
+ updateOffsets(offsets, length, sourceIndex, errorInputLength);
+
+
+ /*
+ * if a converter handles offsets and updates the offsets
+ * pointer at the end, then pArgs->offset should not change
+ * here;
+ * however, some converters do not handle offsets at all
+ * (sourceIndex<0) or may not update the offsets pointer
+ */
+ //TODO: pArgs->offsets=offsets+=length;
+ }
+
+ if(sourceIndex>=0) {
+ sourceIndex+=(source.position()-s);
+ }
+
+ }
+
+ if(preToULength<0) {
+ /*
+ * switch the source to new replay units (cannot occur while replaying)
+ * after offset handling and before end-of-input and callback handling
+ */
+ if(realSource==null)
+ {
+ realSource=source;
+ realFlush=flush;
+ realSourceIndex=sourceIndex;
+
+ //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
+ replayArray.put(preToUArray,0, -preToULength);
+
+ source=replayArray;
+ source.limit(replayArrayIndex-preToULength);
+ flush=false;
+ if((sourceIndex+=preToULength)<0) {
+ sourceIndex=-1;
+ }
+
+ preToULength=0;
+ } else {
+ /* see implementation note before _fromUnicodeWithCallback() */
+ //agljport:todo U_ASSERT(realSource==NULL);
+ Assert.assrt(realSource==null);
+ }
+ }
+
+ /* update pointers */
+ s=source.position();
+ t=target.position();
+
+ if(cr.isUnderflow()) {
+ if(s<source.limit())
+ {
+ /*
+ * continue with the conversion loop while there is still input left
+ * (continue converting by breaking out of only the inner loop)
+ */
+ break;
+ } else if(realSource!=null) {
+ /* switch back from replaying to the real source and continue */
+ source = realSource;
+ flush=realFlush;
+ sourceIndex=realSourceIndex;
+ realSource=null;
+ break;
+ } else if(flush && toULength>0) {
+ /*
+ * the entire input stream is consumed
+ * and there is a partial, truncated input sequence left
+ */
+
+ /* inject an error and continue with callback handling */
+ cr = CoderResult.malformedForLength(toULength);
+ calledCallback=false; /* new error condition */
+ } else {
+ /* input consumed */
+ if(flush) {
+ /*
+ * return to the conversion loop once more if the flush
+ * flag is set and the conversion function has not
+ * successfully processed the end of the input yet
+ *
+ * (continue converting by breaking out of only the inner loop)
+ */
+ if(!converterSawEndOfInput) {
+ break;
+ }
+
+ /* reset the converter without calling the callback function */
+ implReset();
+ }
+
+ /* done successfully */
+ return cr;
+ }
+ }
+
+ /* U_FAILURE(*err) */
+ {
+
+ if( calledCallback || cr.isOverflow() ||
+ (cr.isMalformed() && cr.isUnmappable())
+ ) {
+ /*
+ * the callback did not or cannot resolve the error:
+ * set output pointers and return
+ *
+ * the check for buffer overflow is redundant but it is
+ * a high-runner case and hopefully documents the intent
+ * well
+ *
+ * if we were replaying, then the replay buffer must be
+ * copied back into the UConverter
+ * and the real arguments must be restored
+ */
+ if(realSource!=null) {
+ int length;
+ Assert.assrt(preToULength==0);
+ length=(int)(source.limit()-source.position());
+ if(length>0) {
+ //UConverterUtility.uprv_memcpy(preToUArray, preToUBegin, pArgs.sourceArray, pArgs.sourceBegin, length);
+ source.get(preToUArray, preToUBegin, length);
+ preToULength=(byte)-length;
+ }
+
+ source=realSource;
+ flush=realFlush;
+ }
+ return cr;
+ }
+ }
+
+ /* copy toUBytes[] to invalidCharBuffer[] */
+ errorInputLength=invalidCharLength=toULength;
+ if(errorInputLength>0) {
+ copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength);
+ }
+
+ /* set the converter state to deal with the next character */
+ toULength=0;
+
+ /* call the callback function */
+ cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr);
+ /*
+ * loop back to the offset handling
+ *
+ * this flag will indicate after offset handling
+ * that a callback was called;
+ * if the callback did not resolve the error, then we return
+ */
+ calledCallback=true;
+ }
+ }
+ }
+
+ /**
+ * Reports how many input units the converter is holding in its internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @return The magnitude of preToULength when it is non-zero, otherwise toULength when positive, otherwise 0.
+ * @draft ICU 3.6
+ */
+ /*public*/ int toUCountPending() {
+ /* stored partial-match units count whether they are kept (>0) or queued for replay (<0) */
+ if(preToULength != 0){
+ return preToULength < 0 ? -preToULength : preToULength;
+ }
+ /*
+ * no stored units: report the bytes of an incomplete input sequence, if any
+ */
+ return toULength > 0 ? toULength : 0;
+ }
+
+
+ private final void setSourcePosition(ByteBuffer source){
+ // Moves the source position back by the count reported by toUCountPending();
+ // presumably so that bytes still held in converter state are not reported as consumed - confirm against decodeLoop
+ source.position(source.position() - toUCountPending());
+
+ }
+ private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
+ for(int i=0; i<length; i++){ /* count copied elements: the old bound started at srcOffset and copied too few when it was non-zero */
+ dst[dstOffset+i]=(char)(src[srcOffset+i] & 0xff); /* mask so that bytes >=0x80 become 0x80..0xFF instead of sign-extending to 0xFFxx */
+ }
+ }
+ /**
+ * ONLY used by ToU callback functions.
+ * Writes length chars of ucharsArray, starting at ucharsBegin, to the target; with a non-null
+ * offsets buffer every written char also records sourceIndex. Chars that do not fit go to cnv's overflow buffer.
+ * @return CoderResult.OVERFLOW when some chars went to the overflow buffer, otherwise CoderResult.UNDERFLOW.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ static final CoderResult toUWriteUChars( CharsetDecoderICU cnv,
+ char[] ucharsArray, int ucharsBegin, int length,
+ CharBuffer target, IntBuffer offsets, int sourceIndex) {
+
+ final boolean recordOffsets = (offsets!=null);
+ int next = ucharsBegin;
+ int pending = length;
+
+ /* write as many UChars as the target has room for */
+ for(; pending>0 && target.hasRemaining(); --pending) {
+ target.put(ucharsArray[next++]);
+ if(recordOffsets) {
+ /* every written char maps to the same source index */
+ offsets.put(sourceIndex);
+ }
+ }
+
+ if(pending<=0) {
+ /* everything fit */
+ return CoderResult.UNDERFLOW;
+ }
+
+ /* the rest replaces the content of the converter's overflow buffer */
+ cnv.charErrorBufferLength = 0;
+ while(pending>0) {
+ cnv.charErrorBufferArray[cnv.charErrorBufferLength++] = ucharsArray[next++];
+ --pending;
+ }
+ /* decode() drains that buffer into the target before it converts anything else */
+ return CoderResult.OVERFLOW;
+ }
+ /**
+ * This function will write out the Unicode substitution character to the
+ * target character buffer.
+ * Sub classes to override this method if required
+ * @param decoder the decoder whose replacement(), charset and invalidCharLength are consulted
+ * @param source the input buffer; its current position is recorded as the offset of the output
+ * @param target the buffer that receives the substitution
+ * @param offsets offsets buffer handed on to toUWriteUChars(); may be null
+ * @return A CoderResult object that contains the error result when an error occurs.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ CoderResult cbToUWriteSub(CharsetDecoderICU decoder,
+ ByteBuffer source, CharBuffer target,
+ IntBuffer offsets){
+ String sub = decoder.replacement();
+ CharsetICU cs = (CharsetICU) decoder.charset();
+ if (decoder.invalidCharLength==1 && cs.subChar1 != 0x00) {
+ char[] subArr = new char[] { 0x1a };
+ /* the length must be that of subArr: sub.length() overran this one-element array for longer replacements */
+ return CharsetDecoderICU.toUWriteUChars(decoder, subArr, 0,
+ subArr.length, target, offsets, source.position());
+ } else {
+ return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(),
+ 0, sub.length(), target, offsets, source.position());
+ }
+ }
+}
diff --git a/src/com/ibm/icu/charset/CharsetEncoderICU.java b/src/com/ibm/icu/charset/CharsetEncoderICU.java
new file mode 100644
index 0000000..f771686
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetEncoderICU.java
@@ -0,0 +1,673 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+
+import com.ibm.icu.impl.Assert;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * An abstract class that provides framework methods of decoding operations for concrete
+ * subclasses.
+ * In the future this class will contain API that will implement converter sematics of ICU4C.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CharsetEncoderICU extends CharsetEncoder {
+
+ byte[] errorBuffer = new byte[30]; /* output bytes that did not fit into the target buffer */
+ int errorBufferLength = 0; /* number of valid bytes in errorBuffer */
+
+ /** these are for encodeLoopICU */
+ int fromUnicodeStatus;
+ int fromUChar32; /* code point handed to the error callback; 0 when none is pending */
+ boolean useSubChar1;
+
+ /* store previous UChars/chars to continue partial matches */
+ int preFromUFirstCP; /* >=0: partial match */
+ char[] preFromUArray;
+ int preFromUBegin;
+ int preFromULength; /* negative: replay */
+
+ char[] invalidUCharBuffer = new char[2]; /* UTF-16 form of the code point that caused an error */
+ int invalidUCharLength; /* number of chars used in invalidUCharBuffer */
+ Object fromUContext; /* passed unchanged to the error callback as its context */
+ private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP; /* replaced by implOnUnmappableCharacter() */
+ private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP; /* replaced by implOnMalformedInput() */
+ CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder(){
+     public CoderResult call(CharsetEncoderICU encoder, Object context,
+             CharBuffer source, ByteBuffer target, IntBuffer offsets,
+             char[] buffer, int length, int cp, CoderResult cr) {
+         /* choose the handler for this kind of error; anything else stops */
+         final CharsetCallback.Encoder handler;
+         if (cr.isUnmappable()) {
+             handler = onUnmappableInput;
+         } else if (cr.isMalformed()) {
+             handler = onMalformedInput;
+         } else {
+             handler = CharsetCallback.FROM_U_CALLBACK_STOP;
+         }
+         /* delegate with the arguments unchanged */
+         return handler.call(encoder, context,
+                 source, target, offsets,
+                 buffer, length, cp, cr);
+     }
+ };
+
+ /**
+  * Constructs a new encoder for the given charset
+  * @param cs for which the encoder is created
+  * @param replacement the substitution bytes
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ CharsetEncoderICU(CharsetICU cs, byte[] replacement) {
+     super(cs, (cs.minBytesPerChar+cs.maxBytesPerChar)/2, cs.maxBytesPerChar, replacement); // NOTE(review): int division truncates the average bytes-per-char - confirm intended
+ }
+
+ /**
+  * Sets the action to be taken if an illegal sequence is encountered
+  * @param newAction action to be taken
+  * @exception IllegalArgumentException
+  * @stable ICU 3.6
+  */
+ protected void implOnMalformedInput(CodingErrorAction newAction) {
+     onMalformedInput = getCallback(newAction); // store the callback that getCallback() maps this action to
+ }
+
+ /**
+  * Sets the action to be taken if an unmappable character is encountered
+  * @param newAction action to be taken
+  * @exception IllegalArgumentException
+  * @stable ICU 3.6
+  */
+ protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
+     onUnmappableInput = getCallback(newAction); // store the callback that getCallback() maps this action to
+ }
+
+ private static CharsetCallback.Encoder getCallback(CodingErrorAction action){
+     /* REPORT and any other unlisted action both map to the STOP callback */
+     CharsetCallback.Encoder callback = CharsetCallback.FROM_U_CALLBACK_STOP;
+     if (action == CodingErrorAction.REPLACE) {
+         callback = CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
+     } else if (action == CodingErrorAction.IGNORE) {
+         callback = CharsetCallback.FROM_U_CALLBACK_SKIP;
+     }
+     return callback;
+ }
+
+ private static final CharBuffer EMPTY = CharBuffer.allocate(0); // zero-length input handed to encode() when flushing
+ /**
+  * Flushes any characters saved in the converter's internal buffer and
+  * resets the converter.
+  * @param out buffer that receives the flushed bytes
+  * @return result of the flushing action, which completes the encoding of all input.
+  *         Returns CoderResult.UNDERFLOW if the action succeeds.
+  * @stable ICU 3.6
+  */
+ protected CoderResult implFlush(ByteBuffer out) {
+     return encode(EMPTY, out, null, true); // no new input, no offsets, flush=true
+ }
+
+ /**
+  * Puts the from-Unicode state of this encoder back to its initial values.
+  * @stable ICU 3.6
+  */
+ protected void implReset() {
+     /* stored partial-match / replay state */
+     preFromULength = preFromUFirstCP = preFromUBegin = 0;
+     /* pending code point and converter status */
+     fromUnicodeStatus = fromUChar32 = 0;
+     /* pending overflow output */
+     errorBufferLength = 0;
+ }
+
+ /**
+  * Encodes one or more chars. The default behaviour of the
+  * converter is stop and report if an error in input stream is encountered.
+  * To set different behaviour use @see CharsetEncoder.onMalformedInput()
+  * @param in buffer to encode
+  * @param out buffer to populate with encoded result
+  * @return result of encoding action. Returns CoderResult.UNDERFLOW if the encoding
+  * action succeeds or more input is needed for completing the encoding action.
+  * @stable ICU 3.6
+  */
+ protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
+     if(!in.hasRemaining()){
+         return CoderResult.UNDERFLOW;
+     }
+     in.position(in.position()+fromUCountPending()); // advance past the chars counted by fromUCountPending()
+     /* do the conversion */
+     CoderResult ret = encode(in, out, null, false);
+     setSourcePosition(in); // move back by the number of chars that are pending after this call
+     return ret;
+ }
+ /**
+  * Implements ICU semantics of buffer management; flush is the flag passed on by fromUnicodeWithCallback()
+  * @param source the input character buffer
+  * @param target the output byte buffer
+  * @param offsets buffer for source offsets; the callers in this class pass it through and may pass null
+  * @return A CoderResult object that contains the error result when an error occurs.
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush);
+
+ /**
+  * Implements ICU semantics for encoding the buffer.
+  * Bytes left in the overflow buffer by an earlier call are written first;
+  * CoderResult.OVERFLOW is returned right away if the target cannot hold them.
+  * The conversion itself is delegated to fromUnicodeWithCallback().
+  * @param source The input character buffer
+  * @param target The output byte buffer
+  * @param offsets receives -1 for each byte copied from the overflow buffer;
+  *   may be null
+  * @param flush true if, and only if, the invoker can provide no
+  *   additional input chars beyond those in the given buffer.
+  * @return A CoderResult object that contains the error result when an error occurs.
+  * @throws IllegalArgumentException if source or target is null
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ final CoderResult encode(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+
+     /* check parameters */
+     if(source==null || target==null) {
+         throw new IllegalArgumentException();
+     }
+
+     /*
+      * NOTE: the check described in this comment is not performed by this method.
+      * Make sure that the buffer sizes do not exceed the number range for
+      * int32_t because some functions use the size (in units or bytes)
+      * rather than comparing pointers, and because offsets are int32_t values.
+      * size_t is guaranteed to be unsigned and large enough for the job.
+      *
+      * Return with an error instead of adjusting the limits because we would
+      * not be able to maintain the semantics that either the source must be
+      * consumed or the target filled (unless an error occurs).
+      * An adjustment would be targetLimit=t+0x7fffffff; for example.
+      */
+
+     /* flush the target overflow buffer */
+     /* (its length is read once; the buffer is only rewritten if the target fills up) */
+     final int pending = errorBufferLength;
+     for(int done=0; done<pending; ++done) {
+         if(!target.hasRemaining()) {
+             /* the overflow buffer contains too much, keep the rest */
+             /* at the start of the buffer; arraycopy handles the overlapping ranges */
+             final int kept = pending-done;
+             System.arraycopy(errorBuffer, done, errorBuffer, 0, kept);
+             errorBufferLength = (byte)kept;
+             return CoderResult.OVERFLOW;
+         }
+
+         /* copy the overflow contents to the target */
+         target.put(errorBuffer[done]);
+         if(offsets!=null) {
+             offsets.put(-1); /* no source index available for old output */
+         }
+     }
+
+     if(pending>0) {
+         /* the overflow buffer is completely copied to the target */
+         errorBufferLength = 0;
+     }
+
+     /*
+      * conversion is still needed when flushing, when there is new input,
+      * or when stored units have to be replayed (preFromULength<0)
+      */
+     final boolean workLeft = flush || source.hasRemaining() || preFromULength<0;
+     if(!workLeft) {
+         /* the overflow buffer is emptied and there is no new input: we are done */
+         return CoderResult.UNDERFLOW;
+     }
+
+     /*
+      * Do not simply return with a buffer overflow error if
+      * !flush && t==targetLimit
+      * because it is possible that the source will not generate any output.
+      * For example, the skip callback may be called;
+      * it does not output anything.
+      */
+     return fromUnicodeWithCallback(source, target, offsets, flush);
+ }
+ /* maximum number of indexed UChars */
+ private static final int EXT_MAX_UCHARS = 19;
+
+ /**
+  * Converts source into target by calling encodeLoop() repeatedly and handing conversion errors to fromCharErrorBehaviour.
+  * <p>Implementation note for m:n conversions:
+  * While collecting source units to find the longest match for m:n conversion, some source units
+  * may need to be stored for a partial match. When a second buffer does not yield a match on all
+  * of the previously stored source units, then they must be "replayed", i.e., fed back into the converter.
+  * <p>The code relies on the fact that replaying will not nest - converting a replay buffer will not
+  * result in a replay. This is because a replay is necessary only after the _continuation_ of a
+  * partial match failed, but a replay buffer is converted as a whole. It may result in some of its
+  * units being stored again for a partial match, but there will not be a continuation _during_ the
+  * replay which could fail.
+  * <p>It is conceivable that a callback function could call the converter recursively in a way that
+  * causes another replay to be stored, but that would be an error in the callback function.
+  * Such violations will cause assertion failures in a debug build, and wrong output, but they
+  * will not cause a crash.
+  * @param source the input character buffer
+  * @param target the output byte buffer
+  * @param offsets buffer for source offsets, may be null
+  * @param flush true if no further input will follow the contents of source
+  * @return UNDERFLOW when the input was consumed, otherwise the overflow or error result that stopped the conversion
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ final CoderResult fromUnicodeWithCallback(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+     int sBufferIndex;
+     int sourceIndex;
+     int errorInputLength;
+     boolean converterSawEndOfInput, calledCallback;
+
+
+     /* variables for m:n conversion */
+     CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS);
+     int replayArrayIndex=0;
+     CharBuffer realSource;
+     boolean realFlush;
+
+     CoderResult cr = CoderResult.UNDERFLOW;
+
+     /* get the converter implementation function */
+     sourceIndex=0;
+
+     if(preFromULength>=0) {
+         /* normal mode */
+         realSource=null;
+         realFlush=false;
+     } else {
+         /*
+          * Previous m:n conversion stored source units from a partial match
+          * and failed to consume all of them.
+          * We need to "replay" them from a temporary buffer and convert them first.
+          */
+         realSource=source;
+         realFlush = flush;
+
+         //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR);
+         replayArray.put(preFromUArray,0, -preFromULength);
+         source=replayArray; /* switch first: position/limit below must apply to the replay buffer, not to the caller's buffer */
+         source.position(replayArrayIndex);
+         source.limit(replayArrayIndex-preFromULength); //preFromULength is negative, see declaration
+         flush=false;
+
+         preFromULength=0;
+     }
+
+     /*
+      * loop for conversion and error handling
+      *
+      * loop {
+      *   convert
+      *   loop {
+      *     update offsets
+      *     handle end of input
+      *     handle errors/call callback
+      *   }
+      * }
+      */
+     for(;;) {
+         /* convert */
+         cr = encodeLoop(source, target, offsets, flush);
+         /*
+          * set a flag for whether the converter
+          * successfully processed the end of the input
+          *
+          * need not check cnv.preFromULength==0 because a replay (<0) will cause
+          * s<sourceLimit before converterSawEndOfInput is checked
+          */
+         converterSawEndOfInput= (boolean)(cr.isUnderflow() && flush && source.remaining()==0 && fromUChar32==0);
+
+         /* no callback called yet for this iteration */
+         calledCallback=false;
+
+         /* no sourceIndex adjustment for conversion, only for callback output */
+         errorInputLength=0;
+
+         /*
+          * loop for offsets and error handling
+          *
+          * iterates at most 3 times:
+          * 1. to clean up after the conversion function
+          * 2. after the callback
+          * 3. after the callback again if there was truncated input
+          */
+         for(;;) {
+             /* update offsets if we write any */
+             if(offsets!=null) {
+                 int length = target.remaining(); /* NOTE(review): this is the space left in target, not the number of bytes just written - confirm intended */
+                 if(length>0) {
+
+                     /*
+                      * if a converter handles offsets and updates the offsets
+                      * pointer at the end, then offset should not change
+                      * here;
+                      * however, some converters do not handle offsets at all
+                      * (sourceIndex<0) or may not update the offsets pointer
+                      */
+                     offsets.position(offsets.position()+length);
+                 }
+
+                 if(sourceIndex>=0) {
+                     sourceIndex+=(int)(source.position());
+                 }
+             }
+
+             if(preFromULength<0) {
+                 /*
+                  * switch the source to new replay units (cannot occur while replaying)
+                  * after offset handling and before end-of-input and callback handling
+                  */
+                 if(realSource==null) {
+                     realSource=source;
+                     realFlush=flush;
+
+                     replayArray.clear(); /* an earlier replay in this call may have left position==limit; make room for the new units */
+                     replayArray.put(preFromUArray,0, -preFromULength);
+
+                     source=replayArray;
+                     source.position(replayArrayIndex);
+                     source.limit(replayArrayIndex-preFromULength);
+                     flush=false;
+                     if((sourceIndex+=preFromULength)<0) {
+                         sourceIndex=-1;
+                     }
+
+                     preFromULength=0;
+                 } else {
+                     /* see implementation note before _fromUnicodeWithCallback() */
+                     //agljport:todo U_ASSERT(realSource==NULL);
+                     Assert.assrt(realSource==null);
+                 }
+             }
+
+             /* update pointers */
+             sBufferIndex=source.position();
+             if(cr.isUnderflow()) {
+                 if(sBufferIndex<source.limit()) {
+                     /*
+                      * continue with the conversion loop while there is still input left
+                      * (continue converting by breaking out of only the inner loop)
+                      */
+                     break;
+                 } else if(realSource!=null) {
+                     /* switch back from replaying to the real source and continue */
+                     source=realSource;
+                     flush=realFlush;
+                     sourceIndex=source.position();
+                     realSource=null;
+                     break;
+                 } else if(flush && fromUChar32!=0) {
+                     /*
+                      * the entire input stream is consumed
+                      * and there is a partial, truncated input sequence left
+                      */
+
+                     /* inject an error and continue with callback handling */
+                     //err[0]=ErrorCode.U_TRUNCATED_CHAR_FOUND;
+                     cr = CoderResult.malformedForLength(1);
+                     calledCallback=false; /* new error condition */
+                 } else {
+                     /* input consumed */
+                     if(flush) {
+                         /*
+                          * return to the conversion loop once more if the flush
+                          * flag is set and the conversion function has not
+                          * successfully processed the end of the input yet
+                          *
+                          * (continue converting by breaking out of only the inner loop)
+                          */
+                         if(!converterSawEndOfInput) {
+                             break;
+                         }
+
+                         /* reset the converter without calling the callback function */
+                         implReset();
+                     }
+
+                     /* done successfully */
+                     return cr;
+                 }
+             }
+
+             /*U_FAILURE(*err) */
+             {
+
+                 if( calledCallback || cr.isOverflow() ||
+                     (!cr.isMalformed() && !cr.isUnmappable()) /* i.e. an error that no callback can handle */
+                   ){
+                     /*
+                      * the callback did not or cannot resolve the error:
+                      * set output pointers and return
+                      *
+                      * the check for buffer overflow is redundant but it is
+                      * a high-runner case and hopefully documents the intent
+                      * well
+                      *
+                      * if we were replaying, then the replay buffer must be
+                      * copied back into the UConverter
+                      * and the real arguments must be restored
+                      */
+                     if(realSource!=null) {
+                         int length;
+
+                         //agljport:todo U_ASSERT(cnv.preFromULength==0);
+
+                         length=source.remaining();
+                         if(length>0) {
+                             //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR);
+                             source.get(preFromUArray, 0, length );
+                             preFromULength=(byte)-length;
+                         }
+                         source=realSource;
+                         flush=realFlush;
+                     }
+                     return cr;
+                 }
+             }
+
+             /* callback handling */
+             {
+                 /* get and write the code point */
+                 errorInputLength = UTF16.append(invalidUCharBuffer, 0, fromUChar32);
+                 invalidUCharLength = errorInputLength;
+                 int codePoint = fromUChar32; /* remember it: the state is cleared before the callback runs */
+                 /* set the converter state to deal with the next character */
+                 fromUChar32=0;
+
+                 /* call the callback function */
+                 cr = fromCharErrorBehaviour.call(this, fromUContext, source, target, offsets, invalidUCharBuffer, invalidUCharLength, codePoint, cr);
+             }
+
+             /*
+              * loop back to the offset handling
+              *
+              * this flag will indicate after offset handling
+              * that a callback was called;
+              * if the callback did not resolve the error, then we return
+              */
+             calledCallback=true;
+         }
+     }
+ }
+ /**
+  * Ascertains if a given Unicode code point (32bit value for handling surrogates)
+  * can be converted to the target encoding. If the caller wants to test if a
+  * surrogate pair can be converted to target encoding then the
+  * responsibility of assembling the int value lies with the caller.
+  * For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
+  * <pre>
+  * while(i<mySource.length){
+  *    if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
+  *        if(UTF16.isTrailSurrogate(mySource[i+1])){
+  *            int temp = UTF16.charAt(mySource,i,i+1,0);
+  *            if(!((CharsetEncoderICU) myConv).canEncode(temp)){
+  *                passed=false;
+  *            }
+  *            i++;
+  *        }
+  *    }
+  *    i++;
+  * }
+  * </pre>
+  * or
+  * <pre>
+  * String src = new String(mySource);
+  * int i = 0, codepoint;
+  * boolean passed = true;
+  * while(i<src.length()){
+  *    codepoint = UTF16.charAt(src,i);
+  *    i+= (codepoint>0xffff)? 2:1;
+  *    if(!((CharsetEncoderICU) myConv).canEncode(codepoint)){
+  *        passed = false;
+  *    }
+  * }
+  * </pre>
+  *
+  * @param codepoint Unicode code point as int value
+  * @return true if a character can be converted
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ public boolean canEncode(int codepoint) {
+     return true; // this implementation accepts every code point without consulting the charset
+ }
+ /**
+  * Overrides super class method
+  * @stable ICU 3.6
+  */
+ public boolean isLegalReplacement(byte[] repl){
+     return true; // every replacement byte sequence is accepted without inspection
+ }
+
+ /**
+  * Writes out the specified output bytes to the target byte buffer or to converter internal buffers.
+  * Bytes are put into <code>out</code> while it has room; any bytes that do not fit are
+  * stored in <code>cnv.errorBuffer</code> (replacing its previous contents) and
+  * CoderResult.OVERFLOW is returned. If <code>offsets</code> is not null it receives
+  * <code>sourceIndex</code> once for every byte actually written to <code>out</code>.
+  * @param cnv the encoder whose overflow buffer receives the bytes that do not fit
+  * @param bytesArray the array holding the bytes to write
+  * @param bytesBegin index of the first byte to write
+  * @param bytesLength number of bytes to write
+  * @param out the target byte buffer
+  * @param offsets buffer for source offsets, may be null
+  * @param sourceIndex the source index recorded for each written byte
+  * @return CoderResult.UNDERFLOW if all bytes were written to <code>out</code>,
+  *         CoderResult.OVERFLOW otherwise
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv,
+         byte[] bytesArray, int bytesBegin, int bytesLength,
+         ByteBuffer out, IntBuffer offsets, int sourceIndex){
+
+     final int bytesLimit = bytesBegin + bytesLength;
+
+     /*
+      * write bytes while the target has room; hasRemaining() is tested up front
+      * instead of relying on a BufferOverflowException for control flow
+      */
+     while(bytesBegin<bytesLimit && out.hasRemaining()) {
+         out.put(bytesArray[bytesBegin++]);
+         if(offsets!=null) {
+             /* record an offset for every byte that reached the target, */
+             /* including those written before an overflow */
+             offsets.put(sourceIndex);
+         }
+     }
+
+     /*
+      * write overflow: whatever did not fit is kept in the converter
+      */
+     cnv.errorBufferLength = bytesLimit - bytesBegin;
+     if(cnv.errorBufferLength<=0) {
+         /* everything fit into the target */
+         return CoderResult.UNDERFLOW;
+     }
+
+     /* copy the bytes that did not fit to the start of the overflow buffer */
+     int index = 0;
+     while(bytesBegin<bytesLimit) {
+         cnv.errorBuffer[index++]=bytesArray[bytesBegin++];
+     }
+     return CoderResult.OVERFLOW;
+ }
+
+ /**
+  * Reports how many chars are currently held in the converter's internal state
+  * because more input is needed for completing the conversion. This function is
+  * useful for mapping semantics of ICU's converter interface to those of iconv,
+  * and this information is not needed for normal conversion.
+  * @return The number of chars in the state; 0 if nothing is pending.
+  * @draft ICU 3.4
+  * @provisional This API might change or be removed in a future release.
+  */
+ /*public*/ int fromUCountPending(){
+     if (preFromULength != 0) {
+         /* >0: first code point plus the stored units; <0: units waiting to be replayed */
+         return preFromULength > 0
+                 ? UTF16.getCharCount(preFromUFirstCP) + preFromULength
+                 : -preFromULength;
+     }
+     if (fromUChar32 > 0) {
+         return 1;
+     }
+     return preFromUFirstCP > 0 ? UTF16.getCharCount(preFromUFirstCP) : 0;
+ }
+ /**
+  * Moves the position of source back by fromUCountPending() chars.
+  * @param source the input buffer whose position is adjusted
+  */
+ private final void setSourcePosition(CharBuffer source){
+
+     // ok was there input held in the previous invocation of encodeLoop
+     // that resulted in output in this invocation?
+     source.position(source.position() - fromUCountPending());
+ }
+ /**
+  * Write the codepage substitution character. Subclasses to override this method.
+  * For stateful converters, it is typically necessary to handle this
+  * specifically for the converter in order to properly maintain the state.
+  * @param encoder the encoder supplying the replacement bytes and the invalid input
+  * @param source The input character buffer
+  * @param target The output byte buffer
+  * @param offsets
+  * @return A CoderResult object that contains the error result when an error occurs.
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+         CharBuffer source, ByteBuffer target,
+         IntBuffer offsets){
+     CharsetICU cs = (CharsetICU) encoder.charset();
+     byte[] replacementBytes = encoder.replacement();
+     /* a lone subChar1 is used when the charset has one and the first invalid unit is <= 0xff */
+     boolean singleByteSub = cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff;
+     byte[] subBytes = replacementBytes;
+     if (singleByteSub) {
+         subBytes = new byte[] { cs.subChar1 };
+     }
+     return CharsetEncoderICU.fromUWriteBytes(encoder, subBytes, 0,
+             subBytes.length, target, offsets, source.position());
+ }
+}
diff --git a/src/com/ibm/icu/charset/CharsetICU.java b/src/com/ibm/icu/charset/CharsetICU.java
new file mode 100644
index 0000000..2a429ea
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetICU.java
@@ -0,0 +1,233 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.lang.reflect.Constructor;
+
+import java.lang.reflect.InvocationTargetException;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.HashMap;
+
+import com.ibm.icu.lang.UCharacter;
+/**
+ * <p>A subclass of java.nio.Charset for providing implementation of ICU's charset converters.
+ * This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link Charset#forName } and {@link #forNameICU }. With that
+ * converter, you can get its properties, set options, convert your data.</p>
+ *
+ * <p>Since many software programs recognize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases.
+ *
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CharsetICU extends Charset{
+
+ String icuCanonicalName;
+ String javaCanonicalName;
+ int options;
+
+ float maxCharsPerByte;
+
+ boolean useFallback;
+
+ String name; /* +4: 60 internal name of the converter- invariant chars */
+
+ int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
+
+ byte platform; /* +68: 1 platform of the converter (only IBM now) */
+ byte conversionType; /* +69: 1 conversion type */
+
+ int minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
+ int maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
+
+ byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */
+ byte subCharLen; /* +76: 1 */
+
+ byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
+ byte hasFromUnicodeFallback; /* +78: 1 */
+ short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
+ byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
+ byte reserved[/*19*/]; /* +81: 19 to round out the structure */
+
+ boolean writeBOM = false; /* only used by UTF-16, UTF-32 */
+
+ /**
+  * Stores both names; throws IllegalCharsetNameException if canonicalName is empty.
+  * @param icuCanonicalName the ICU canonical name, kept in this.icuCanonicalName
+  * @param canonicalName the Java canonical name, passed to the Charset constructor
+  * @param aliases the aliases, passed to the Charset constructor
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ protected CharsetICU(String icuCanonicalName, String canonicalName, String[] aliases) {
+     super(canonicalName,aliases);
+     if(canonicalName.length() == 0){
+         throw new IllegalCharsetNameException(canonicalName);
+     }
+     this.javaCanonicalName = canonicalName;
+     this.icuCanonicalName = icuCanonicalName;
+ }
+
+ /**
+  * Ascertains if a charset is a sub set of this charset
+  * Implements the abstract method of super class.
+  * This implementation only reports a charset that equals this one as contained.
+  * @param cs charset to test, may be null
+  * @return true if the given charset equals this charset,
+  *         false otherwise (including when cs is null)
+  * @stable ICU 3.6
+  */
+ public boolean contains(Charset cs){
+     /* null is never contained; otherwise defer to equals() */
+     final boolean sameCharset = (cs != null) && this.equals(cs);
+
+     return sameCharset;
+ }
+ private static final HashMap algorithmicCharsets = new HashMap(); /* ICU canonical name -> name of the implementing class */
+ static{
+     /*algorithmicCharsets.put("BOCU-1", "com.ibm.icu.charset.CharsetBOCU1" );
+     algorithmicCharsets.put("CESU-8", "com.ibm.icu.charset.CharsetCESU8" );
+     algorithmicCharsets.put("HZ", "com.ibm.icu.charset.CharsetHZ" );
+     algorithmicCharsets.put("imapmailboxname", "com.ibm.icu.charset.CharsetIMAP" );
+     algorithmicCharsets.put("ISCII", "com.ibm.icu.charset.CharsetISCII" );
+     algorithmicCharsets.put("iso2022", "com.ibm.icu.charset.CharsetISO2022" );
+     algorithmicCharsets.put("lmbcs1", "com.ibm.icu.charset.CharsetLMBCS1" );
+     algorithmicCharsets.put("lmbcs11", "com.ibm.icu.charset.CharsetLMBCS11" );
+     algorithmicCharsets.put("lmbcs16", "com.ibm.icu.charset.CharsetLMBCS16" );
+     algorithmicCharsets.put("lmbcs17", "com.ibm.icu.charset.CharsetLMBCS17" );
+     algorithmicCharsets.put("lmbcs18", "com.ibm.icu.charset.CharsetLMBCS18" );
+     algorithmicCharsets.put("lmbcs19", "com.ibm.icu.charset.CharsetLMBCS19" );
+     algorithmicCharsets.put("lmbcs2", "com.ibm.icu.charset.CharsetLMBCS2" );
+     algorithmicCharsets.put("lmbcs3", "com.ibm.icu.charset.CharsetLMBCS3" );
+     algorithmicCharsets.put("lmbcs4", "com.ibm.icu.charset.CharsetLMBCS4" );
+     algorithmicCharsets.put("lmbcs5", "com.ibm.icu.charset.CharsetLMBCS5" );
+     algorithmicCharsets.put("lmbcs6", "com.ibm.icu.charset.CharsetLMBCS6" );
+     algorithmicCharsets.put("lmbcs8", "com.ibm.icu.charset.CharsetLMBCS8" )
+     algorithmicCharsets.put("scsu", "com.ibm.icu.charset.CharsetSCSU" ); */
+     algorithmicCharsets.put("US-ASCII", "com.ibm.icu.charset.CharsetASCII" );
+     algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" );
+     algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" );
+     algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.charset.CharsetUTF16" );
+     algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.charset.CharsetUTF16LE" );
+     algorithmicCharsets.put("UTF16_OppositeEndian", "com.ibm.icu.charset.CharsetUTF16LE" );
+     algorithmicCharsets.put("UTF16_PlatformEndian", "com.ibm.icu.charset.CharsetUTF16" );
+     algorithmicCharsets.put("UTF-32", "com.ibm.icu.charset.CharsetUTF32" );
+     algorithmicCharsets.put("UTF-32BE", "com.ibm.icu.charset.CharsetUTF32" );
+     algorithmicCharsets.put("UTF-32LE", "com.ibm.icu.charset.CharsetUTF32LE" );
+     algorithmicCharsets.put("UTF32_PlatformEndian", "com.ibm.icu.charset.CharsetUTF32" ); /* same class as UTF-32BE, matching the UTF16_PlatformEndian entry */
+     algorithmicCharsets.put("UTF32_OppositeEndian", "com.ibm.icu.charset.CharsetUTF32LE" ); /* same class as UTF-32LE, matching the UTF16_OppositeEndian entry */
+     algorithmicCharsets.put("UTF-7", "com.ibm.icu.charset.CharsetUTF7" );
+     algorithmicCharsets.put("UTF-8", "com.ibm.icu.charset.CharsetUTF8" );
+ }
+
+ /*public*/ static final Charset getCharset(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+     /* algorithmic converters are listed in the table with their own classes */
+     String className = (String) algorithmicCharsets.get(icuCanonicalName);
+     if(className==null){
+         //all the cnv files are loaded as MBCS
+         className = "com.ibm.icu.charset.CharsetMBCS";
+     }
+     /* common start of both failure messages */
+     final String failure = icuCanonicalName+": "+"Could not load " + className;
+     try{
+         /* look up the (String, String, String[]) constructor reflectively */
+         final Constructor ctor = Class.forName(className).getConstructor(
+                 new Class[]{ String.class, String.class, String[].class});
+
+         // Run constructor
+         Object created = ctor.newInstance(
+                 new Object[]{ icuCanonicalName, javaCanonicalName, aliases});
+         if(created instanceof CharsetICU){
+             return (CharsetICU)created;
+         }
+     }catch (InvocationTargetException e) {
+         /* the constructor itself threw: include its exception in the message */
+         throw new UnsupportedCharsetException(failure + ". Exception:" + e.getTargetException());
+     }catch(ClassNotFoundException ex){ /* reported by the throw below */
+     }catch(NoSuchMethodException ex){ /* reported by the throw below */
+     }catch (IllegalAccessException ex){ /* reported by the throw below */
+     }catch (InstantiationException ex){ /* reported by the throw below */
+     }
+     throw new UnsupportedCharsetException(failure);
+ }
+
+ static final boolean isSurrogate(int c){ /* true for values in the range 0xd800..0xdfff */
+     return (((c)&0xfffff800)==0xd800);
+ }
+
+ /**
+  * Always use fallbacks from codepage to Unicode? This implementation always answers true.
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ final boolean isToUUseFallback() {
+     return true;
+ }
+
+ /**
+  * Use fallbacks from Unicode to codepage when useFallback or for private-use code points
+  * @param c A codepoint
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ final boolean isFromUUseFallback(int c) {
+     return (useFallback) || isPrivateUse(c); // isPrivateUse(c) is only evaluated when useFallback is false
+ }
+
+ /**
+  * Returns the default charset name, as reported by an InputStreamReader created without an explicit charset
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ static final String getDefaultCharsetName(){
+     String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding(); // reader over an empty stream, used only to query its encoding
+     return defaultEncoding;
+ }
+
+ static final boolean isPrivateUse(int c) { /* true if UCharacter reports the type PRIVATE_USE for c */
+     return (UCharacter.getType(c) == UCharacter.PRIVATE_USE);
+ }
+
+ /**
+  * Returns a charset object for the named charset.
+  * This method guarantees that the ICU charset is returned when
+  * available. If the ICU charset provider does not support
+  * the specified charset, then try other charset providers
+  * including the standard Java charset provider.
+  *
+  * @param charsetName The name of the requested charset,
+  *          may be either a canonical name or an alias
+  * @return A charset object for the named charset
+  * @throws IllegalCharsetNameException If the given charset name
+  *          is illegal
+  * @throws UnsupportedCharsetException If no support for the
+  *          named charset is available in this instance of the Java
+  *          virtual machine
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ public static Charset forNameICU(String charsetName) throws IllegalCharsetNameException, UnsupportedCharsetException {
+     CharsetICU icuCharset = (CharsetICU) new CharsetProviderICU().charsetForName(charsetName);
+     if (icuCharset == null) {
+         /* the ICU provider does not know the name: use the standard lookup */
+         return Charset.forName(charsetName);
+     }
+     icuCharset.writeBOM = true; /* set on every charset obtained through this method */
+     return icuCharset;
+ }
+
+}
+
diff --git a/src/com/ibm/icu/charset/CharsetMBCS.java b/src/com/ibm/icu/charset/CharsetMBCS.java
new file mode 100644
index 0000000..6cc378c
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetMBCS.java
@@ -0,0 +1,3707 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.Buffer;
+import java.nio.BufferOverflowException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.charset.UConverterSharedData.UConverterType;
+import com.ibm.icu.impl.ICUData;
+import com.ibm.icu.impl.ICUResourceBundle;
+import com.ibm.icu.impl.InvalidFormatException;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+
+class CharsetMBCS extends CharsetICU {
+
+ /* substitution bytes for fromUnicode; the constructor fills this with a copy of the static data's subChar */
+ protected byte[] fromUSubstitution = null;
+ /* converter data returned by loadConverter() in the constructor */
+ protected UConverterSharedData sharedData = null;
+ /* length of the version array allocated by MBCSHeader */
+ static final int MAX_VERSION_LENGTH=4;
+ /**
+ * Fallbacks to Unicode are stored outside the normal state table and code point structures
+ * in a vector of items of this type. They are sorted by offset.
+ * Instances are created by loadConverter() and filled in by the data reader.
+ */
+ final class MBCSToUFallback {
+ /* sort key of the fallback entry (see class comment) */
+ int offset;
+ /* Unicode code point of the fallback mapping */
+ int codePoint;
+ }
+ /**
+  * This is the MBCS part of the UConverterTable union (a runtime data structure).
+  * It keeps all the per-converter data and points into the loaded mapping tables.
+  * All fields are plain package-visible data that loadConverter() fills in.
+  */
+ static final class UConverterMBCSTable {
+     /* ---- toUnicode ---- */
+     short countStates;
+     byte dbcsOnlyState;
+     boolean stateTableOwned;
+     int countToUFallbacks;
+
+     int[][] stateTable;                 /* [countStates][256] */
+     int[][] swapLFNLStateTable;         /* [countStates][256], for swaplfnl */
+     char[] unicodeCodeUnits;            /* [countUnicodeResults] */
+     MBCSToUFallback[] toUFallbacks;     /* [countToUFallbacks] */
+
+     /* ---- fromUnicode ---- */
+     char[] fromUnicodeTable;
+     byte[] fromUnicodeBytes;
+     byte[] swapLFNLFromUnicodeBytes;    /* for swaplfnl */
+     int fromUBytesLength;
+     short outputType;
+     short unicodeMask;
+
+     /* converter name for swaplfnl */
+     String swapLFNLName;
+
+     /* ---- extension data ---- */
+     UConverterSharedData baseSharedData;
+     ByteBuffer extIndexes;              // create int[] view etc. as needed
+
+     UConverterMBCSTable()
+     {
+     }
+ }
+
+ /**
+  * MBCS data header, filled in by the data reader before the tables are read.
+  * loadConverter() uses the low byte of flags as the output type and
+  * flags&gt;&gt;&gt;8 as the offset of the extension data.
+  */
+ final class MBCSHeader {
+     byte[] version;             /* MAX_VERSION_LENGTH bytes */
+     int countStates;
+     int countToUFallbacks;
+     int offsetToUCodeUnits;
+     int offsetFromUTable;
+     int offsetFromUBytes;
+     int flags;
+     int fromUBytesLength;
+
+     MBCSHeader()
+     {
+         this.version = new byte[MAX_VERSION_LENGTH];
+     }
+ }
+ /**
+  * Creates an MBCS charset: loads the converter data named by
+  * icuCanonicalName, copies its byte-length limits and substitution
+  * characters into this charset and then runs initializeConverter(0).
+  * @param icuCanonicalName ICU name of the converter; also the data file name
+  * @param javaCanonicalName canonical Java name passed to the superclass
+  * @param aliases alias names passed to the superclass
+  * @throws InvalidFormatException if the converter data cannot be loaded
+  * @draft ICU 3.6
+  * @provisional This API might change or be removed in a future release.
+  */
+ public CharsetMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) throws InvalidFormatException{
+     super(icuCanonicalName, javaCanonicalName, aliases);
+
+     // now try to load the data; nestedLoads == 1 marks a top-level load
+     sharedData = loadConverter(new LoadArguments(1, icuCanonicalName));
+     final UConverterStaticData info = sharedData.staticData;
+
+     maxBytesPerChar = info.maxBytesPerChar;
+     minBytesPerChar = info.minBytesPerChar;
+     maxCharsPerByte = 1;
+     subChar = info.subChar;
+     subCharLen = info.subCharLen;
+     subChar1 = info.subChar1;
+
+     // keep a private copy of exactly subCharLen substitution bytes
+     fromUSubstitution = new byte[info.subCharLen];
+     System.arraycopy(info.subChar, 0, fromUSubstitution, 0, info.subCharLen);
+
+     // Todo: pass options
+     initializeConverter(0);
+ }
+
+ /**
+  * Arguments for loadConverter(): which converter to load and how deeply
+  * the load is nested. The constructor passes 1 for a top-level load;
+  * loadConverter() passes 2 when it loads the base table of an extension file.
+  */
+ class LoadArguments
+ {
+ int nestedLoads; /* count nested loadConverter() calls */
+ // int reserved; /* reserved - for good alignment of the pointers */
+ // long options;
+ // String pkg;
+ /* converter name; loadConverter() uses it to build the data file name */
+ String name;
+
+ LoadArguments(int nestedLoads, String name)
+ {
+ this.nestedLoads = nestedLoads;
+ this.name = name;
+ }
+ }
+
+ /**
+  * Loads the converter data file named by args.name and builds the shared
+  * data for it. For an extension-only file the base table named inside the
+  * file is loaded recursively (with nestedLoads == 2) and its table is then
+  * overlaid with this file's extension indexes.
+  *
+  * @param args name of the converter to load and the nesting depth
+  *        (1 for a top-level load, 2 when loading a base table)
+  * @return shared data holding the static data, the reader and the MBCS table
+  * @throws InvalidFormatException if the data cannot be read, is not MBCS data
+  *         with header version 4, has an unknown output type, or is an
+  *         inconsistent extension-only file
+  */
+ protected UConverterSharedData loadConverter(LoadArguments args) throws InvalidFormatException
+ {
+     // Read converter data from file
+     UConverterStaticData staticData = new UConverterStaticData();
+     UConverterDataReader reader = null;
+     try {
+         InputStream i = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/" + args.name + "." + UConverterSharedData.DATA_TYPE);
+         BufferedInputStream b = new BufferedInputStream(i, UConverterConstants.CNV_DATA_BUFFER_SIZE);
+         reader = new UConverterDataReader(b);
+         reader.readStaticData(staticData);
+     }
+     catch(Exception e) {
+         // IOException and any runtime failure while opening/reading are reported alike
+         throw new InvalidFormatException();
+     }
+
+     int type = staticData.conversionType;
+
+     if( type != UConverterSharedData.UConverterType.MBCS ||
+         staticData.structSize != UConverterSharedData.SIZE_OF_UCONVERTER_SHARED_DATA)
+     {
+         throw new InvalidFormatException();
+     }
+
+     UConverterSharedData data = new UConverterSharedData(UConverterSharedData.SIZE_OF_UCONVERTER_SHARED_DATA, 1, null, false, 0);
+     data.dataReader = reader;
+     data.staticData = staticData;
+     data.sharedDataCached = false;
+
+     // Load data
+     UConverterMBCSTable mbcsTable = data.mbcs;
+     MBCSHeader header = new MBCSHeader();
+     try {
+         reader.readMBCSHeader(header);
+     }
+     catch(IOException e) {
+         throw new InvalidFormatException();
+     }
+
+     if(header.version[0]!=4) {
+         throw new InvalidFormatException();
+     }
+
+     mbcsTable.outputType=(byte)header.flags;
+
+     /* extension data, header version 4.2 and higher */
+     int offset=header.flags>>>8;
+     String baseNameString = null;
+     if(mbcsTable.outputType == MBCS_OUTPUT_EXT_ONLY) {
+         try {
+             baseNameString = reader.readBaseTableName();
+             if(offset != 0) {
+                 //agljport:comment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null terminator byte all already read;
+                 mbcsTable.extIndexes=reader.readExtIndexes(offset - 32 - baseNameString.length() - 1);
+             }
+         }
+         catch(IOException e) {
+             throw new InvalidFormatException();
+         }
+     }
+
+     if(mbcsTable.outputType==MBCS_OUTPUT_EXT_ONLY) {
+         UConverterSharedData baseSharedData = null;
+         ByteBuffer extIndexes;
+         String baseName;
+
+         /* extension-only file, load the base table and set values appropriately */
+         if((extIndexes=mbcsTable.extIndexes)==null) {
+             /* extension-only file without extension */
+             throw new InvalidFormatException();
+         }
+
+         if(args.nestedLoads!=1) {
+             /* an extension table must not be loaded as a base table */
+             throw new InvalidFormatException();
+         }
+
+         /* load the base table */
+         baseName=baseNameString;
+         if(baseName.equals(staticData.name)) {
+             /* forbid loading this same extension-only file */
+             throw new InvalidFormatException();
+         }
+
+         /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
+         //agljport:fix args.size=sizeof(UConverterLoadArgs);
+         LoadArguments args2 = new LoadArguments(2, baseName);
+         baseSharedData=loadConverter(args2);
+
+         if( baseSharedData.staticData.conversionType!=UConverterType.MBCS ||
+             baseSharedData.mbcs.baseSharedData!=null
+         ) {
+             //agljport:fix ucnv_unload(baseSharedData);
+             throw new InvalidFormatException();
+         }
+
+         /* copy the base table data */
+         //agljport:comment deep copy in C changes mbcs through local reference mbcsTable; in java we probably don't need the deep copy so can just make sure mbcs and its local reference both refer to the same new object
+         mbcsTable = data.mbcs = baseSharedData.mbcs;
+
+         /* overwrite values with relevant ones for the extension converter */
+         mbcsTable.baseSharedData=baseSharedData;
+         mbcsTable.extIndexes=extIndexes;
+
+         /*
+          * It would be possible to share the swapLFNL data with a base converter,
+          * but the generated name would have to be different, and the memory
+          * would have to be free'd only once.
+          * It is easier to just create the data for the extension converter
+          * separately when it is requested.
+          */
+         mbcsTable.swapLFNLStateTable=null;
+         mbcsTable.swapLFNLFromUnicodeBytes=null;
+         mbcsTable.swapLFNLName=null;
+
+         /*
+          * Set a special, runtime-only outputType if the extension converter
+          * is a DBCS version of a base converter that also maps single bytes.
+          */
+         if(staticData.conversionType==UConverterType.DBCS ||
+             (staticData.conversionType==UConverterType.MBCS && staticData.minBytesPerChar>=2)){
+
+             if(baseSharedData.mbcs.outputType==MBCS_OUTPUT_2_SISO) {
+                 /* the base converter is SI/SO-stateful */
+                 int entry;
+
+                 /* get the dbcs state from the state table entry for SO=0x0e */
+                 entry=mbcsTable.stateTable[0][0xe];
+                 if( MBCS_ENTRY_IS_FINAL(entry) &&
+                     MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
+                     MBCS_ENTRY_FINAL_STATE(entry)!=0
+                 ) {
+                     mbcsTable.dbcsOnlyState=(byte)MBCS_ENTRY_FINAL_STATE(entry);
+
+                     mbcsTable.outputType=MBCS_OUTPUT_DBCS_ONLY;
+                 }
+             }
+             else if(baseSharedData.staticData.conversionType==UConverterType.MBCS &&
+                     baseSharedData.staticData.minBytesPerChar==1 &&
+                     baseSharedData.staticData.maxBytesPerChar==2 &&
+                     mbcsTable.countStates<=127){
+
+                 /* non-stateful base converter, need to modify the state table */
+                 int state[]; // this works because java 2-D array is array of references and we can have state = newStateTable[i];
+                 int i;
+                 int count=mbcsTable.countStates;
+
+                 /*
+                  * Allocate one row per existing state plus one row for the new
+                  * all-illegal state. (The C code's (count+1)*1024 is a byte
+                  * count for rows of 256 32-bit entries, not a row count.)
+                  */
+                 int[][] newStateTable=new int[count+1][256];
+
+                 /* copy the base state table contents, never past the rows we allocated for them */
+                 int rows=Math.min(count, mbcsTable.stateTable.length);
+                 for(i = 0; i < rows; ++i) {
+                     System.arraycopy(mbcsTable.stateTable[i], 0, newStateTable[i], 0, mbcsTable.stateTable[i].length);
+                 }
+
+                 /* change all final single-byte entries to go to a new all-illegal state */
+                 state=newStateTable[0];
+                 for(i=0; i<256; ++i) {
+                     if(MBCS_ENTRY_IS_FINAL(state[i])) {
+                         state[i]=MBCS_ENTRY_TRANSITION(count, 0);
+                     }
+                 }
+
+                 /* build the new all-illegal state */
+                 state=newStateTable[count];
+                 for(i=0; i<256; ++i) {
+                     state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
+                 }
+                 mbcsTable.stateTable=newStateTable;
+                 /* countStates is a short: no byte cast, so count==127 yields 128 and not -128 */
+                 mbcsTable.countStates=(short)(count+1);
+                 mbcsTable.stateTableOwned=true;
+
+                 mbcsTable.outputType=MBCS_OUTPUT_DBCS_ONLY;
+             }
+         }
+
+         /*
+          * unlike below for files with base tables, do not get the unicodeMask
+          * from the sharedData; instead, use the base table's unicodeMask,
+          * which came along with the base table object taken over above;
+          * this is necessary because the static data unicodeMask, especially
+          * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
+          */
+     }
+     else {
+         /* conversion file with a base table; an additional extension table is optional */
+         /* make sure that the output type is known */
+         switch(mbcsTable.outputType) {
+         case MBCS_OUTPUT_1:
+         case MBCS_OUTPUT_2:
+         case MBCS_OUTPUT_3:
+         case MBCS_OUTPUT_4:
+         case MBCS_OUTPUT_3_EUC:
+         case MBCS_OUTPUT_4_EUC:
+         case MBCS_OUTPUT_2_SISO:
+             /* OK */
+             break;
+         default:
+             throw new InvalidFormatException();
+         }
+
+         /* size the arrays from the header, then let the reader fill them */
+         int[][] stateTableArray = new int[header.countStates][256];
+         MBCSToUFallback[] toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks];
+         for(int i = 0; i < toUFallbacksArray.length; ++i) {
+             toUFallbacksArray[i] = new MBCSToUFallback();
+         }
+         char[] unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits)/2];
+         char[] fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable)/2];
+         byte[] fromUnicodeBytesArray = new byte[header.fromUBytesLength];
+         try {
+             reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray, fromUnicodeBytesArray);
+         }
+         catch(IOException e) {
+             throw new InvalidFormatException();
+         }
+
+         /* the C field is an unsigned 8-bit count: keep 128..255 positive instead of sign-extending */
+         mbcsTable.countStates=(short)(header.countStates&0xff);
+         mbcsTable.countToUFallbacks=header.countToUFallbacks;
+         mbcsTable.stateTable=stateTableArray;
+         mbcsTable.toUFallbacks=toUFallbacksArray;
+         mbcsTable.unicodeCodeUnits=unicodeCodeUnitsArray;
+
+         mbcsTable.fromUnicodeTable=fromUnicodeTableArray;
+         mbcsTable.fromUnicodeBytes=fromUnicodeBytesArray;
+         mbcsTable.fromUBytesLength=header.fromUBytesLength;
+
+         /*
+          * converter versions 6.1 and up contain a unicodeMask that is
+          * used here to select the most efficient function implementations
+          */
+         //agljport:fix the C code checks the data format version first and assumes
+         //agljport:fix UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES for versions older than 6.1
+         /* mask off possible future extensions to be safe */
+         mbcsTable.unicodeMask=(short)(staticData.unicodeMask&3);
+
+         if(offset != 0) {
+             try {
+                 mbcsTable.extIndexes=reader.readExtIndexes(0);
+             }
+             catch(IOException e) {
+                 throw new InvalidFormatException();
+             }
+         }
+     }
+     return data;
+ }
+
+ /**
+  * Finishes the per-converter setup once sharedData has been loaded:
+  * drops the swaplfnl option for DBCS-only tables, sets the GB 18030 flag
+  * when the ICU name contains "gb18030", and raises maxBytesPerChar for
+  * SI/SO tables and for extension tables that need more bytes.
+  * @param options converter option bits
+  */
+ protected void initializeConverter(int options)
+ {
+     final UConverterMBCSTable table = sharedData.mbcs;
+     final short type = table.outputType;
+
+     if (type == MBCS_OUTPUT_DBCS_ONLY) {
+         /* the swaplfnl option does not apply, remove it */
+         options &= ~UConverterConstants.OPTION_SWAP_LFNL;
+         this.options = options;
+     }
+
+     if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+         /* do this because double-checked locking is broken */
+         //agljport:todo umtx_lock(NULL);
+         boolean swapTablesCached = (table.swapLFNLStateTable != null);
+         //agljport:todo umtx_unlock(NULL);
+
+         if (!swapTablesCached) {
+             //agljport:fix not ported yet: call _EBCDICSwapLFNL() to build the swapped
+             //agljport:fix tables; when it fails or does not apply, remove the option
+             //agljport:fix (cnv->options=options&=~UCNV_OPTION_SWAP_LFNL)
+         }
+     }
+
+     if (icuCanonicalName.toLowerCase().indexOf("gb18030") >= 0) {
+         /* set a flag for GB 18030 mode, which changes the callback behavior */
+         this.options |= MBCS_OPTION_GB18030;
+     }
+
+     /* fix maxBytesPerUChar depending on outputType and options etc. */
+     if (type == MBCS_OUTPUT_2_SISO) {
+         maxBytesPerChar = 3; /* SO+DBCS */
+     }
+
+     final ByteBuffer indexes = table.extIndexes;
+     if (indexes == null) {
+         return;
+     }
+
+     byte extMaxBytes = (byte) GET_MAX_BYTES_PER_UCHAR(indexes);
+     if (type == MBCS_OUTPUT_2_SISO) {
+         ++extMaxBytes; /* SO + multiple DBCS */
+     }
+     if (extMaxBytes > maxBytesPerChar) {
+         maxBytesPerChar = extMaxBytes;
+     }
+ }
+
+ /**
+  * MBCS output types for conversions from Unicode.
+  * These per-converter types determine the storage method in stage 3 of the lookup table,
+  * mostly how many bytes are stored per entry.
+  * The values are written out literally; the hex value is given in the trailing comment.
+  */
+ protected static final int MBCS_OUTPUT_1 = 0;          /* 0 */
+ protected static final int MBCS_OUTPUT_2 = 1;          /* 1 */
+ protected static final int MBCS_OUTPUT_3 = 2;          /* 2 */
+ protected static final int MBCS_OUTPUT_4 = 3;          /* 3 */
+ protected static final int MBCS_OUTPUT_3_EUC = 8;      /* 8 */
+ protected static final int MBCS_OUTPUT_4_EUC = 9;      /* 9 */
+ protected static final int MBCS_OUTPUT_2_SISO = 12;    /* c */
+ protected static final int MBCS_OUTPUT_2_HZ = 13;      /* d */
+ protected static final int MBCS_OUTPUT_EXT_ONLY = 14;  /* e */
+ protected static final int MBCS_OUTPUT_COUNT = 15;     /* one past the last file-level output type */
+ protected static final int MBCS_OUTPUT_DBCS_ONLY = 0xdb; /* runtime-only type for DBCS-only handling of SISO tables */
+
+ /* GB 18030 data ------------------------------------------------------------ */
+
+ /**
+  * Combines the four bytes of a GB 18030 four-byte sequence into one linear
+  * value: ((a*10 + b)*126 + c)*10 + d.
+  */
+ protected static long LINEAR_18030(long a, long b, long c, long d) {
+     long linear = a * 10 + b;
+     linear = linear * 126L + c;
+     linear = linear * 10L + d;
+     return linear;
+ }
+
+ /* linear value of the four-byte sequence 81 30 81 30 */
+ protected static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
+
+ /**
+  * Linear value of a four-byte sequence packed big-endian into the low 32 bits of x.
+  */
+ protected static long LINEAR(long x) {
+     final long b1 = x >>> 24;
+     final long b2 = (x >>> 16) & 0xff;
+     final long b3 = (x >>> 8) & 0xff;
+     final long b4 = x & 0xff;
+     return LINEAR_18030(b1, b2, b3, b4);
+ }
+
+ /*
+ * Some ranges of GB 18030 where both the Unicode code points and the
+ * GB four-byte sequences are contiguous and are handled algorithmically by
+ * the special callback functions below.
+ * The values are start & end of Unicode & GB codes; the two GB columns of
+ * each row are stored as linear values computed by LINEAR().
+ *
+ * Note that single surrogates are not mapped by GB 18030
+ * as of the re-released mapping tables from 2000-nov-30.
+ */
+ protected static final long gb18030Ranges[][] = new long[/*13*/][/*4*/]{
+ {0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L)},
+ {0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L)},
+ {0x0452L, 0x200FL, LINEAR(0x8130D330L), LINEAR(0x8136A531L)},
+ {0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L)},
+ {0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L)},
+ {0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L)},
+ {0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L)},
+ {0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L)},
+ {0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L)},
+ {0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L)},
+ {0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L)},
+ {0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L)},
+ {0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L)}
+ };
+
+ /* bit flag (0x8000) for UConverter.options indicating GB 18030 special handling */
+ protected static final int MBCS_OPTION_GB18030 = 1 << 15;
+
+ /**
+  * MBCS action codes for conversions to Unicode.
+  * These values are in bits 23..20 of the state table entries.
+  */
+ protected static final int MBCS_STATE_VALID_DIRECT_16 = 0;
+ protected static final int MBCS_STATE_VALID_DIRECT_20 = 1;
+ protected static final int MBCS_STATE_FALLBACK_DIRECT_16 = 2;
+ protected static final int MBCS_STATE_FALLBACK_DIRECT_20 = 3;
+ protected static final int MBCS_STATE_VALID_16 = 4;
+ protected static final int MBCS_STATE_VALID_16_PAIR = 5;
+ protected static final int MBCS_STATE_UNASSIGNED = 6;
+ protected static final int MBCS_STATE_ILLEGAL = 7;
+ protected static final int MBCS_STATE_CHANGE_ONLY = 8;
+
+ /*
+  * Methods for state table entries.
+  * Bit 31 marks a final entry (so final entries are negative ints);
+  * the state is stored from bit 24 up, and final entries carry the
+  * action in bits 23..20 and a value in bits 19..0.
+  */
+
+ /* build a transition entry from the next state and an offset */
+ protected static int MBCS_ENTRY_TRANSITION(int state, int offset) {
+     return (state << 24) | offset;
+ }
+
+ /* build a final entry from the next state, the action code and the value */
+ protected static int MBCS_ENTRY_FINAL(int state, int action, int value) {
+     return 0x80000000 | (state << 24) | (action << 20) | value;
+ }
+
+ /* transition entries have bit 31 clear */
+ protected static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
+     return entry >= 0;
+ }
+
+ /* final entries have bit 31 set */
+ protected static boolean MBCS_ENTRY_IS_FINAL(int entry) {
+     return entry < 0;
+ }
+
+ protected static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
+     return entry >>> 24;
+ }
+
+ protected static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
+     return entry & 0xffffff;
+ }
+
+ /* the state of a final entry, without the final flag in bit 31 */
+ protected static int MBCS_ENTRY_FINAL_STATE(int entry) {
+     return (entry >>> 24) & 0x7f;
+ }
+
+ /* signed comparison: true only for final entries with state 0 and action 0 */
+ protected static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
+     return entry < 0x80100000;
+ }
+
+ protected static int MBCS_ENTRY_FINAL_ACTION(int entry) {
+     return (entry >>> 20) & 0xf;
+ }
+
+ protected static int MBCS_ENTRY_FINAL_VALUE(int entry) {
+     return entry & 0xfffff;
+ }
+
+ /* the low 16 bits of the entry as a UTF-16 code unit */
+ protected static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
+     return (char) entry;
+ }
+
+ /**
+  * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte.
+  * It works for single-byte, single-state codepages that only map
+  * to and from BMP code points, and it always
+  * returns fallback values.
+  * @param mbcs table whose state 0 row is consulted
+  * @param b input byte; only its low 8 bits are used
+  * @return the low 16 bits of the state table entry for b
+  */
+ protected static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b)
+ {
+     final int unsignedByte = b & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int entry = mbcs.stateTable[0][unsignedByte];
+     return MBCS_ENTRY_FINAL_VALUE_16(entry);
+ }
+
+ /*
+  * single-byte fromUnicode: get the 16-bit result word.
+  * The table is walked in three stages, keyed by c>>>10, bits 9..4 and bits 3..0 of c;
+  * results holds the 16-bit words as big-endian byte pairs.
+  */
+ protected static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c)
+ {
+     final int stage2Index = table[c >>> 10] + ((c >>> 4) & 0x3f);
+     final int stage3Index = table[stage2Index] + (c & 0xf);
+     final int byteOffset = 2 * stage3Index; // used as index into byte[] array treated as char[] array
+     final int high = results[byteOffset] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int low = results[byteOffset + 1] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     return (char) ((high << 8) | low);
+ }
+
+ /*
+  * multi-byte fromUnicode: get the 32-bit stage 2 entry.
+  * The entry is stored as two consecutive chars, high half first.
+  */
+ protected static int MBCS_STAGE_2_FROM_U(char[] table, int c)
+ {
+     final int entryIndex = table[c >>> 10] + ((c >>> 4) & 0x3f);
+     final int charIndex = 2 * entryIndex; // 2x because used as index into char[] array treated as int[] array
+     final int high = table[charIndex] & UConverterConstants.UNSIGNED_SHORT_MASK;
+     final int low = table[charIndex + 1] & UConverterConstants.UNSIGNED_SHORT_MASK;
+     return (high << 16) | low;
+ }
+
+ /* tests the per-code-point flag in the upper 16 bits of a stage 2 entry: bit 16 + (c & 0xf) */
+ protected static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c)
+ {
+     final int flagBit = 1 << (16 + (c & 0xf));
+     return (stage2Entry & flagBit) != 0;
+ }
+
+ /*
+  * Reads the 2-byte stage 3 value for c: the low 16 bits of the stage 2 entry
+  * select a block of 16 values and c & 0xf selects the value within it.
+  */
+ protected static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c)
+ {
+     final int block = (char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK;
+     final int byteOffset = 2 * (16 * block + (c & 0xf));
+     final int high = bytes[byteOffset] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int low = bytes[byteOffset + 1] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     return (char) ((high << 8) | low);
+ }
+
+ /*
+  * Reads the 4-byte stage 3 value for c: the low 16 bits of the stage 2 entry
+  * select a block of 16 values and c & 0xf selects the value within it.
+  * The four bytes are combined big-endian.
+  */
+ protected static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c)
+ {
+     final int block = (char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK;
+     final int byteOffset = 4 * (16 * block + (c & 0xf));
+     final int b0 = bytes[byteOffset] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int b1 = bytes[byteOffset + 1] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int b2 = bytes[byteOffset + 2] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     final int b3 = bytes[byteOffset + 3] & UConverterConstants.UNSIGNED_BYTE_MASK;
+     return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+ }
+
+ /*
+  * Computes the byte offset of the 3-byte stage 3 value for c.
+  * Only the offset is returned; the bytes parameter is not read.
+  */
+ protected static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c)
+ {
+     final int block = (char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK;
+     final int valueIndex = 16 * block + (c & 0xf);
+     return valueIndex * 3;
+ }
+
+ //------------UConverterExt-------------------------------------------------------
+
+ /* positions of the 32-bit entries in the extension indexes array */
+ protected static final int EXT_INDEXES_LENGTH = 0;            /* 0 */
+
+ protected static final int EXT_TO_U_INDEX = 1;                /* 1 */
+ protected static final int EXT_TO_U_LENGTH = 2;
+ protected static final int EXT_TO_U_UCHARS_INDEX = 3;
+ protected static final int EXT_TO_U_UCHARS_LENGTH = 4;
+
+ protected static final int EXT_FROM_U_UCHARS_INDEX = 5;       /* 5 */
+ protected static final int EXT_FROM_U_VALUES_INDEX = 6;
+ protected static final int EXT_FROM_U_LENGTH = 7;
+ protected static final int EXT_FROM_U_BYTES_INDEX = 8;
+ protected static final int EXT_FROM_U_BYTES_LENGTH = 9;
+
+ protected static final int EXT_FROM_U_STAGE_12_INDEX = 10;    /* 10 */
+ protected static final int EXT_FROM_U_STAGE_1_LENGTH = 11;
+ protected static final int EXT_FROM_U_STAGE_12_LENGTH = 12;
+ protected static final int EXT_FROM_U_STAGE_3_INDEX = 13;
+ protected static final int EXT_FROM_U_STAGE_3_LENGTH = 14;
+ protected static final int EXT_FROM_U_STAGE_3B_INDEX = 15;
+ protected static final int EXT_FROM_U_STAGE_3B_LENGTH = 16;
+
+ protected static final int EXT_COUNT_BYTES = 17;              /* 17 */
+ protected static final int EXT_COUNT_UCHARS = 18;
+ protected static final int EXT_FLAGS = 19;
+
+ protected static final int EXT_RESERVED_INDEX = 20;           /* 20, moves with additional indexes */
+
+ protected static final int EXT_SIZE = 31;
+ protected static final int EXT_INDEXES_MIN_LENGTH = 32;
+
+ /* toUnicode helpers -------------------------------------------------------- */
+
+ /* a toUnicode section word holds the byte in bits 31..24 and the value in bits 23..0 */
+ protected static final int TO_U_BYTE_SHIFT = 24;
+ protected static final int TO_U_VALUE_MASK = 0x00ffffff;
+ /* code point results are stored with TO_U_MIN_CODE_POINT added */
+ protected static final int TO_U_MIN_CODE_POINT = 0x1f0000;
+ protected static final int TO_U_MAX_CODE_POINT = 0x2fffff;
+ protected static final int TO_U_ROUNDTRIP_FLAG = 0x800000; /* 1<<23 */
+ protected static final int TO_U_INDEX_MASK = 0x3ffff;
+ protected static final int TO_U_LENGTH_SHIFT = 18;
+ protected static final int TO_U_LENGTH_OFFSET = 12;
+
+ /* maximum number of indexed UChars */
+ protected static final int MAX_UCHARS = 19;
+
+ /* extracts the byte stored above TO_U_BYTE_SHIFT in a section word (unsigned shift) */
+ protected static int TO_U_GET_BYTE(int word)
+ {
+     final int sectionByte = word >>> TO_U_BYTE_SHIFT;
+     return sectionByte;
+ }
+
+ /* extracts the value part (the bits selected by TO_U_VALUE_MASK) of a section word */
+ protected static int TO_U_GET_VALUE(int word)
+ {
+     final int value = word & TO_U_VALUE_MASK;
+     return value;
+ }
+
+ /* true if the roundtrip flag bit is set in the value */
+ protected static boolean TO_U_IS_ROUNDTRIP(int value)
+ {
+     final int flag = value & TO_U_ROUNDTRIP_FLAG;
+     return flag != 0;
+ }
+
+ /* true if the value, masked with UNSIGNED_INT_MASK, is below TO_U_MIN_CODE_POINT */
+ protected static boolean TO_U_IS_PARTIAL(int value)
+ {
+     return TO_U_MIN_CODE_POINT > (value & UConverterConstants.UNSIGNED_INT_MASK);
+ }
+
+ /* a partial-match value is itself the index; it is returned unchanged */
+ protected static int TO_U_GET_PARTIAL_INDEX(int value)
+ {
+ return value;
+ }
+
+ /* clears the roundtrip flag bit and leaves all other bits unchanged */
+ protected static int TO_U_MASK_ROUNDTRIP(int value)
+ {
+     final int withoutFlag = ~TO_U_ROUNDTRIP_FLAG;
+     return value & withoutFlag;
+ }
+
+ /* builds a section word: the byte (taken as unsigned) shifted by TO_U_BYTE_SHIFT, ORed with value */
+ protected static int TO_U_MAKE_WORD(byte b, int value)
+ {
+     final int unsignedByte = b & UConverterConstants.UNSIGNED_BYTE_MASK;
+     return (unsignedByte << TO_U_BYTE_SHIFT) | value;
+ }
+
+ /*
+  * use after masking off the roundtrip flag;
+  * true if the value, masked with UNSIGNED_INT_MASK, is at most TO_U_MAX_CODE_POINT
+  */
+ protected static boolean TO_U_IS_CODE_POINT(int value)
+ {
+     return TO_U_MAX_CODE_POINT >= (value & UConverterConstants.UNSIGNED_INT_MASK);
+ }
+
+ /* removes the TO_U_MIN_CODE_POINT bias from a value to get the code point */
+ protected static int TO_U_GET_CODE_POINT(int value)
+ {
+     return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);
+ }
+
+ /* extracts the index part (the bits selected by TO_U_INDEX_MASK) of a value */
+ protected static int TO_U_GET_INDEX(int value)
+ {
+     final int index = value & TO_U_INDEX_MASK;
+     return index;
+ }
+
+ /* extracts the length: the bits above TO_U_LENGTH_SHIFT minus TO_U_LENGTH_OFFSET */
+ protected static int TO_U_GET_LENGTH(int value)
+ {
+     final int biasedLength = value >>> TO_U_LENGTH_SHIFT;
+     return biasedLength - TO_U_LENGTH_OFFSET;
+ }
+
+ /* fromUnicode helpers ------------------------------------------------------ */
+
+ /* most trie constants are shared with ucnvmbcs.h */
+ protected static final int STAGE_2_LEFT_SHIFT = 2;
+ protected static final int STAGE_3_GRANULARITY = 4;
+
+ /*
+  * trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10.
+  * All reads are relative to the current position of each buffer.
+  */
+ protected static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c)
+ {
+     final int base3 = stage3.position();
+     final int base12 = stage12.position();
+     /* stage 1 gives the start of the stage 2 block; bits 9..4 of c select the entry */
+     final int stage2Index = stage12.get(base12 + s1Index) + ((c >>> 4) & 0x3f);
+     /* the stage 2 entry, shifted, is the start of the stage 3 block; bits 3..0 select the entry */
+     final int stage3Block = stage12.get(base12 + stage2Index) << STAGE_2_LEFT_SHIFT;
+     return stage3.get(base3 + stage3Block + (c & 0xf));
+ }
+
+ /* layout of a fromUnicode value: roundtrip flag in bit 31, length from bit 24 up, data in bits 23..0 */
+ protected static final int FROM_U_LENGTH_SHIFT = 24;
+ protected static final int FROM_U_ROUNDTRIP_FLAG = 0x80000000; /* 1<<31 */
+ protected static final int FROM_U_RESERVED_MASK = 0x60000000;
+ protected static final int FROM_U_DATA_MASK = 0x00ffffff;
+
+ /* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
+ protected static final int FROM_U_SUBCHAR1 = 0x80000001;
+
+ /* at most 3 bytes in the lower part of the value */
+ protected static final int FROM_U_MAX_DIRECT_LENGTH = 3;
+
+ /* maximum number of indexed bytes */
+ protected static final int MAX_BYTES = 31; /* 0x1f */
+
+ /* a value whose bits above FROM_U_LENGTH_SHIFT are all zero is a partial match */
+ protected static boolean FROM_U_IS_PARTIAL(int value) {
+     return 0 == (value >>> FROM_U_LENGTH_SHIFT);
+ }
+
+ /* a partial-match value is itself the index */
+ protected static int FROM_U_GET_PARTIAL_INDEX(int value) {
+     return value;
+ }
+
+ protected static boolean FROM_U_IS_ROUNDTRIP(int value) {
+     return 0 != (value & FROM_U_ROUNDTRIP_FLAG);
+ }
+
+ protected static int FROM_U_MASK_ROUNDTRIP(int value) {
+     return value & ~FROM_U_ROUNDTRIP_FLAG;
+ }
+
+ /* use after masking off the roundtrip flag */
+ protected static int FROM_U_GET_LENGTH(int value) {
+     return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;
+ }
+
+ /* get bytes or bytes index */
+ protected static int FROM_U_GET_DATA(int value) {
+     return value & FROM_U_DATA_MASK;
+ }
+
+ /*
+  * get the pointer to an extension array from indexes[index]:
+  * the 32-bit entry at byte offset index*4 is used as a byte position in
+  * indexes, and a view of the requested element type starting there is
+  * returned. The position of indexes is restored before returning.
+  * int, short and char get typed views; byte and any other type get a byte slice.
+  */
+ protected static Buffer ARRAY(ByteBuffer indexes, int index, Class itemType)
+ {
+     final int savedPosition = indexes.position();
+     final int arrayStart = indexes.getInt(index*4);
+
+     indexes.position(arrayStart);
+     final Buffer view;
+     if(itemType == int.class) {
+         view = indexes.asIntBuffer();
+     } else if(itemType == short.class) {
+         view = indexes.asShortBuffer();
+     } else if(itemType == char.class) {
+         view = indexes.asCharBuffer();
+     } else {
+         view = indexes.slice();
+     }
+     indexes.position(savedPosition);
+     return view;
+ }
+
+ /*
+  * Returns the low byte of indexes[EXT_COUNT_BYTES].
+  * EXT_COUNT_BYTES is the position of a 32-bit entry, while
+  * ByteBuffer.getInt(int) takes a byte offset, so the position is
+  * multiplied by 4 (the same addressing ARRAY() uses).
+  * As before, the buffer position is reset to 0 as a side effect.
+  */
+ protected static int GET_MAX_BYTES_PER_UCHAR(ByteBuffer indexes)
+ {
+     indexes.position(0);
+     return indexes.getInt(EXT_COUNT_BYTES*4)&0xff;
+ }
+
+ /*
+  * Searches the sorted section of length UChars that starts at the current
+  * position of fromUSection; the position itself is not moved.
+  * @return index of the UChar, if found; else <0
+  */
+ protected static int findFromU(CharBuffer fromUSection, int length, char u)
+ {
+     final int base = fromUSection.position();
+     int low = 0;
+     int high = length;
+
+     /* binary search; invariant: the match, if any, is in [low, high) */
+     while(high-low > 1) {
+         if(high-low <= 4) {
+             /* linear search for the last part: stop at the first unit >= u */
+             if(u <= fromUSection.get(base+low)) {
+                 break;
+             }
+             if(++low < high && u <= fromUSection.get(base+low)) {
+                 break;
+             }
+             if(++low < high && u <= fromUSection.get(base+low)) {
+                 break;
+             }
+             /* always break at low==high-1 */
+             ++low;
+             break;
+         }
+
+         final int middle = (low+high)/2;
+         if(u < fromUSection.get(base+middle)) {
+             high = middle;
+         } else {
+             low = middle;
+         }
+     }
+
+     /* did we really find it? */
+     if(low < high && u == fromUSection.get(base+low)) {
+         return low;
+     }
+     return -1; /* not found */
+ }
+
+ /*
+ * @return lookup value for the byte, if found; else 0
+ */
+ protected static int findToU(IntBuffer toUSection, int length, short byt)
+ {
+ long word0, word;
+ int i, start, limit;
+
+ /* check the input byte against the lowest and highest section bytes */
+ //agljport:comment instead of receiving a start position parameter for toUSection we'll rely on its position property
+ start = TO_U_GET_BYTE(toUSection.get(toUSection.position()));
+ limit = TO_U_GET_BYTE(toUSection.get(toUSection.position() + length-1));
+ if(byt<start || limit<byt) {
+ return 0; /* the byte is out of range */
+ }
+
+ if(length==((limit-start)+1)) {
+ /* direct access on a linear array */
+ return TO_U_GET_VALUE(toUSection.get(toUSection.position()+byt-start)); /* could be 0 */
+ }
+
+ /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
+ word0 = TO_U_MAKE_WORD((byte)byt, 0) & UConverterConstants.UNSIGNED_INT_MASK;
+
+ /*
+ * Shift byte once instead of each section word and add 0xffffff.
+ * We will compare the shifted/added byte (bbffffff) against
+ * section words which have byte values in the same bit position.
+ * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv
+ * for all v=0..f
+ * so we need not mask off the lower 24 bits of each section word.
+ */
+ word = word0|TO_U_VALUE_MASK;
+
+ /* binary search */
+ start = 0;
+ limit = length;
+ for(;;) {
+ i=limit-start;
+ if(i<=1) {
+ break; /* done */
+ }
+ /* start<limit-1 */
+
+ if(i<=4) {
+ /* linear search for the last part */
+ if(word0<=(toUSection.get(toUSection.position()+start) & UConverterConstants.UNSIGNED_INT_MASK)) {
+ break;
+ }
+ if(++start<limit && word0<=(toUSection.get(toUSection.position()+start)&UConverterConstants.UNSIGNED_INT_MASK)) {
+ break;
+ }
+ if(++start<limit && word0<=(toUSection.get(toUSection.position()+start)&UConverterConstants.UNSIGNED_INT_MASK)) {
+ break;
+ }
+ /* always break at start==limit-1 */
+ ++start;
+ break;
+ }
+
+ i=(start+limit)/2;
+ if(word<(toUSection.get(toUSection.position()+i)&UConverterConstants.UNSIGNED_INT_MASK)) {
+ limit=i;
+ } else {
+ start=i;
+ }
+ }
+
+ /* did we really find it? */
+ if(start<limit && byt==TO_U_GET_BYTE((int)(word=(toUSection.get(toUSection.position()+start)&UConverterConstants.UNSIGNED_INT_MASK)))) {
+ return TO_U_GET_VALUE((int)word); /* never 0 */
+ } else {
+ return 0; /* not found */
+ }
+ }
+
+ /*
+ * TRUE if not an SI/SO stateful converter,
+ * or if the match length fits with the current converter state
+ */
+ protected static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match)
+ {
+ return sisoState<0 || (sisoState==0) == (match==1);
+ }
+
+ /*
+ * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS),
+ * or 1 for DBCS-only,
+ * or -1 if the converter is not SI/SO stateful
+ *
+ * Note: For SI/SO stateful converters getting here,
+ * cnv->mode==0 is equivalent to firstLength==1.
+ */
+ protected static int SISO_STATE(UConverterSharedData sharedData, int mode)
+ {
+ return sharedData.mbcs.outputType==MBCS_OUTPUT_2_SISO ? (byte)mode :
+ sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
+ }
+
+ class CharsetDecoderMBCS extends CharsetDecoderICU{
+
+ /**
+  * Creates a decoder for the given charset; all setup is done by the
+  * CharsetDecoderICU constructor.
+  */
+ CharsetDecoderMBCS(CharsetICU cs)
+ {
+     super(cs);
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+ CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+ int sourceArrayIndex;
+ int stateTable[][/*256*/];
+ char[] unicodeCodeUnits;
+
+ int offset;
+ byte state;
+ int byteIndex;
+ byte[] bytes;
+
+ int sourceIndex, nextSourceIndex;
+
+ int entry = 0;
+ char c;
+ byte action;
+
+ if(preToULength>0) {
+ /*
+ * pass sourceIndex=-1 because we continue from an earlier buffer
+ * in the future, this may change with continuous offsets
+ */
+ cr[0] = continueMatchToU(source, target, offsets, -1, flush);
+
+ if(cr[0].isError() || preToULength<0) {
+ return cr[0];
+ }
+ }
+
+ if(sharedData.mbcs.countStates==1) {
+ if((sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
+ }
+ else {
+ cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
+ }
+ return cr[0];
+ }
+
+ /* set up the local pointers */
+ sourceArrayIndex = source.position();
+
+ if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+ stateTable = sharedData.mbcs.swapLFNLStateTable;
+ }
+ else {
+ stateTable = sharedData.mbcs.stateTable;
+ }
+ unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
+
+ /* get the converter state from UConverter */
+ offset = (int)toUnicodeStatus;
+ byteIndex = toULength;
+ bytes = toUBytesArray;
+
+ /*
+ * if we are in the SBCS state for a DBCS-only converter,
+ * then load the DBCS state from the MBCS data
+ * (dbcsOnlyState==0 if it is not a DBCS-only converter)
+ */
+ if((state=(byte)(mode))==0) {
+ state = sharedData.mbcs.dbcsOnlyState;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex = byteIndex==0 ? 0 : -1;
+ nextSourceIndex = 0;
+
+ /* conversion loop */
+ while(sourceArrayIndex<source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one code unit that
+ * overflows as a result of a surrogate pair or callback output
+ * from the last source byte.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(!target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ if(byteIndex==0) {
+ /* optimized loop for 1/2-byte input and BMP output */
+ if(offsets==null) {
+ do {
+ entry = stateTable[state][source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ ++sourceArrayIndex;
+ if(sourceArrayIndex<source.limit() &&
+ MBCS_ENTRY_IS_FINAL(entry=stateTable[state][source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK]) &&
+ MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
+ (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe) {
+ ++sourceArrayIndex;
+ target.put(c);
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ offset = 0;
+ }
+ else {
+ /* set the state and leave the optimized loop */
+ bytes[0] = source.get(sourceArrayIndex-1);
+ byteIndex = 1;
+ break;
+ }
+ }
+ else {
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ ++sourceArrayIndex;
+ target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ }
+ else {
+ /* leave the optimized loop */
+ break;
+ }
+ }
+ } while(sourceArrayIndex<source.limit() && target.hasRemaining());
+ }
+ else /* offsets!=NULL */ {
+ //agljport:todo see ucnvmbcs.c for deleted block
+ do {
+ entry = stateTable[state][source.get(sourceArrayIndex)];
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
+
+ ++sourceArrayIndex;
+ if(sourceArrayIndex<source.limit() &&
+ MBCS_ENTRY_IS_FINAL(entry=stateTable[state][source.get(sourceArrayIndex)]) &&
+ MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
+ (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe) {
+
+ ++sourceArrayIndex;
+ target.put(c);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ sourceIndex = (nextSourceIndex+=2);
+ }
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ offset = 0;
+ }
+ else {
+ /* set the state and leave the optimized loop */
+ ++nextSourceIndex;
+ bytes[0] = source.get(sourceArrayIndex-1);
+ byteIndex = 1;
+ break;
+ }
+ }
+ else {
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ ++sourceArrayIndex;
+ target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ sourceIndex = ++nextSourceIndex;
+ }
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+ }
+ else {
+ /* leave the optimized loop */
+ break;
+ }
+ }
+ } while(sourceArrayIndex<source.limit() && target.hasRemaining());
+ }
+
+ /*
+ * these tests and break statements could be put inside the loop
+ * if C had "break outerLoop" like Java
+ */
+ if(sourceArrayIndex>=source.limit()) {
+ break;
+ }
+ if(!target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ ++nextSourceIndex;
+ bytes[byteIndex++] = source.get(sourceArrayIndex++);
+ }
+ else /* byteIndex>0 */ {
+ ++nextSourceIndex;
+ entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++)) & UConverterConstants.UNSIGNED_BYTE_MASK];
+ }
+
+ if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+ state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
+ offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
+ continue;
+ }
+
+ /* save the previous state for proper extension mapping with SI/SO-stateful converters */
+ mode = state;
+
+ /* set the next state early so that we can reuse the entry variable */
+ state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action = (byte)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_16) {
+ offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[offset];
+ if(c<0xfffe) {
+ /* output BMP code point */
+ target.put(c);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ else if(c==0xfffe) {
+ if(isToUUseFallback() && (entry=(int)getFallback(sharedData.mbcs, offset))!=0xfffe) {
+ /* output fallback BMP code point */
+ target.put((char)entry);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ }
+ else {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ }
+ else if(action==MBCS_STATE_VALID_DIRECT_16) {
+ /* output BMP code point */
+ target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ else if(action==MBCS_STATE_VALID_16_PAIR) {
+ offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
+ c = unicodeCodeUnits[offset++];
+ if(c<0xd800) {
+ /* output BMP code point below 0xd800 */
+ target.put(c);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ else if(isToUUseFallback() ? c<=0xdfff : c<=0xdbff) {
+ /* output roundtrip or fallback surrogate pair */
+ target.put((char)(c&0xdbff));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ if(target.hasRemaining()) {
+ target.put(unicodeCodeUnits[offset]);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ }
+ else {
+ /* target overflow */
+ charErrorBufferArray[0] = unicodeCodeUnits[offset];
+ charErrorBufferLength = 1;
+ cr[0] = CoderResult.OVERFLOW;
+
+ offset = 0;
+ break;
+ }
+ }
+ else if(isToUUseFallback() ? (c&0xfffe)==0xe000 : c==0xe000) {
+ /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
+ target.put(unicodeCodeUnits[offset]);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ else if(c==0xffff) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ }
+ else if(action==MBCS_STATE_VALID_DIRECT_20 ||
+ (action==MBCS_STATE_FALLBACK_DIRECT_20 && isToUUseFallback())) {
+ entry = MBCS_ENTRY_FINAL_VALUE(entry);
+ /* output surrogate pair */
+ target.put((char)(0xd800|(char)(entry>>10)));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ c = (char)(0xdc00|(char)(entry&0x3ff));
+ if(target.hasRemaining()) {
+ target.put(c);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ }
+ else {
+ /* target overflow */
+ charErrorBufferArray[0]=c;
+ charErrorBufferLength=1;
+ cr[0] = CoderResult.OVERFLOW;
+
+ offset = 0;
+ break;
+ }
+ }
+ else if(action==MBCS_STATE_CHANGE_ONLY) {
+ /*
+ * This serves as a state change without any output.
+ * It is useful for reading simple stateful encodings,
+ * for example using just Shift-In/Shift-Out codes.
+ * The 21 unused bits may later be used for more sophisticated
+ * state transitions.
+ */
+ if(sharedData.mbcs.dbcsOnlyState==0) {
+ byteIndex = 0;
+ }
+ else {
+ /* SI/SO are illegal for DBCS-only conversion */
+ state = (byte)(mode); /* restore the previous state */
+
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ }
+ else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(isToUUseFallback()) {
+ /* output BMP code point */
+ target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ byteIndex = 0;
+ }
+ }
+ else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ }
+ else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(byteIndex);
+ }
+ else {
+ /* reserved, must never occur */
+ byteIndex = 0;
+ }
+
+ /* end of action codes: prepare for a new character */
+ offset=0;
+
+ if(byteIndex==0) {
+ sourceIndex = nextSourceIndex;
+ }
+ else if(cr[0].isError()) {
+ /* callback(illegal) */
+ break;
+ }
+ else /* unassigned sequences indicated with byteIndex>0 */ {
+ /* try an extension mapping */
+ int sourceBeginIndex = sourceArrayIndex;
+ source.position(sourceArrayIndex);
+ byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
+ sourceArrayIndex = source.position();
+ sourceIndex = nextSourceIndex+(int)(sourceArrayIndex-sourceBeginIndex);
+
+ if(cr[0].isError()|| cr[0].isOverflow()) {
+ /* not mappable or buffer overflow */
+ break;
+ }
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ toUnicodeStatus = offset;
+ mode = state;
+ toULength = byteIndex;
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ /*
+ * continue partial match with new input
+ * never called for simple, single-character conversion
+ */
+ protected CoderResult continueMatchToU(ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex, boolean flush)
+ {
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ int[] value = new int[1];
+ int match, length;
+
+ match = matchToU((byte)SISO_STATE(sharedData, mode), preToUArray, preToUBegin, preToULength, source, value, flush);
+
+ if(match>0) {
+ if(match>=preToULength) {
+ /* advance src pointer for the consumed input */
+ source.position(source.position()+match-preToULength);
+ preToULength = 0;
+ }
+ else {
+ /* the match did not use all of preToU[] - keep the rest for replay */
+ length = preToULength - match;
+ System.arraycopy(preToUArray, preToUBegin+match, preToUArray, preToUBegin, length);
+ preToULength=(byte)-length;
+ }
+
+ /* write result */
+ cr = writeToU(value[0], target, offsets, srcIndex);
+ }
+ else if(match<0) {
+ /* save state for partial match */
+ int j, sArrayIndex;
+
+ /* just _append_ the newly consumed input to preToU[] */
+ sArrayIndex = source.position();
+ match =- match;
+ for(j=preToULength; j<match; ++j) {
+ preToUArray[j] = source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
+ preToULength=(byte)match;
+ }
+ else /* match==0 */ {
+ /*
+ * no match
+ *
+ * We need to split the previous input into two parts:
+ *
+ * 1. The first codepage character is unmappable - that's how we got into
+ * trying the extension data in the first place.
+ * We need to move it from the preToU buffer
+ * to the error buffer, set an error code,
+ * and prepare the rest of the previous input for 2.
+ *
+ * 2. The rest of the previous input must be converted once we
+ * come back from the callback for the first character.
+ * At that time, we have to try again from scratch to convert
+ * these input characters.
+ * The replay will be handled by the ucnv.c conversion code.
+ */
+
+ /* move the first codepage character to the error field */
+ System.arraycopy(preToUArray, preToUBegin, toUBytesArray, toUBytesBegin, preToUFirstLength);
+ toULength = preToUFirstLength;
+
+ /* move the rest up inside the buffer */
+ length = preToULength-preToUFirstLength;
+ if(length>0) {
+ System.arraycopy(preToUArray, preToUBegin+preToUFirstLength, preToUArray, preToUBegin, length);
+ }
+
+ /* mark preToU for replay */
+ preToULength = (byte)-length;
+
+ /* set the error code for unassigned */
+ cr = CoderResult.unmappableForLength(preToUFirstLength);
+ }
+ return cr;
+ }
+
+ /*
+ * this works like matchFromU() except
+ * - the first character is in pre
+ * - no trie is used
+ * - the returned matchLength is not offset by 2
+ *
+ * Matches the bytes preArray[preArrayBegin..+preLength[ followed by the
+ * remaining bytes of source against the toU extension table and remembers
+ * the longest match. source is only read with absolute gets; its position
+ * is not changed here.
+ *
+ * @return >0: number of matched bytes, with the match value (roundtrip flag
+ * masked off) stored in pMatchValue[0];
+ * 0: no match, or no extension data;
+ * <0: negative count of the bytes seen so far, when all input was consumed
+ * on a partial match and more input is needed
+ */
+ protected int matchToU(byte sisoState, byte[] preArray, int preArrayBegin, int preLength, ByteBuffer source, int[] pMatchValue, boolean flush)
+ {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+ IntBuffer toUTable, toUSection;
+
+ int value, matchValue, srcLength;
+ int i, j, index, length, matchLength;
+ short b;
+
+ /* asIntBuffer() views cx from its current position */
+ if(cx==null || cx.asIntBuffer().get(EXT_TO_U_LENGTH)<=0) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* initialize */
+ toUTable = (IntBuffer)ARRAY(cx, EXT_TO_U_INDEX, int.class);
+ index = 0;
+
+ /* i counts the bytes taken from preArray, j the bytes taken from source */
+ matchValue = 0;
+ i = j = matchLength=0;
+ srcLength = source.remaining();
+
+ if(sisoState==0) {
+ /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
+ if(preLength>1) {
+ return 0; /* no match of a DBCS sequence in SBCS mode */
+ }
+ else if(preLength==1) {
+ srcLength = 0;
+ }
+ else /* preLength==0 */ {
+ if(srcLength>1) {
+ srcLength = 1;
+ }
+ }
+ /* with at most one byte to look at, never report a partial match */
+ flush = true;
+ }
+
+ /* we must not remember fallback matches when not using fallbacks */
+
+ /* match input units until there is a full match or the input is consumed */
+ for(;;) {
+ /* go to the next section */
+ int oldpos = toUTable.position();
+ toUSection=((IntBuffer)toUTable.position(index)).slice();
+ toUTable.position(oldpos);
+
+ /* read first pair of the section */
+ /* the relative get() moves toUSection past this first word; findToU() starts at the resulting position */
+ value = toUSection.get();
+ length = TO_U_GET_BYTE(value);
+ value =TO_U_GET_VALUE(value);
+ if(value!=0 &&
+ (TO_U_IS_ROUNDTRIP(value) || isToUUseFallback()) &&
+ TO_U_VERIFY_SISO_MATCH(sisoState, i+j)) {
+ /* remember longest match so far */
+ matchValue=value;
+ matchLength=i+j;
+ }
+
+ /* match pre[] then src[] */
+ if(i<preLength) {
+ b=(short)(preArray[preArrayBegin + i++] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+ else if(j<srcLength) {
+ b=(short)(source.get(source.position() + j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ }
+ else {
+ /* all input consumed, partial match */
+ /* length is reused here for the number of bytes seen so far */
+ if(flush || (length=(i+j))>MAX_BYTES) {
+ /*
+ * end of the entire input stream, stop with the longest match so far
+ * or: partial match must not be longer than UCNV_EXT_MAX_BYTES
+ * because it must fit into state buffers
+ */
+ break;
+ }
+ else {
+ /* continue with more input next time */
+ return -length;
+ }
+ }
+
+ /* search for the current byte */
+ value = findToU(toUSection, length, b);
+ if(value==0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ if(TO_U_IS_PARTIAL(value)) {
+ /* partial match, continue */
+ index = TO_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if((TO_U_IS_ROUNDTRIP(value) || isToUUseFallback()) &&
+ TO_U_VERIFY_SISO_MATCH(sisoState, i+j)) {
+ /* full match, stop with result */
+ matchValue = value;
+ matchLength = i+j;
+ }
+ else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if(matchLength==0) {
+ /* no match at all */
+ return 0;
+ }
+
+ /* return result */
+ pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
+ return matchLength;
+ }
+
+ protected CoderResult writeToU(int value, CharBuffer target, IntBuffer offsets, int srcIndex)
+ {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+ /* output the result */
+ if(TO_U_IS_CODE_POINT(value)) {
+ /* output a single code point */
+ return toUWriteCodePoint(TO_U_GET_CODE_POINT(value), target, offsets, srcIndex);
+ } else {
+ /* output a string - with correct data we have resultLength>0 */
+
+ char[] a = new char[TO_U_GET_LENGTH(value)];
+ CharBuffer cb = ((CharBuffer)ARRAY(cx, EXT_TO_U_UCHARS_INDEX, char.class));
+ cb.position(TO_U_GET_INDEX(value));
+ cb.get(a, 0, a.length);
+ return toUWriteUChars(this, a, 0, a.length, target, offsets, srcIndex);
+ }
+ }
+
+ protected CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex)
+ {
+ CoderResult cr = CoderResult.UNDERFLOW;
+ int tBeginIndex = target.position();
+
+ if(target.hasRemaining()) {
+ if(c<=0xffff) {
+ target.put((char)c);
+ c = UConverterConstants.U_SENTINEL;
+ } else /* c is a supplementary code point */ {
+ target.put(UTF16.getLeadSurrogate(c));
+ c = UTF16.getTrailSurrogate(c);
+ if(target.hasRemaining()) {
+ target.put((char)c);
+ c = UConverterConstants.U_SENTINEL;
+ }
+ }
+
+ /* write offsets */
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ if((tBeginIndex+1)<target.position()) {
+ offsets.put(sourceIndex);
+ }
+ }
+ }
+
+ /* write overflow from c */
+ if(c>=0) {
+ charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);
+ cr = CoderResult.OVERFLOW;
+ }
+
+ return cr;
+ }
+
+ /**
+  * Tries to convert an input sequence that the state table left unassigned.
+  * Input sequence: cnv->toUBytes[0..length[
+  *
+  * An extension mapping is tried first (when extension data is present).
+  * If none handles the input and the converter has the GB 18030 option set,
+  * four-byte sequences are mapped through the linear gb18030Ranges table.
+  *
+  * @param length number of bytes of the input sequence in toUBytes[]
+  * @param cr     one-element holder that receives the result of this call
+  * @return 0 after the input was handled (output written, or saved as a
+  *         partial match); otherwise the length (toULength, byteIndex) of
+  *         the input, with cr[0] set to unmappable
+  */
+ protected int toU(int length, ByteBuffer source, CharBuffer target, IntBuffer offsets, int sourceIndex, boolean flush, CoderResult[] cr)
+ {
+     if(sharedData.mbcs.extIndexes!=null &&
+             initialMatchToU(length, source, target, offsets, sourceIndex, flush, cr)) {
+         return 0; /* an extension mapping handled the input */
+     }
+
+     /* GB 18030 */
+     if(length==4 && (options&MBCS_OPTION_GB18030)!=0) {
+         long linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
+
+         /*
+          * Visit every row of the table. The C original bounds this loop with
+          * sizeof(array)/sizeof(array[0]); for a Java long[][] the row count
+          * is simply gb18030Ranges.length.
+          */
+         for(int i=0; i<gb18030Ranges.length; ++i) {
+             long[] range = gb18030Ranges[i];
+             if(range[2]<=linear && linear<=range[3]) {
+                 /* found the sequence, output the Unicode code point for it */
+
+                 /* add the linear difference between the input and start sequences to the start code point */
+                 linear = range[0]+(linear-range[2]);
+
+                 /* output this code point */
+                 cr[0] = toUWriteCodePoint((int)linear, target, offsets, sourceIndex);
+
+                 return 0;
+             }
+         }
+     }
+
+     /* no mapping */
+     cr[0] = CoderResult.unmappableForLength(length);
+     return length;
+ }
+
+ /*
+ * target<targetLimit; set error code for overflow
+ */
+ protected boolean initialMatchToU(int firstLength, ByteBuffer source, CharBuffer target, IntBuffer offsets, int srcIndex, boolean flush, CoderResult[] cr)
+ {
+ int[] value = new int[1];
+ int match = 0;
+
+ /* try to match */
+ match = matchToU((byte)SISO_STATE(sharedData, mode), toUBytesArray, toUBytesBegin, firstLength, source, value, flush);
+ if(match>0) {
+ /* advance src pointer for the consumed input */
+ source.position(source.position()+match-firstLength);
+
+ /* write result to target */
+ cr[0] = writeToU(value[0], target, offsets, srcIndex);
+ return true;
+ }
+ else if(match<0) {
+ /* save state for partial match */
+ byte[] sArray;
+ int sArrayIndex;
+ int j;
+
+ /* copy the first code point */
+ sArray = toUBytesArray;
+ sArrayIndex = toUBytesBegin;
+ preToUFirstLength = (byte)firstLength;
+ for(j=0; j<firstLength; ++j) {
+ preToUArray[j]=sArray[sArrayIndex++];
+ }
+
+ /* now copy the newly consumed input */
+ sArrayIndex = source.position();
+ match =- match;
+ for(; j<match; ++j) {
+ preToUArray[j] = source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex);
+ preToULength=(byte)match;
+ return true;
+ }
+ else /* match==0 no match */ {
+ return false;
+ }
+ }
+
+ /**
+  * This version of cnvMBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
+  * that only map to and from the BMP.
+  * In addition to single-byte optimizations, the offset calculations
+  * become much easier.
+  *
+  * Every byte is looked up in stateTable[0]. Directly mapped bytes are written
+  * in a tight loop bounded by min(source.remaining(), target.remaining());
+  * offsets for them are filled in afterwards in one go. Unassigned bytes are
+  * passed to toU() for an extension mapping.
+  *
+  * @param source  input bytes, read from its position up to its limit
+  * @param target  output buffer for the decoded chars
+  * @param offsets optional buffer receiving source indexes, or null
+  * @param flush   passed through to the extension matching code
+  * @return UNDERFLOW when the input was consumed, OVERFLOW when target is full
+  *         with input left, or a malformed/unmappable result
+  */
+ protected CoderResult cnvMBCSSingleToBMPWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)
+ {
+     CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+     int sourceArrayIndex, lastSource;
+     int targetCapacity, length;
+     int[][] stateTable;
+
+     int sourceIndex;
+
+     int entry;
+     byte action;
+
+     /* set up the local pointers */
+     sourceArrayIndex = source.position();
+     targetCapacity = target.remaining();
+
+     if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+         stateTable = sharedData.mbcs.swapLFNLStateTable;
+     }
+     else {
+         stateTable = sharedData.mbcs.stateTable;
+     }
+
+     /* sourceIndex=-1 if the current character began in the previous buffer */
+     sourceIndex = 0;
+     lastSource = sourceArrayIndex;
+
+     /*
+      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
+      * for the minimum of the sourceLength and targetCapacity
+      */
+     length = source.remaining();
+     if(length<targetCapacity) {
+         targetCapacity=length;
+     }
+
+     /* conversion loop */
+     while(targetCapacity>0) {
+         entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
+         /* MBCS_ENTRY_IS_FINAL(entry) */
+
+         /* test the most common case first */
+         if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+             /* output BMP code point */
+             target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+             --targetCapacity;
+             continue;
+         }
+
+         /*
+          * An if-else-if chain provides more reliable performance for
+          * the most common cases compared to a switch.
+          */
+         action = (byte)(MBCS_ENTRY_FINAL_ACTION(entry));
+         if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+             if(isToUUseFallback()) {
+                 /* output BMP code point */
+                 target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+                 --targetCapacity;
+                 continue;
+             }
+         }
+         else if(action==MBCS_STATE_UNASSIGNED) {
+             /* just fall through */
+         }
+         else if(action==MBCS_STATE_ILLEGAL) {
+             /* callback(illegal) */
+             cr[0] = CoderResult.malformedForLength(sourceArrayIndex-lastSource);
+         } else {
+             /* reserved, must never occur */
+             continue;
+         }
+
+         /* set offsets since the start or the last extension */
+         if(offsets!=null) {
+             int count = sourceArrayIndex-lastSource;
+
+             /* predecrement: do not set the offset for the callback-causing character */
+             while(--count>0) {
+                 offsets.put(sourceIndex++);
+             }
+             /* offset and sourceIndex are now set for the current character */
+         }
+
+         if(cr[0].isError()) {
+             /* callback(illegal) */
+             break;
+         }
+         else /* unassigned sequences indicated with byteIndex>0 */ {
+             /* try an extension mapping; toU() reads and advances source.position() */
+             lastSource = sourceArrayIndex;
+             toUBytesArray[0]=source.get(sourceArrayIndex-1);
+             source.position(sourceArrayIndex);
+             toULength = toU((byte)1, source, target, offsets, sourceIndex, flush, cr);
+             sourceArrayIndex = source.position();
+             sourceIndex += 1+(int)(sourceArrayIndex-lastSource);
+
+             if(cr[0].isError()) {
+                 /* not mappable */
+                 break;
+             }
+
+             /* recalculate the targetCapacity after an extension mapping */
+             targetCapacity = target.remaining();
+             length = source.remaining();
+             if(length<targetCapacity) {
+                 targetCapacity = length;
+             }
+         }
+     }
+
+     /*
+      * Input ends at the buffer's limit, not at its capacity: comparing against
+      * capacity() would report OVERFLOW for a fully consumed source whenever
+      * the target happens to be exactly full.
+      */
+     if(!cr[0].isError() && sourceArrayIndex<source.limit() && !target.hasRemaining()) {
+         /* target is full */
+         cr[0] = CoderResult.OVERFLOW;
+     }
+
+     /* set offsets since the start or the last callback */
+     if(offsets!=null) {
+         int count = sourceArrayIndex-lastSource;
+         while(count>0) {
+             offsets.put(sourceIndex++);
+             --count;
+         }
+     }
+
+     /* write back the updated pointers */
+     source.position(sourceArrayIndex);
+
+     return cr[0];
+ }
+
+ /*
+ * This version of cnvMBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages.
+ *
+ * Every source byte is looked up in stateTable[0] and handled according to
+ * the action of its (final) entry. Bytes without a usable mapping are passed
+ * to toU() for an extension mapping. When offsets is non-null, one source
+ * index is recorded per char written here.
+ *
+ * @return UNDERFLOW when the loop ran out of input, OVERFLOW when target is full,
+ * or a malformed/unmappable result for a byte that could not be converted
+ */
+ protected CoderResult cnvMBCSSingleToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)
+ {
+ CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+ int sourceArrayIndex;
+ int[][] stateTable;
+
+ int sourceIndex;
+
+ int entry;
+ char c;
+ byte action;
+
+ /* set up the local pointers */
+ sourceArrayIndex = source.position();
+
+ if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+ stateTable = sharedData.mbcs.swapLFNLStateTable;
+ }
+ else {
+ stateTable = sharedData.mbcs.stateTable;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex = 0;
+
+ /* conversion loop */
+ while(sourceArrayIndex<source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one code unit that
+ * overflows as a result of a surrogate pair or callback output
+ * from the last source byte.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(!target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ /* mask the signed byte to 0..255 before using it as a table index */
+ entry = stateTable[0][source.get(sourceArrayIndex++) & UConverterConstants.UNSIGNED_BYTE_MASK];
+ /* MBCS_ENTRY_IS_FINAL(entry) */
+
+ /* test the most common case first */
+ if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
+ /* output BMP code point */
+ target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+
+ /* normal end of action codes: prepare for a new character */
+ ++sourceIndex;
+ continue;
+ }
+
+ /*
+ * An if-else-if chain provides more reliable performance for
+ * the most common cases compared to a switch.
+ */
+ action = (byte)(MBCS_ENTRY_FINAL_ACTION(entry));
+ if(action==MBCS_STATE_VALID_DIRECT_20 ||
+ (action==MBCS_STATE_FALLBACK_DIRECT_20 && isToUUseFallback())) {
+
+ entry = MBCS_ENTRY_FINAL_VALUE(entry);
+ /* output surrogate pair */
+ target.put((char)(0xd800|(char)(entry>>>10)));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ c = (char)(0xdc00|(char)(entry&0x3ff));
+ if(target.hasRemaining()) {
+ target.put(c);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ }
+ else {
+ /* target overflow */
+ /* the trail surrogate is parked in the error buffer */
+ charErrorBufferArray[0]=c;
+ charErrorBufferLength=1;
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+
+ ++sourceIndex;
+ continue;
+ }
+ else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
+ if(isToUUseFallback()) {
+ /* output BMP code point */
+ target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+
+ ++sourceIndex;
+ continue;
+ }
+ /* fallbacks not in use: fall through to the extension mapping below */
+ }
+ else if(action==MBCS_STATE_UNASSIGNED) {
+ /* just fall through */
+ }
+ else if(action==MBCS_STATE_ILLEGAL) {
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ }
+ else {
+ /* reserved, must never occur */
+ ++sourceIndex;
+ continue;
+ }
+
+ if(cr[0].isError()) {
+ /* callback(illegal) */
+ break;
+ }
+ else /* unassigned sequences indicated with byteIndex>0 */ {
+ /* try an extension mapping */
+ /* toU() reads and advances source.position(), so sync it before and after the call */
+ int sourceBeginIndex = sourceArrayIndex;
+ toUBytesArray[0] = source.get(sourceArrayIndex-1);
+ source.position(sourceArrayIndex);
+ toULength = toU((byte)1, source, target, offsets, sourceIndex, flush, cr);
+ sourceArrayIndex = source.position();
+ sourceIndex += 1+(int)(sourceArrayIndex-sourceBeginIndex);
+
+ /* NOTE(review): CoderResult.OVERFLOW is not an error result, so an overflow from toU() does not break here; it is caught by the target check at the top of the loop, or returned once the loop ends */
+ if(cr[0].isError()) {
+ /* not mappable or buffer overflow */
+ break;
+ }
+ }
+ }
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ protected int getFallback(UConverterMBCSTable mbcsTable, int offset)
+ {
+ MBCSToUFallback[] toUFallbacks;
+ int i, start, limit;
+
+ limit = mbcsTable.countToUFallbacks;
+ if(limit>0) {
+ /* do a binary search for the fallback mapping */
+ toUFallbacks = mbcsTable.toUFallbacks;
+ start = 0;
+ while(start<limit-1) {
+ i = (start+limit)/2;
+ if(offset<toUFallbacks[i].offset) {
+ limit = i;
+ }
+ else {
+ start = i;
+ }
+ }
+
+ /* did we really find it? */
+ if(offset==toUFallbacks[start].offset) {
+ return toUFallbacks[start].codePoint;
+ }
+ }
+
+ return 0xfffe;
+ }
+
+ }
+
+ class CharsetEncoderMBCS extends CharsetEncoderICU{
+
+ /**
+  * Creates an encoder for the given charset, using fromUSubstitution as the
+  * replacement passed to CharsetEncoderICU, and puts it into its reset state.
+  */
+ CharsetEncoderMBCS(CharsetICU cs)
+ {
+     super(cs, fromUSubstitution);
+     this.implReset();
+ }
+
+ /**
+  * Resets the encoder: performs the superclass reset, then marks that no
+  * first code point of a pending fromUnicode match is stored (U_SENTINEL).
+  */
+ protected void implReset()
+ {
+     super.implReset();
+     this.preFromUFirstCP = UConverterConstants.U_SENTINEL;
+ }
+
+ /*
+ * Converts UTF-16 code units from source into codepage bytes in target.
+ *
+ * Steps, in order:
+ * 1. If preFromUFirstCP>=0, continue the pending extension match via continueMatchFromU();
+ *    return right away if that reports an error or leaves preFromULength<0.
+ * 2. Delegate to cnvMBCSSingleFromBMPWithOffsets(), cnvMBCSSingleFromUnicodeWithOffsets()
+ *    or cnvMBCSDoubleFromUnicodeWithOffsets() when outputType/unicodeMask allow it.
+ * 3. Otherwise run the generic loop: read one code unit, pair surrogates via getTrail(),
+ *    look up the stage 2 entry, derive value/length for the outputType, and write the bytes;
+ *    bytes that do not fit into target go to errorBuffer and the result becomes OVERFLOW.
+ * 4. For MBCS_OUTPUT_2_SISO in double-byte mode at the flushed end of input, emit an SI.
+ * 5. Store c into fromUChar32, prevLength into fromUnicodeStatus, and update source's position.
+ *
+ * @param source  input characters; read by absolute index, position set at the end
+ * @param target  output bytes
+ * @param offsets if not null, receives one source index per output byte
+ * @param flush   true if this is the end of the input; used for the final SI and passed to helpers
+ * @return the CoderResult held in cr[0]; OVERFLOW if a BufferOverflowException was caught
+ */
+ protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+
+ CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+ int sourceArrayIndex;
+ char[] table;
+ byte[] pArray, bytes;
+ int pArrayIndex, outputType, c;
+ int prevSourceIndex, sourceIndex, nextSourceIndex;
+ int stage2Entry, value, length, prevLength;
+ short unicodeMask;
+
+ try{
+
+ if(preFromUFirstCP>=0) {
+ /*
+ * pass sourceIndex=-1 because we continue from an earlier buffer
+ * in the future, this may change with continuous offsets
+ */
+ cr[0] = continueMatchFromU(source, target, offsets, flush, -1);
+
+ if(cr[0].isError() || preFromULength<0) {
+ return cr[0];
+ }
+ }
+
+ /* use optimized function if possible */
+ outputType = sharedData.mbcs.outputType;
+ unicodeMask = sharedData.mbcs.unicodeMask;
+ if(outputType==MBCS_OUTPUT_1 && (unicodeMask&UConverterConstants.HAS_SURROGATES) == 0) {
+ if((unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ cr[0] = cnvMBCSSingleFromBMPWithOffsets(source, target, offsets, flush);
+ } else {
+ cr[0] = cnvMBCSSingleFromUnicodeWithOffsets(source, target, offsets, flush);
+ }
+ return cr[0];
+ } else if(outputType==MBCS_OUTPUT_2) {
+ cr[0] = cnvMBCSDoubleFromUnicodeWithOffsets(source, target, offsets, flush);
+ return cr[0];
+ }
+
+ table = sharedData.mbcs.fromUnicodeTable;
+ sourceArrayIndex = source.position();
+
+ if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+ bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+ } else {
+ bytes = sharedData.mbcs.fromUnicodeBytes;
+ }
+
+ /* get the converter state from UConverter */
+ c = fromUChar32;
+
+ if(outputType==MBCS_OUTPUT_2_SISO) {
+ prevLength=(int)fromUnicodeStatus;
+ if(prevLength==0) {
+ /* set the real value */
+ prevLength=1;
+ }
+ } else {
+ /* prevent fromUnicodeStatus from being set to something non-0 */
+ prevLength=0;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ prevSourceIndex=-1;
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion loop */
+ /*
+ * This is another piece of ugly code:
+ * A goto into the loop if the converter state contains a first surrogate
+ * from the previous function call.
+ * It saves me to check in each loop iteration a check of if(c==0)
+ * and duplicating the trail-surrogate-handling code in the else
+ * branch of that check.
+ * I could not find any other way to get around this other than
+ * using a function call for the conversion and callback, which would
+ * be even more inefficient.
+ *
+ * Markus Scherer 2000-jul-19
+ */
+ /* in this Java port the "goto" is a call to getTrail(); the locals travel through a SideEffects object */
+ boolean doloop = true;
+ if(c!=0 && target.hasRemaining()) {
+ SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength);
+ doloop = getTrail(source, target, unicodeMask, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ prevSourceIndex = x.prevSourceIndex;
+ prevLength = x.prevLength;
+ }
+
+ if(doloop) {
+ while(sourceArrayIndex<source.limit()) {
+ /*
+ * This following test is to see if available input would overflow the output.
+ * It does not catch output of more than one byte that
+ * overflows as a result of a multi-byte character or callback output
+ * from the last source character.
+ * Therefore, those situations also test for overflows and will
+ * then break the loop, too.
+ */
+ if(target.hasRemaining()) {
+ /*
+ * Get a correct Unicode code point:
+ * a single UChar for a BMP code point or
+ * a matched surrogate pair for a "supplementary code point".
+ */
+ c = source.get(sourceArrayIndex++);
+ ++nextSourceIndex;
+ /*
+ * This also tests if the codepage maps single surrogates.
+ * If it does, then surrogates are not paired but mapped separately.
+ * Note that in this case unmatched surrogates are not detected.
+ */
+ if(UTF16.isSurrogate((char)c) && (unicodeMask&UConverterConstants.HAS_SURROGATES) == 0) {
+ if(UTF16.isLeadSurrogate((char)c)) {
+ //getTrail:
+ SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength);
+ doloop = getTrail(source, target, unicodeMask, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ prevSourceIndex = x.prevSourceIndex;
+ /*
+ * NOTE(review): unlike the other two SideEffects call sites in this method,
+ * x.prevLength is not copied back into prevLength here - confirm this is intended.
+ * NOTE(review): when doloop is true the loop restarts and reads a new code unit
+ * into c; confirm against getTrail() that a paired code point is not lost.
+ */
+
+ if(doloop)
+ continue;
+ else
+ break;
+ }
+ else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ break;
+ }
+ }
+
+ /* convert the Unicode code point in c into codepage bytes */
+
+ /*
+ * The basic lookup is a triple-stage compact array (trie) lookup.
+ * For details see the beginning of this file.
+ *
+ * Single-byte codepages are handled with a different data structure
+ * by _MBCSSingle... functions.
+ *
+ * The result consists of a 32-bit value from stage 2 and
+ * a pointer to as many bytes as are stored per character.
+ * The pointer points to the character's bytes in stage 3.
+ * Bits 15..0 of the stage 2 entry contain the stage 3 index
+ * for that pointer, while bits 31..16 are flags for which of
+ * the 16 characters in the block are roundtrip-assigned.
+ *
+ * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
+ * respectively as uint32_t, in the platform encoding.
+ * For 3-byte codepages, the bytes are always stored in big-endian order.
+ *
+ * For EUC encodings that use only either 0x8e or 0x8f as the first
+ * byte of their longest byte sequences, the first two bytes in
+ * this third stage indicate with their 7th bits whether these bytes
+ * are to be written directly or actually need to be preceded by
+ * one of the two Single-Shift codes. With this, the third stage
+ * stores one byte fewer per character than the actual maximum length of
+ * EUC byte sequences.
+ *
+ * Other than that, leading zero bytes are removed and the other
+ * bytes output. A single zero byte may be output if the "assigned"
+ * bit in stage 2 was on.
+ * The data structure does not support zero byte output as a fallback,
+ * and also does not allow output of leading zeros.
+ */
+ stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
+
+ /* get the bytes and the length for the output */
+ switch(outputType) {
+ /* NOTE(review): MBCS_OUTPUT_2 already returned through the optimized dispatch above, so this case looks unreachable here */
+ case MBCS_OUTPUT_2:
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ length=1;
+ }
+ else {
+ length=2;
+ }
+ break;
+ case MBCS_OUTPUT_2_SISO:
+ /* 1/2-byte stateful with Shift-In/Shift-Out */
+ /*
+ * Save the old state in the converter object
+ * right here, then change the local prevLength state variable if necessary.
+ * Then, if this character turns out to be unassigned or a fallback that
+ * is not taken, the callback code must not save the new state in the converter
+ * because the new state is for a character that is not output.
+ * However, the callback must still restore the state from the converter
+ * in case the callback function changed it for its output.
+ */
+ fromUnicodeStatus=prevLength; /* save the old state */
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==false) {
+ /* no mapping, leave value==0 */
+ length = 0;
+ }
+ else if(prevLength<=1) {
+ length = 1;
+ }
+ else {
+ /* change from double-byte mode to single-byte */
+ value |= UConverterConstants.SI<<8;
+ length = 2;
+ prevLength = 1;
+ }
+ }
+ else {
+ if(prevLength==2) {
+ length = 2;
+ }
+ else {
+ /* change from single-byte mode to double-byte */
+ value |= UConverterConstants.SO<<16;
+ length = 3;
+ prevLength = 2;
+ }
+ }
+ break;
+ case MBCS_OUTPUT_DBCS_ONLY:
+ /* table with single-byte results, but only DBCS mappings used */
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ /* no mapping or SBCS result, not taken for DBCS-only */
+ value = stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
+ length = 0;
+ } else {
+ length = 2;
+ }
+ break;
+ case MBCS_OUTPUT_3:
+ pArray = bytes;
+ pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ /* assemble the three stored bytes, most significant first, into one value */
+ value = ((pArray[pArrayIndex]&UConverterConstants.UNSIGNED_BYTE_MASK)<<16)|((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK);
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ length = 1;
+ }
+ else if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xffff) {
+ length = 2;
+ }
+ else {
+ length = 3;
+ }
+ break;
+ case MBCS_OUTPUT_4:
+ value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ length = 1;
+ }
+ else if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xffff) {
+ length = 2;
+ }
+ else if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xffffff) {
+ length = 3;
+ }
+ else {
+ length = 4;
+ }
+ break;
+ case MBCS_OUTPUT_3_EUC:
+ value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ /* EUC 16-bit fixed-length representation */
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ length = 1;
+ }
+ else if((value&0x8000)==0) {
+ value |= 0x8e8000;
+ length = 3;
+ }
+ else if((value&0x80)==0) {
+ value |= 0x8f0080;
+ length = 3;
+ }
+ else {
+ length = 2;
+ }
+ break;
+ case MBCS_OUTPUT_4_EUC:
+ pArray = bytes;
+ pArrayIndex = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
+ /* assemble the three stored bytes, most significant first, into one value */
+ value = ((pArray[pArrayIndex]&UConverterConstants.UNSIGNED_BYTE_MASK)<<16)|((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK);
+ /* EUC 16-bit fixed-length representation applied to the first two bytes */
+ if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+ length = 1;
+ }
+ else if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xffff) {
+ length = 2;
+ }
+ else if((value&0x800000)==0) {
+ value |= 0x8e800000;
+ length = 4;
+ }
+ else if((value&0x8000)==0) {
+ value |= 0x8f008000;
+ length = 4;
+ }
+ else {
+ length = 3;
+ }
+ break;
+ default:
+ /* must not occur */
+ /*
+ * To avoid compiler warnings that value & length may be
+ * used without having been initialized, we set them here.
+ * In reality, this is unreachable code.
+ * Not having a default branch also causes warnings with
+ * some compilers.
+ */
+ value = stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
+ length = 0;
+ break;
+ }
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value!=0))) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry.
+ * There is no way with this data structure for fallback output
+ * to be a zero byte.
+ */
+
+ //unassigned:
+ SideEffects x = new SideEffects(c, sourceArrayIndex, sourceIndex, nextSourceIndex, prevSourceIndex, prevLength);
+ doloop = unassigned(source, target, offsets, x, flush, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ sourceIndex = x.sourceIndex;
+ nextSourceIndex = x.nextSourceIndex;
+ prevSourceIndex = x.prevSourceIndex;
+ prevLength = x.prevLength;
+ if(doloop)
+ continue;
+ else
+ break;
+ }
+
+ /* write the output character bytes from value and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=target.remaining()) {
+ if(offsets==null) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ target.put((byte)(value>>>24));
+ case 3:
+ target.put((byte)(value>>>16));
+ case 2:
+ target.put((byte)(value>>>8));
+ case 1:
+ target.put((byte)value);
+ default:
+ /* will never occur */
+ break;
+ }
+ }
+ else {
+ /* same as above, plus one offsets entry (the character's source index) per output byte */
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ target.put((byte)(value>>>24));
+ offsets.put(sourceIndex);
+ case 3:
+ target.put((byte)(value>>>16));
+ offsets.put(sourceIndex);
+ case 2:
+ target.put((byte)(value>>>8));
+ offsets.put(sourceIndex);
+ case 1:
+ target.put((byte)value);
+ offsets.put(sourceIndex);
+ default:
+ /* will never occur */
+ break;
+ }
+ }
+ }
+ else {
+ int errorBufferArrayIndex;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ length -= target.remaining();
+
+ /* length is now the number of trailing bytes that go to errorBuffer */
+ errorBufferArrayIndex = 0;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 3:
+ errorBuffer[errorBufferArrayIndex++]=(byte)(value>>>16);
+ case 2:
+ errorBuffer[errorBufferArrayIndex++]=(byte)(value>>>8);
+ case 1:
+ errorBuffer[errorBufferArrayIndex]=(byte)value;
+ default:
+ /* will never occur */
+ break;
+ }
+ errorBufferLength = (byte)length;
+
+ /* now output what fits into the regular target */
+ value>>>=8*length; /* length was reduced by targetCapacity */
+ switch(target.remaining()) {
+ /* each branch falls through to the next one */
+ case 3:
+ target.put((byte)(value>>>16));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ case 2:
+ target.put((byte)(value>>>8));
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ case 1:
+ target.put((byte)value);
+ if(offsets!=null) {
+ offsets.put(sourceIndex);
+ }
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ cr[0] = CoderResult.OVERFLOW;
+ c=0;
+ break;
+ }
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ if(offsets!=null) {
+ prevSourceIndex=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ }
+ continue;
+ }
+ else {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ break;
+ }
+ }
+ }
+
+ /*
+ * the end of the input stream and detection of truncated input
+ * are handled by the framework, but for EBCDIC_STATEFUL conversion
+ * we need to emit an SI at the very end
+ *
+ * conditions:
+ * successful
+ * EBCDIC_STATEFUL in DBCS mode
+ * end of input and no truncated input
+ */
+ if(outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
+ flush && sourceArrayIndex>=source.limit() && c==0){
+
+ /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
+ if(target.hasRemaining()) {
+ target.put((byte)UConverterConstants.SI);
+ if(offsets!=null) {
+ /* set the last source character's index (sourceIndex points at sourceLimit now) */
+ offsets.put(prevSourceIndex);
+ }
+ }
+ else {
+ /* target is full */
+ errorBuffer[0]=(byte)UConverterConstants.SI;
+ errorBufferLength=1;
+ cr[0] = CoderResult.OVERFLOW;
+ }
+ prevLength=1; /* we switched into SBCS */
+ }
+
+ /* set the converter state back into UConverter */
+ fromUChar32=c;
+ fromUnicodeStatus=prevLength;
+
+ source.position(sourceArrayIndex);
+ }
+ catch(BufferOverflowException ex){
+ /*
+ * NOTE(review): the state write-back above (fromUChar32, fromUnicodeStatus,
+ * source.position) sits inside the try block and is skipped on this path.
+ */
+ cr[0] = CoderResult.OVERFLOW;
+ }
+
+ return cr[0];
+ }
+
+ /**
+  * Continues an extension-table match that was left partial by an earlier call;
+  * requires preFromUFirstCP>=0. Never called for simple, single-character conversion.
+  *
+  * matchFromU() is run over the stored units (preFromUArray) followed by the new input:
+  * - full match (>=2): the consumed input is skipped, or the unused stored units are
+  *   kept for replay (preFromULength becomes negative), and the result is written
+  *   with writeFromU();
+  * - partial match (<0): the newly consumed input is appended to preFromUArray;
+  * - no match (0 or 1): the first code point is moved to fromUChar32, the stored
+  *   units are marked for replay, and an unmappable result is returned.
+  *
+  * @param source   new input; its position is advanced past the units consumed here
+  * @param target   output bytes
+  * @param offsets  if not null, receives source indexes for the bytes written
+  * @param flush    true if the end of the input stream is reached
+  * @param srcIndex source index passed on to writeFromU() for offsets
+  * @return the result of writeFromU() for a full match, an unmappable result of
+  *         length 1 for no match, UNDERFLOW otherwise
+  */
+ protected CoderResult continueMatchFromU(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush, int srcIndex)
+ {
+     CoderResult cr = CoderResult.UNDERFLOW;
+     int[] value = new int[1];
+     int match;
+
+     match = matchFromU(preFromUFirstCP, preFromUArray, preFromUBegin, preFromULength, source, target, value, flush);
+     if(match>=2) {
+         match-=2; /* remove 2 for the initial code point */
+
+         if(match>=preFromULength) {
+             /* advance src pointer for the consumed input */
+             source.position(source.position()+match-preFromULength);
+             preFromULength=0;
+         } else {
+             /* the match did not use all of preFromU[] - keep the rest for replay */
+             int length = preFromULength-match;
+             System.arraycopy(preFromUArray, preFromUBegin+match, preFromUArray, preFromUBegin, length);
+             preFromULength=(byte)-length;
+         }
+
+         /* finish the partial match */
+         preFromUFirstCP = UConverterConstants.U_SENTINEL;
+
+         /*
+          * write result; keep the returned CoderResult so that a target overflow
+          * reported by writeFromU() reaches the caller instead of being dropped
+          */
+         cr = writeFromU(value[0], target, offsets, srcIndex);
+     }
+     else if(match<0) {
+         /* save state for partial match */
+         int sArrayIndex;
+         int j;
+
+         /* just _append_ the newly consumed input to preFromU[] */
+         sArrayIndex = source.position();
+         match = -match-2; /* remove 2 for the initial code point */
+         for(j=preFromULength; j<match; ++j) {
+             preFromUArray[j]=source.get(sArrayIndex++);
+         }
+         source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
+         preFromULength=(byte)match;
+     }
+     else /* match==0 or 1 */ {
+         /*
+          * no match
+          *
+          * We need to split the previous input into two parts:
+          *
+          * 1. The first code point is unmappable - that's how we got into
+          *    trying the extension data in the first place.
+          *    We need to move it from the preFromU buffer
+          *    to the error buffer, set an error code,
+          *    and prepare the rest of the previous input for 2.
+          *
+          * 2. The rest of the previous input must be converted once we
+          *    come back from the callback for the first code point.
+          *    At that time, we have to try again from scratch to convert
+          *    these input characters.
+          *    The replay will be handled by the ucnv.c conversion code.
+          */
+
+         if(match==1) {
+             /* matched, no mapping but request for <subchar1> */
+             useSubChar1=true;
+         }
+
+         /* move the first code point to the error field */
+         fromUChar32 = preFromUFirstCP;
+         preFromUFirstCP = UConverterConstants.U_SENTINEL;
+
+         /* mark preFromU for replay */
+         preFromULength = (byte) - preFromULength;
+
+         /*
+          * set the error code for unassigned.
+          * The length must be positive: CoderResult.unmappableForLength() rejects
+          * non-positive lengths, and the source position (0 at the start of a fresh
+          * buffer) is not an error length. Use 1, as fromU() does for "no mapping".
+          */
+         cr = CoderResult.unmappableForLength(1);
+     }
+     return cr;
+ }
+
+ /*
+ * Matches firstCP, then the units of preArray, then the units remaining in source
+ * against the from-Unicode extension tables reached through sharedData.mbcs.extIndexes.
+ * If there is no extension data (extIndexes is null), returns 0.
+ *
+ * @param firstCP the first code point before all the other UChars
+ * @param preArray UChars that must match before any UChars from source are used
+ * @param preArrayBegin index in preArray of the first UChar to match
+ * @param preLength number of UChars to use from preArray, >=0
+ * @param source UChars that can be used to complete a match; only read with
+ * absolute gets relative to its position, the position itself is not changed here
+ * @param target not used by this method
+ * @param pMatchValue [out] pMatchValue[0] receives the result value for a full match
+ * @param flush TRUE if the end of the input stream is reached
+ * @return >1: matched, return value=total match length (number of input units matched)
+ * 1: matched, no mapping but request for <subchar1>
+ * (only for the first code point)
+ * 0: no match
+ * <0: partial match, return value=negative total match length
+ * (partial matches are never returned for flush==TRUE)
+ * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS)
+ * the matchLength is 2 if only firstCP matched, and >2 if firstCP and
+ * further code units matched
+ */
+ //static int32_t ucnv_extMatchFromU(const int32_t *cx, UChar32 firstCP, const UChar *pre, int32_t preLength, const UChar *src, int32_t srcLength, uint32_t *pMatchValue, UBool useFallback, UBool flush)
+ protected int matchFromU(int firstCP, char[] preArray, int preArrayBegin, int preLength, CharBuffer source, ByteBuffer target, int[] pMatchValue, boolean flush)
+ {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+
+ CharBuffer stage12, stage3;
+ IntBuffer stage3b;
+
+ CharBuffer fromUTableUChars, fromUSectionUChars;
+ IntBuffer fromUTableValues, fromUSectionValues;
+
+ int value, matchValue;
+ int i, j, index, length, matchLength;
+ char c;
+
+ if(cx==null) {
+ return 0; /* no extension data, no match */
+ }
+
+ /* trie lookup of firstCP */
+ index=firstCP>>>10; /* stage 1 index */
+ if(index>=cx.asIntBuffer().get(EXT_FROM_U_STAGE_1_LENGTH)) {
+ return 0; /* the first code point is outside the trie */
+ }
+
+ stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
+ stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
+ index = FROM_U(stage12, stage3, index, firstCP);
+
+ stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
+ value = stage3b.get(stage3b.position() + index);
+ if(value==0) {
+ return 0;
+ }
+
+ /*
+ * NOTE(review): this tests a from-Unicode value with TO_U_IS_PARTIAL while the loop
+ * below uses FROM_U_IS_PARTIAL - confirm the two agree for the values stored here.
+ */
+ if(TO_U_IS_PARTIAL(value)) {
+ /* partial match, enter the loop below */
+ index = FROM_U_GET_PARTIAL_INDEX(value);
+
+ /* initialize */
+ fromUTableUChars = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
+ fromUTableValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
+
+ matchValue=0;
+ /* i counts units taken from preArray, j counts units taken from source */
+ i=j=matchLength=0;
+
+ /* we must not remember fallback matches when not using fallbacks */
+
+ /* match input units until there is a full match or the input is consumed */
+ for(;;) {
+ /* go to the next section */
+ /* slice each table at index, restoring the table's own position afterwards */
+ int oldpos = fromUTableUChars.position();
+ fromUSectionUChars = ((CharBuffer)fromUTableUChars.position(index)).slice();
+ fromUTableUChars.position(oldpos);
+ oldpos = fromUTableValues.position();
+ fromUSectionValues = ((IntBuffer)fromUTableValues.position(index)).slice();
+ fromUTableValues.position(oldpos);
+
+ /* read first pair of the section */
+ /* relative gets: both section buffers are left positioned just after this first pair */
+ length = fromUSectionUChars.get();
+ value = fromUSectionValues.get();
+ if( value!=0 &&
+ (FROM_U_IS_ROUNDTRIP(value) ||
+ isFromUUseFallback(firstCP))
+ ) {
+ /* remember longest match so far */
+ matchValue = value;
+ matchLength = 2+i+j;
+ }
+
+ /* match pre[] then src[] */
+ if(i<preLength) {
+ c = preArray[preArrayBegin + i++];
+ } else if(j<source.remaining()) {
+ c = source.get(source.position() + j++);
+ } else {
+ /* all input consumed, partial match */
+ if(flush || (length=(i+j))>MAX_UCHARS) {
+ /*
+ * end of the entire input stream, stop with the longest match so far
+ * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS
+ * because it must fit into state buffers
+ */
+ break;
+ } else {
+ /* continue with more input next time */
+ return -(2+length);
+ }
+ }
+
+ /* search for the current UChar */
+ index = findFromU(fromUSectionUChars, length, c);
+ if(index<0) {
+ /* no match here, stop with the longest match so far */
+ break;
+ } else {
+ value = fromUSectionValues.get(fromUSectionValues.position() + index);
+ if(FROM_U_IS_PARTIAL(value)) {
+ /* partial match, continue */
+ index = FROM_U_GET_PARTIAL_INDEX(value);
+ } else {
+ if( FROM_U_IS_ROUNDTRIP(value) ||
+ isFromUUseFallback(firstCP)
+ ) {
+ /* full match, stop with result */
+ matchValue=value;
+ matchLength=2+i+j;
+ } else {
+ /* full match on fallback not taken, stop with the longest match so far */
+ }
+ break;
+ }
+ }
+ }
+
+ if(matchLength==0) {
+ /* no match at all */
+ return 0;
+ }
+ } else /* result from firstCP trie lookup */ {
+ if( FROM_U_IS_ROUNDTRIP(value) ||
+ isFromUUseFallback(firstCP)
+ ) {
+ /* full match, stop with result */
+ matchValue=value;
+ matchLength=2;
+ } else {
+ /* fallback not taken */
+ return 0;
+ }
+ }
+
+ if((matchValue&FROM_U_RESERVED_MASK) != 0) {
+ /* do not interpret values with reserved bits used, for forward compatibility */
+ return 0;
+ }
+
+ /* return result */
+ if(matchValue==FROM_U_SUBCHAR1) {
+ return 1; /* assert matchLength==2 */
+ }
+
+ pMatchValue[0]=FROM_U_MASK_ROUNDTRIP(matchValue);
+ return matchLength;
+ }
+
+ protected CoderResult writeFromU(int value, ByteBuffer target, IntBuffer offsets, int srcIndex)
+ {
+ ByteBuffer cx = sharedData.mbcs.extIndexes;
+
+ byte bufferArray[] = new byte[1+MAX_BYTES];
+ int bufferArrayIndex = 0;
+ byte[] resultArray;
+ int resultArrayIndex;
+ int length, prevLength;
+
+ length = FROM_U_GET_LENGTH(value);
+ value = FROM_U_GET_DATA(value);
+
+ /* output the result */
+ if(length<=FROM_U_MAX_DIRECT_LENGTH) {
+ /*
+ * Generate a byte array and then write it below.
+ * This is not the fastest possible way, but it should be ok for
+ * extension mappings, and it is much simpler.
+ * Offset and overflow handling are only done once this way.
+ */
+ int p = bufferArrayIndex+1; /* reserve buffer[0] for shiftByte below */
+ switch(length) {
+ case 3:
+ bufferArray[p++] = (byte)(value>>>16);
+ case 2:
+ bufferArray[p++] = (byte)(value>>>8);
+ case 1:
+ bufferArray[p++] = (byte)value;
+ default:
+ break; /* will never occur */
+ }
+ resultArray = bufferArray;
+ resultArrayIndex = bufferArrayIndex+1;
+ }
+ else {
+ byte[] slice = new byte[length];
+
+ ByteBuffer bb = ((ByteBuffer)ARRAY(cx, EXT_FROM_U_BYTES_INDEX, byte.class));
+ bb.position(value);
+ bb.get(slice, 0, slice.length);
+
+ resultArray = slice;
+ resultArrayIndex = 0;
+ }
+
+ /* with correct data we have length>0 */
+
+ if((prevLength=(int)fromUnicodeStatus)!=0) {
+ /* handle SI/SO stateful output */
+ byte shiftByte;
+
+ if(prevLength>1 && length==1) {
+ /* change from double-byte mode to single-byte */
+ shiftByte = (byte)UConverterConstants.SI;
+ fromUnicodeStatus = 1;
+ }
+ else if(prevLength==1 && length>1) {
+ /* change from single-byte mode to double-byte */
+ shiftByte = (byte)UConverterConstants.SO;
+ fromUnicodeStatus = 2;
+ }
+ else {
+ shiftByte = 0;
+ }
+
+ if(shiftByte!=0) {
+ /* prepend the shift byte to the result bytes */
+ bufferArray[0] = shiftByte;
+ if(resultArray!=bufferArray || resultArrayIndex!=bufferArrayIndex+1) {
+ System.arraycopy(resultArray, resultArrayIndex, bufferArray, bufferArrayIndex+1, length);
+ }
+ resultArray = bufferArray;
+ resultArrayIndex = bufferArrayIndex;
+ ++length;
+ }
+ }
+
+ return fromUWriteBytes(this, resultArray, resultArrayIndex, length, target, offsets, srcIndex);
+ }
+
+ /*
+ * @return if(U_FAILURE) return the code point for cnv->fromUChar32
+ * else return 0 after output has been written to the target
+ */
+ protected int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex, boolean flush, CoderResult[] cr)
+ {
+ //ByteBuffer cx;
+ long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK;
+
+ useSubChar1=false;
+
+ if( sharedData.mbcs.extIndexes!=null && initialMatchFromU((int)cp, source, target, offsets, sourceIndex, flush, cr)) {
+ return 0; /* an extension mapping handled the input */
+ }
+
+ /* GB 18030 */
+ if((options&MBCS_OPTION_GB18030)!=0) {
+ long[] range;
+ int i;
+
+ for(i=0; i<gb18030Ranges.length; ++i) {
+ range=gb18030Ranges[i];
+ if(range[0]<=cp && cp<=range[1]) {
+ /* found the Unicode code point, output the four-byte sequence for it */
+ long linear;
+ byte bytes[] = new byte[4];
+
+ /* get the linear value of the first GB 18030 code in this range */
+ linear=range[2]-LINEAR_18030_BASE;
+
+ /* add the offset from the beginning of the range */
+ linear+=(cp-range[0]);
+
+ bytes[3]=(byte)(0x30+linear%10); linear/=10;
+ bytes[2]=(byte)(0x81+linear%126); linear/=126;
+ bytes[1]=(byte)(0x30+linear%10); linear/=10;
+ bytes[0]=(byte)(0x81+linear);
+
+ /* output this sequence */
+ cr[0] = fromUWriteBytes(this, bytes, 0, 4, target, offsets, sourceIndex);
+ return 0;
+ }
+ }
+ }
+
+ /* no mapping */
+ cr[0] = CoderResult.unmappableForLength(1);
+ return (int)cp;
+ }
+
+ /*
+ * target<targetLimit; set error code for overflow
+ */
+ protected boolean initialMatchFromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int srcIndex, boolean flush, CoderResult[] cr)
+ {
+ int[] value = new int[1];
+ int match;
+
+ /* try to match */
+ match = matchFromU(cp, null, 0, 0, source, target, value, flush);
+
+ /* reject a match if the result is a single byte for DBCS-only */
+ if( match>=2 &&
+ !(FROM_U_GET_LENGTH(value[0])==1 &&
+ sharedData.mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY)
+ ) {
+ /* advance src pointer for the consumed input */
+ source.position(source.position()+match-2); /* remove 2 for the initial code point */
+
+ /* write result to target */
+ cr[0] = writeFromU(value[0], target, offsets, srcIndex);
+ return true;
+ } else if(match<0) {
+ /* save state for partial match */
+ int sArrayIndex;
+ int j;
+
+ /* copy the first code point */
+ preFromUFirstCP=cp;
+
+ /* now copy the newly consumed input */
+ sArrayIndex = source.position();
+ match =- match-2; /* remove 2 for the initial code point */
+ for(j=0; j<match; ++j) {
+ preFromUArray[j]=source.get(sArrayIndex++);
+ }
+ source.position(sArrayIndex); /* same as *src=srcLimit; because we reached the end of input */
+ preFromULength=(byte)match;
+ return true;
+ } else if(match==1) {
+ /* matched, no mapping but request for <subchar1> */
+ useSubChar1=true;
+ return false;
+ } else /* match==0 no match */ {
+ return false;
+ }
+ }
+
+ /*
+ * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
+ * that map only to and from the BMP.
+ * In addition to single-byte/state optimizations, the offset calculations
+ * become much easier.
+ *
+ * Each assigned code unit produces exactly one byte, so a single counter
+ * (targetCapacity, the minimum of remaining input and remaining output) drives the loop.
+ * Offsets are not written per byte inside the fast path; they are filled in for a whole
+ * run of converted units before an extension lookup and once more at the end.
+ *
+ * @param source input characters; read by absolute index, position set at the end
+ * @param target output bytes
+ * @param offsets if not null, receives one source index per output byte
+ * @param flush passed on to fromU() for extension mappings
+ * @return the CoderResult held in cr[0]
+ */
+ protected CoderResult cnvMBCSSingleFromBMPWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+
+ CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+ int sourceArrayIndex, lastSource;
+ int targetCapacity, length;
+ char[] table;
+ byte[] results;
+
+ int c, sourceIndex;
+ char value, minValue;
+
+ /* set up the local pointers */
+ sourceArrayIndex = source.position();
+ targetCapacity = target.remaining();
+ table = sharedData.mbcs.fromUnicodeTable;
+
+ if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+ results = sharedData.mbcs.swapLFNLFromUnicodeBytes; //agljport:comment should swapLFNLFromUnicodeBytes be a ByteBuffer so results can be a 16-bit view of it?
+ }
+ else {
+ results = sharedData.mbcs.fromUnicodeBytes; //agljport:comment should swapLFNLFromUnicodeBytes be a ByteBuffer so results can be a 16-bit view of it?
+ }
+
+ /* results at or above minValue are written to the target; lower ones take the unassigned path */
+ if(useFallback) {
+ /* use all roundtrip and fallback results */
+ minValue = 0x800;
+ }
+ else {
+ /* use only roundtrips and fallbacks from private-use characters */
+ minValue = 0xc00;
+ }
+
+ /* get the converter state from UConverter */
+ c = fromUChar32;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex = c==0 ? 0 : -1;
+ /* lastSource marks the first source unit whose offset has not been written yet */
+ lastSource = sourceArrayIndex;
+
+ /*
+ * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
+ * for the minimum of the sourceLength and targetCapacity
+ */
+ length = source.limit()-sourceArrayIndex;
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+
+ /* a pending code unit from the previous call is first completed via getTrailSingleBMP() */
+ boolean doloop = true;
+ if(c!=0 && targetCapacity>0) {
+ SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
+ doloop = getTrailSingleBMP(source, x, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ }
+
+ if(doloop) {
+ while(targetCapacity>0) {
+ /*
+ * Get a correct Unicode code point:
+ * a single UChar for a BMP code point or
+ * a matched surrogate pair for a "supplementary code point".
+ */
+ c = source.get(sourceArrayIndex++);
+ /*
+ * Do not immediately check for single surrogates:
+ * Assume that they are unassigned and check for them in that case.
+ * This speeds up the conversion of assigned characters.
+ */
+ /* convert the Unicode code point in c into codepage bytes */
+ value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+
+ /* is this code point assigned, or do we use fallbacks? */
+ if(value>=minValue) {
+ /* assigned, write the output character bytes from value and length */
+ /* length==1 */
+ /* this is easy because we know that there is enough space */
+ target.put((byte)value);
+ --targetCapacity;
+
+ /* normal end of conversion: prepare for a new character */
+ c=0;
+ continue;
+ }
+ else if(!UTF16.isSurrogate((char)c)) {
+ /* normal, unassigned BMP character */
+ }
+ else if(UTF16.isLeadSurrogate((char)c)) {
+ //getTrail:
+ SideEffectsSingleBMP x = new SideEffectsSingleBMP(c, sourceArrayIndex);
+ doloop = getTrailSingleBMP(source, x, cr);
+ c = x.c;
+ sourceArrayIndex = x.sourceArrayIndex;
+ if(!doloop)
+ break;
+ }
+ else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ cr[0] = CoderResult.malformedForLength(1);
+ break;
+ }
+
+ /* c does not have a mapping */
+
+ /* get the number of code units for c to correctly advance sourceIndex */
+ length = UTF16.getCharCount(c);
+
+ /* set offsets since the start or the last extension */
+ if(offsets!=null) {
+ int count = sourceArrayIndex-lastSource;
+
+ /* do not set the offset for this character */
+ count -= length;
+
+ while(count>0) {
+ offsets.put(sourceIndex++);
+ --count;
+ }
+ /* offsets and sourceIndex are now set for the current character */
+ }
+
+ /* try an extension mapping */
+ lastSource = sourceArrayIndex;
+ source.position(sourceArrayIndex);
+ c = fromU(c, source, target, offsets, sourceIndex, flush, cr);
+ sourceArrayIndex = source.position();
+ /* advance past c itself plus whatever additional input fromU() consumed */
+ sourceIndex += length+(sourceArrayIndex-lastSource);
+ lastSource = sourceArrayIndex;
+
+ if(cr[0].isError()) {
+ /* not mappable or buffer overflow */
+ break;
+ } else {
+ /* a mapping was written to the target, continue */
+
+ /* recalculate the targetCapacity after an extension mapping */
+ targetCapacity = target.remaining();
+ length = source.limit() - sourceArrayIndex;
+ if(length<targetCapacity) {
+ targetCapacity=length;
+ }
+ }
+ }
+ }
+
+ /* NOTE(review): this overwrites cr[0] without first checking whether an error was already recorded above - confirm that cannot happen with a full target */
+ if(sourceArrayIndex<source.limit() && !target.hasRemaining()) {
+ /* target is full */
+ cr[0] = CoderResult.OVERFLOW;
+ }
+
+ /* set offsets since the start or the last callback */
+ if(offsets!=null) {
+ int count = sourceArrayIndex-lastSource;
+ while(count>0) {
+ offsets.put(sourceIndex++);
+ --count;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ fromUChar32=c;
+
+ /* write back the updated pointers */
+ source.position(sourceArrayIndex);
+
+ return cr[0];
+ }
+
+ /**
+  * Converts UTF-16 input to a single-byte codepage; this is the variant of
+  * ucnv_MBCSFromUnicodeWithOffsets() that is optimized for single-byte codepages.
+  * <p>
+  * Each code point is looked up with MBCS_SINGLE_RESULT_FROM_U. A result of at least
+  * minValue is written to the target as one byte; anything else is handed to
+  * unassignedDouble(), which tries an extension mapping through fromU(). Lead surrogates
+  * are completed by getTrailDouble(). A pending code point from a previous call is read
+  * from fromUChar32 on entry and the current one is stored back on exit.
+  *
+  * @param source  input characters; its position is set to the first unconsumed char on return
+  * @param target  output bytes
+  * @param offsets if non-null, receives sourceIndex for every byte this loop writes directly;
+  *                it is not forwarded to getTrailDouble()/unassignedDouble()
+  * @param flush   forwarded unchanged to getTrailDouble()/unassignedDouble()
+  * @return CoderResult.UNDERFLOW, or CoderResult.OVERFLOW when the target is full while input
+  *         remains, or malformedForLength(1) for an unmatched trail surrogate, or whatever
+  *         result a helper stored
+  */
+ protected CoderResult cnvMBCSSingleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+
+     /* one-element array so that the helpers can replace the result */
+     CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+     int sourceArrayIndex;
+
+     char[] table;
+     byte[] results; //agljport:comment results is used to get 16-bit values out of the byte[] array
+
+     int c;
+     int sourceIndex, nextSourceIndex;
+
+     char value, minValue;
+
+     /* set up the local pointers */
+     short unicodeMask;
+     boolean hasSupplementary;
+     sourceArrayIndex = source.position();
+
+     table = sharedData.mbcs.fromUnicodeTable;
+
+     if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+         results = sharedData.mbcs.swapLFNLFromUnicodeBytes; //agljport:comment should swapLFNLFromUnicodeBytes be a ByteBuffer so results can be a 16-bit view of it?
+     }
+     else {
+         results = sharedData.mbcs.fromUnicodeBytes; //agljport:comment should fromUnicodeBytes be a ByteBuffer so results can be a 16-bit view of it?
+     }
+
+     if(useFallback) {
+         /* use all roundtrip and fallback results */
+         minValue = 0x800;
+     }
+     else {
+         /* use only roundtrips and fallbacks from private-use characters */
+         minValue = 0xc00;
+     }
+     unicodeMask = sharedData.mbcs.unicodeMask;
+     hasSupplementary = (unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY)!=0;
+
+     /* get the converter state from UConverter */
+     c = fromUChar32;
+
+     /* sourceIndex=-1 if the current character began in the previous buffer */
+     sourceIndex= c==0 ? 0 : -1;
+     nextSourceIndex=0;
+
+     boolean doloop = true;
+     /*
+      * pending: c already holds a complete code point that still has to be converted,
+      * so the next loop iteration must not read a new code unit first.
+      */
+     boolean pending = false;
+     if(c!=0 && target.hasRemaining()) {
+         /* complete the lead surrogate that was left over from the previous buffer */
+         SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+         doloop = getTrailDouble(source, target, unicodeMask, x, flush, cr);
+         c = x.c;
+         sourceArrayIndex = x.sourceArrayIndex;
+         sourceIndex = x.sourceIndex;
+         nextSourceIndex = x.nextSourceIndex;
+         /*
+          * getTrailDouble() returns true both after writing an extension mapping
+          * (BMP-only codepage) and after merely assembling a supplementary code point;
+          * only in the latter case is there still something to convert.
+          */
+         pending = doloop && hasSupplementary;
+     }
+
+     if(doloop) {
+         while(pending || sourceArrayIndex<source.limit()) {
+             /*
+              * This following test is to see if available input would overflow the output.
+              * It does not catch output of more than one byte that
+              * overflows as a result of a multi-byte character or callback output
+              * from the last source character.
+              * Therefore, those situations also test for overflows and will
+              * then break the loop, too.
+              */
+             if(target.hasRemaining()) {
+                 if(!pending) {
+                     /*
+                      * Get a correct Unicode code point:
+                      * a single UChar for a BMP code point or
+                      * a matched surrogate pair for a "supplementary code point".
+                      */
+                     c = source.get(sourceArrayIndex++);
+                     ++nextSourceIndex;
+                     if(UTF16.isSurrogate((char)c)) {
+                         if(UTF16.isLeadSurrogate((char)c)) {
+                             //getTrail:
+                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+                             doloop = getTrailDouble(source, target, unicodeMask, x, flush, cr);
+                             c = x.c;
+                             sourceArrayIndex = x.sourceArrayIndex;
+                             sourceIndex = x.sourceIndex;
+                             nextSourceIndex = x.nextSourceIndex;
+                             if(!doloop) {
+                                 break;
+                             }
+                             if(!hasSupplementary) {
+                                 /* the extension mapping for the pair was already written */
+                                 continue;
+                             }
+                             /* else: fall through and convert the supplementary code point in c */
+                         }
+                         else {
+                             /* this is an unmatched trail code unit (2nd surrogate) */
+                             /* callback(illegal) */
+                             cr[0] = CoderResult.malformedForLength(1);
+                             break;
+                         }
+                     }
+                 }
+                 pending = false;
+
+                 /* convert the Unicode code point in c into codepage bytes */
+                 value = MBCS_SINGLE_RESULT_FROM_U(table, results, c);
+
+                 /* is this code point assigned, or do we use fallbacks? */
+                 if(value>=minValue) {
+                     /* assigned, write the output character bytes from value and length */
+                     /* length==1 */
+                     /* this is easy because we know that there is enough space */
+                     target.put((byte)value);
+                     if(offsets!=null) {
+                         offsets.put(sourceIndex);
+                     }
+
+                     /* normal end of conversion: prepare for a new character */
+                     c=0;
+                     sourceIndex = nextSourceIndex;
+                 }
+                 else { /* unassigned */
+                     /* try an extension mapping */
+                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+                     doloop = unassignedDouble(source, target, x, flush, cr);
+                     c = x.c;
+                     sourceArrayIndex = x.sourceArrayIndex;
+                     sourceIndex = x.sourceIndex;
+                     nextSourceIndex = x.nextSourceIndex;
+                     if(!doloop)
+                         break;
+                 }
+             }
+             else {
+                 /* target is full */
+                 cr[0] = CoderResult.OVERFLOW;
+                 break;
+             }
+         }
+     }
+
+     /* set the converter state back into UConverter */
+     fromUChar32=c;
+
+     /* write back the updated pointers */
+     source.position(sourceArrayIndex);
+
+     return cr[0];
+ }
+
+ /**
+  * Converts UTF-16 input to a double-byte codepage; this is the variant of
+  * ucnv_MBCSFromUnicodeWithOffsets() that is optimized for double-byte codepages.
+  * <p>
+  * Each code point is looked up with MBCS_STAGE_2_FROM_U / MBCS_VALUE_2_FROM_STAGE_2 and
+  * written as one byte (values up to 0xff) or as two bytes. Code points that are neither
+  * roundtrips nor usable fallbacks go to unassignedDouble(), which tries an extension
+  * mapping through fromU(). Lead surrogates are completed by getTrailDouble() unless the
+  * codepage maps single surrogates (HAS_SURROGATES). A pending code point from a previous
+  * call is read from fromUChar32 on entry and the current one is stored back on exit.
+  *
+  * @param source  input characters; its position is set to the first unconsumed char on return
+  * @param target  output bytes; when only the lead byte of a two-byte result fits, the trail
+  *                byte is stored in errorBuffer and OVERFLOW is returned
+  * @param offsets if non-null, receives sourceIndex for every byte this loop writes directly;
+  *                it is not forwarded to getTrailDouble()/unassignedDouble()
+  * @param flush   forwarded unchanged to getTrailDouble()/unassignedDouble()
+  * @return CoderResult.UNDERFLOW, or CoderResult.OVERFLOW when the target is full, or
+  *         malformedForLength(1) for an unmatched trail surrogate, or whatever result a
+  *         helper stored
+  */
+ protected CoderResult cnvMBCSDoubleFromUnicodeWithOffsets(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+     /* one-element array so that the helpers can replace the result */
+     CoderResult[] cr = {CoderResult.UNDERFLOW};
+
+     int sourceArrayIndex;
+
+     char[] table;
+     byte[] bytes;
+
+     int c, sourceIndex, nextSourceIndex;
+
+     int stage2Entry;
+     int value;
+     int length;
+     short unicodeMask;
+     boolean hasSupplementary;
+
+     unicodeMask = sharedData.mbcs.unicodeMask;
+     hasSupplementary = (unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY)!=0;
+
+     /* set up the local pointers */
+     sourceArrayIndex = source.position();
+
+     table = sharedData.mbcs.fromUnicodeTable;
+
+     if((options&UConverterConstants.OPTION_SWAP_LFNL)!=0) {
+         bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+     } else {
+         bytes = sharedData.mbcs.fromUnicodeBytes;
+     }
+
+     /* get the converter state from UConverter */
+     c = fromUChar32;
+
+     /* sourceIndex=-1 if the current character began in the previous buffer */
+     sourceIndex= c==0 ? 0 : -1;
+     nextSourceIndex=0;
+
+     /* conversion loop */
+     boolean doloop = true;
+     /*
+      * pending: c already holds a complete code point that still has to be converted,
+      * so the next loop iteration must not read a new code unit first.
+      */
+     boolean pending = false;
+     if(c!=0 && target.hasRemaining()) {
+         /* complete the lead surrogate that was left over from the previous buffer */
+         SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+         doloop = getTrailDouble(source, target, unicodeMask, x, flush, cr);
+         c = x.c;
+         sourceArrayIndex = x.sourceArrayIndex;
+         sourceIndex = x.sourceIndex;
+         nextSourceIndex = x.nextSourceIndex;
+         /*
+          * getTrailDouble() returns true both after writing an extension mapping
+          * (BMP-only codepage) and after merely assembling a supplementary code point;
+          * only in the latter case is there still something to convert.
+          */
+         pending = doloop && hasSupplementary;
+     }
+
+     if(doloop) {
+         while(pending || sourceArrayIndex<source.limit()) {
+             /*
+              * This following test is to see if available input would overflow the output.
+              * It does not catch output of more than one byte that
+              * overflows as a result of a multi-byte character or callback output
+              * from the last source character.
+              * Therefore, those situations also test for overflows and will
+              * then break the loop, too.
+              */
+             if(target.hasRemaining()) {
+                 if(!pending) {
+                     /*
+                      * Get a correct Unicode code point:
+                      * a single UChar for a BMP code point or
+                      * a matched surrogate pair for a "supplementary code point".
+                      */
+                     c = source.get(sourceArrayIndex++);
+                     ++nextSourceIndex;
+                     /*
+                      * This also tests if the codepage maps single surrogates.
+                      * If it does, then surrogates are not paired but mapped separately.
+                      * Note that in this case unmatched surrogates are not detected.
+                      */
+                     if(UTF16.isSurrogate((char)c) && (unicodeMask&UConverterConstants.HAS_SURROGATES) == 0) {
+                         if(UTF16.isLeadSurrogate((char)c)) {
+                             //getTrail:
+                             SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+                             doloop = getTrailDouble(source, target, unicodeMask, x, flush, cr);
+                             c = x.c;
+                             sourceArrayIndex = x.sourceArrayIndex;
+                             sourceIndex = x.sourceIndex;
+                             nextSourceIndex = x.nextSourceIndex;
+
+                             if(!doloop) {
+                                 break;
+                             }
+                             if(!hasSupplementary) {
+                                 /* the extension mapping for the pair was already written */
+                                 continue;
+                             }
+                             /* else: fall through and convert the supplementary code point in c */
+                         }
+                         else {
+                             /* this is an unmatched trail code unit (2nd surrogate) */
+                             /* callback(illegal) */
+                             cr[0] = CoderResult.malformedForLength(1);
+                             break;
+                         }
+                     }
+                 }
+                 pending = false;
+
+                 /* convert the Unicode code point in c into codepage bytes */
+                 stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+
+                 /* get the bytes and the length for the output */
+                 /* MBCS_OUTPUT_2 */
+                 value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                 if((value & UConverterConstants.UNSIGNED_INT_MASK) <=0xff) {
+                     length=1;
+                 }
+                 else {
+                     length=2;
+                 }
+
+                 /* is this code point assigned, or do we use fallbacks? */
+                 if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || (isFromUUseFallback(c) && value!=0))) {
+                     /*
+                      * We allow a 0 byte output if the "assigned" bit is set for this entry.
+                      * There is no way with this data structure for fallback output
+                      * to be a zero byte.
+                      */
+
+                     //unassigned:
+                     SideEffectsDouble x = new SideEffectsDouble(c, sourceArrayIndex, sourceIndex, nextSourceIndex);
+
+                     doloop = unassignedDouble(source, target, x, flush, cr);
+                     c = x.c;
+                     sourceArrayIndex = x.sourceArrayIndex;
+                     sourceIndex = x.sourceIndex;
+                     nextSourceIndex = x.nextSourceIndex;
+                     if(doloop)
+                         continue;
+                     else
+                         break;
+                 }
+
+                 /* write the output character bytes from value and length */
+                 /* from the first if in the loop we know that there is room for one byte */
+                 if(length==1) {
+                     /* this is easy because we know that there is enough space */
+                     target.put((byte)value);
+                     if(offsets!=null) {
+                         offsets.put(sourceIndex);
+                     }
+                 }
+                 else /* length==2 */ {
+                     target.put((byte)(value>>>8));
+                     /* the lead byte is already out, so one more free byte is all that is needed */
+                     if(target.hasRemaining()) {
+                         target.put((byte)value);
+                         if(offsets!=null) {
+                             offsets.put(sourceIndex);
+                             offsets.put(sourceIndex);
+                         }
+                     }
+                     else {
+                         if(offsets!=null) {
+                             offsets.put(sourceIndex);
+                         }
+                         /* keep the trail byte in the error buffer */
+                         errorBuffer[0]=(byte)value;
+                         errorBufferLength=1;
+
+                         /* target overflow */
+                         cr[0] = CoderResult.OVERFLOW;
+                         c=0;
+                         break;
+                     }
+                 }
+
+                 /* normal end of conversion: prepare for a new character */
+                 c=0;
+                 sourceIndex=nextSourceIndex;
+                 continue;
+             }
+             else {
+                 /* target is full */
+                 cr[0] = CoderResult.OVERFLOW;
+                 break;
+             }
+         }
+     }
+
+     /* set the converter state back into UConverter */
+     fromUChar32=c;
+
+     /* write back the updated pointers */
+     source.position(sourceArrayIndex);
+
+     return cr[0];
+ }
+
+ /**
+  * Mutable pair of values that getTrailSingleBMP() updates on behalf of its caller:
+  * the current code point and the read index into the source buffer.
+  */
+ protected final class SideEffectsSingleBMP {
+     /** current code unit or code point */
+     int c;
+     /** index of the next code unit to read from the source */
+     int sourceArrayIndex;
+
+     SideEffectsSingleBMP(int c_, int sourceArrayIndex_) {
+         this.sourceArrayIndex = sourceArrayIndex_;
+         this.c = c_;
+     }
+ }
+
+ /**
+  * Function made out of the block labeled getTrail in ucnv_MBCSSingleFromUnicodeWithOffsets.
+  * Assumes that x.c is a lead surrogate and looks at the code unit that follows it.
+  * <p>
+  * This method always returns false: either the input ends after the lead surrogate
+  * (cr is left untouched), or the pair is complete and reported as unmappable, or the
+  * lead surrogate is unmatched and reported as malformed.
+  *
+  * @param source input characters; only read, its position is not changed here
+  * @param x      in/out: c and sourceArrayIndex; both are advanced when a trail surrogate follows
+  * @param cr     one-element array receiving the error result
+  * @return false in every case
+  */
+ protected final boolean getTrailSingleBMP(CharBuffer source, SideEffectsSingleBMP x, CoderResult[] cr)
+ {
+     if(x.sourceArrayIndex>=source.limit()) {
+         /* no more input */
+         return false;
+     }
+
+     /* test the following code unit */
+     final char next = source.get(x.sourceArrayIndex);
+     if(!UTF16.isTrailSurrogate(next)) {
+         /* this is an unmatched lead code unit (1st surrogate) */
+         /* callback(illegal) */
+         /* NOTE(review): length 2 is reported although only the lead unit was consumed - confirm */
+         cr[0] = CoderResult.malformedForLength(2);
+         return false;
+     }
+
+     x.sourceArrayIndex++;
+     x.c = UCharacter.getCodePoint((char)x.c, next);
+     /* this codepage does not map supplementary code points */
+     /* callback(unassigned) */
+     cr[0] = CoderResult.unmappableForLength(2);
+     return false;
+ }
+
+ /**
+  * Mutable bundle of the loop variables that getTrail() and unassigned() update on
+  * behalf of their caller.
+  */
+ protected final class SideEffects {
+     int c;
+     int sourceArrayIndex;
+     int sourceIndex;
+     int nextSourceIndex;
+     int prevSourceIndex;
+     int prevLength;
+
+     SideEffects(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_, int prevSourceIndex_, int prevLength_) {
+         this.c = c_;
+         this.sourceArrayIndex = sourceArrayIndex_;
+         this.sourceIndex = sourceIndex_;
+         this.nextSourceIndex = nextSourceIndex_;
+         this.prevSourceIndex = prevSourceIndex_;
+         this.prevLength = prevLength_;
+     }
+ }
+
+ /**
+  * Function made out of the block labeled getTrail in ucnv_MBCSFromUnicodeWithOffsets.
+  * Assumes that x.c is a lead surrogate and tries to combine it with the next code unit.
+  *
+  * @param source      input characters
+  * @param target      output bytes, used only when an extension mapping is attempted
+  * @param unicodeMask codepage flags; HAS_SUPPLEMENTARY decides whether the pair is
+  *                    handed to unassigned() right away
+  * @param x           in/out loop variables
+  * @param flush       forwarded to unassigned()
+  * @param cr          one-element array receiving an error result
+  * @return false when the input ends after the lead surrogate or the lead surrogate is
+  *         unmatched; the result of unassigned() for a BMP-only codepage; true when x.c
+  *         now holds a supplementary code point
+  */
+ protected final boolean getTrail(CharBuffer source, ByteBuffer target, int unicodeMask, SideEffects x, boolean flush, CoderResult[] cr)
+ {
+     if(x.sourceArrayIndex>=source.limit()) {
+         /* no more input */
+         return false;
+     }
+
+     /* test the following code unit */
+     final char next = source.get(x.sourceArrayIndex);
+     if(!UTF16.isTrailSurrogate(next)) {
+         /* this is an unmatched lead code unit (1st surrogate) */
+         /* callback(illegal) */
+         cr[0] = CoderResult.malformedForLength(2);
+         return false;
+     }
+
+     x.sourceArrayIndex++;
+     x.nextSourceIndex++;
+     x.c = UCharacter.getCodePoint((char)x.c, next);
+
+     final boolean mapsSupplementary = (unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0;
+     if(mapsSupplementary) {
+         /* convert this supplementary code point */
+         return true;
+     }
+
+     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+     fromUnicodeStatus = x.prevLength; /* save the old state */
+     /* callback(unassigned) */
+     return unassigned(source, target, null, x, flush, cr);
+ }
+
+ /**
+  * Function made out of the block labeled unassigned in ucnv_MBCSFromUnicodeWithOffsets.
+  * Tries an extension mapping for x.c through fromU() and brings the loop variables in x
+  * up to date with whatever fromU() consumed.
+  *
+  * @param source  input characters; positioned at x.sourceArrayIndex before fromU() is called
+  * @param target  output bytes
+  * @param offsets only tested for null to decide whether the source indexes advance;
+  *                nothing is written to it and fromU() is called with null offsets
+  * @param x       in/out loop variables
+  * @param flush   forwarded to fromU()
+  * @param cr      one-element array that fromU() may set
+  * @return false if cr holds an error afterwards (not mappable or buffer overflow), true otherwise
+  */
+ protected final boolean unassigned(CharBuffer source, ByteBuffer target, IntBuffer offsets, SideEffects x, boolean flush, CoderResult[] cr)
+ {
+     /* try an extension mapping */
+     final int startIndex = x.sourceArrayIndex;
+     source.position(startIndex);
+     x.c = fromU(x.c, source, target, null, x.sourceIndex, flush, cr);
+     x.sourceArrayIndex = source.position();
+     x.nextSourceIndex += x.sourceArrayIndex-startIndex;
+     x.prevLength=(int)fromUnicodeStatus;
+
+     if(cr[0].isError()) {
+         /* not mappable or buffer overflow */
+         return false;
+     }
+
+     /* a mapping was written to the target, continue */
+     /* normal end of conversion: prepare for a new character */
+     if(offsets!=null) {
+         x.prevSourceIndex=x.sourceIndex;
+         x.sourceIndex=x.nextSourceIndex;
+     }
+     return true;
+ }
+
+ /**
+  * Mutable bundle of the loop variables that getTrailDouble() and unassignedDouble()
+  * update on behalf of their caller.
+  */
+ protected final class SideEffectsDouble {
+     int c;
+     int sourceArrayIndex;
+     int sourceIndex;
+     int nextSourceIndex;
+
+     SideEffectsDouble(int c_, int sourceArrayIndex_, int sourceIndex_, int nextSourceIndex_) {
+         this.c = c_;
+         this.sourceArrayIndex = sourceArrayIndex_;
+         this.sourceIndex = sourceIndex_;
+         this.nextSourceIndex = nextSourceIndex_;
+     }
+ }
+
+ /**
+  * Function made out of the block labeled getTrail in ucnv_MBCSDoubleFromUnicodeWithOffsets.
+  * Assumes that x.c is a lead surrogate and tries to combine it with the next code unit.
+  *
+  * @param source      input characters
+  * @param target      output bytes, used only when an extension mapping is attempted
+  * @param unicodeMask codepage flags; HAS_SUPPLEMENTARY decides whether the pair is
+  *                    handed to unassignedDouble() right away
+  * @param x           in/out loop variables
+  * @param flush       forwarded to unassignedDouble()
+  * @param cr          one-element array receiving an error result
+  * @return false when the input ends after the lead surrogate or the lead surrogate is
+  *         unmatched; the result of unassignedDouble() for a BMP-only codepage; true when
+  *         x.c now holds a supplementary code point
+  */
+ protected final boolean getTrailDouble(CharBuffer source, ByteBuffer target, int unicodeMask, SideEffectsDouble x, boolean flush, CoderResult[] cr)
+ {
+     if(x.sourceArrayIndex>=source.limit()) {
+         /* no more input */
+         return false;
+     }
+
+     /* test the following code unit */
+     final char next = source.get(x.sourceArrayIndex);
+     if(!UTF16.isTrailSurrogate(next)) {
+         /* this is an unmatched lead code unit (1st surrogate) */
+         /* callback(illegal) */
+         cr[0] = CoderResult.malformedForLength(2);
+         return false;
+     }
+
+     x.sourceArrayIndex++;
+     x.nextSourceIndex++;
+     x.c = UCharacter.getCodePoint((char)x.c, next);
+
+     final boolean mapsSupplementary = (unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0;
+     if(mapsSupplementary) {
+         /* convert this supplementary code point */
+         return true;
+     }
+
+     /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+     /* callback(unassigned) */
+     return unassignedDouble(source, target, x, flush, cr);
+ }
+
+ /**
+  * Function made out of the block labeled unassigned in ucnv_MBCSDoubleFromUnicodeWithOffsets.
+  * Tries an extension mapping for x.c through fromU() and brings the loop variables in x
+  * up to date with whatever fromU() consumed.
+  *
+  * @param source input characters; positioned at x.sourceArrayIndex before fromU() is called
+  * @param target output bytes
+  * @param x      in/out loop variables
+  * @param flush  forwarded to fromU()
+  * @param cr     one-element array that fromU() may set
+  * @return false if cr holds an error afterwards (not mappable or buffer overflow), true otherwise
+  */
+ protected final boolean unassignedDouble(CharBuffer source, ByteBuffer target, SideEffectsDouble x, boolean flush, CoderResult[] cr)
+ {
+     /* try an extension mapping; fromU() is called without an offsets buffer */
+     final int startIndex = x.sourceArrayIndex;
+     source.position(startIndex);
+     x.c = fromU(x.c, source, target, null, x.sourceIndex, flush, cr);
+     x.sourceArrayIndex = source.position();
+     x.nextSourceIndex += x.sourceArrayIndex - startIndex;
+
+     if(cr[0].isError()) {
+         /* not mappable or buffer overflow */
+         return false;
+     }
+
+     /* a mapping was written to the target, continue */
+     /* normal end of conversion: prepare for a new character */
+     x.sourceIndex=x.nextSourceIndex;
+     return true;
+ }
+ /**
+ * Overrides super class method.
+ * <p>
+ * Chooses between the charset's subChar1 and subChar, clears encoder.useSubChar1, and
+ * for MBCS_OUTPUT_2_SISO converters assembles the substitution bytes (with a leading
+ * SI or SO when the mode recorded in encoder.fromUnicodeStatus has to change) in a
+ * local buffer. Every path then delegates to super.cbFromUWriteSub().
+ * <p>
+ * NOTE(review): the local buffer and its fill count i are never read again after they
+ * are filled, and the chosen subchar/length are not passed to the super call either.
+ * This looks like an unfinished port - confirm how the assembled bytes are meant to
+ * reach the target.
+ *
+ * @param encoder encoder whose useSubChar1, invalidUCharBuffer and fromUnicodeStatus are consulted;
+ *                useSubChar1 is reset and fromUnicodeStatus may be switched between 1 and 2
+ * @param source passed through to super.cbFromUWriteSub()
+ * @param target passed through to super.cbFromUWriteSub()
+ * @param offsets passed through to super.cbFromUWriteSub()
+ * @return the result of super.cbFromUWriteSub()
+ * @throws IllegalArgumentException for an MBCS_OUTPUT_2_SISO converter whose selected
+ *         substitution length is neither 1 nor 2
+ */
+ protected CoderResult cbFromUWriteSub ( CharsetEncoderICU encoder,
+ CharBuffer source, ByteBuffer target,
+ IntBuffer offsets){
+ CharsetMBCS cs = (CharsetMBCS) encoder.charset();
+ byte[] subchar, p;
+ byte[] buffer = new byte[4];
+ int length,i=0;
+ /* first, select between subChar and subChar1 */
+ if( cs.subChar1!=0 &&
+ (cs.sharedData.mbcs.extIndexes!=null ?
+ encoder.useSubChar1 :
+ (encoder.invalidUCharBuffer[0]<=0xff))
+ ) {
+ /* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */
+ subchar = new byte[1];
+ subchar[0]=cs.subChar1;
+ length=1;
+ } else {
+ /* select subChar in all other cases */
+ subchar=cs.subChar;
+ length=cs.subCharLen;
+ }
+
+ /* reset the selector for the next code point */
+ encoder.useSubChar1=false;
+
+ switch(cs.sharedData.mbcs.outputType) {
+ case MBCS_OUTPUT_2_SISO:
+ p=buffer; // p aliases buffer; i counts the bytes stored so far
+
+ /* fromUnicodeStatus contains prevLength */
+ switch(length) {
+ case 1:
+ if(encoder.fromUnicodeStatus==2) {
+ /* DBCS mode and SBCS sub char: change to SBCS */
+ encoder.fromUnicodeStatus=1;
+ p[i++]=UConverterConstants.SI;
+ }
+ p[i++]=subchar[0];
+ break;
+ case 2:
+ if(encoder.fromUnicodeStatus<=1) {
+ /* SBCS mode and DBCS sub char: change to DBCS */
+ encoder.fromUnicodeStatus=2;
+ p[i++]=UConverterConstants.SO;
+ }
+ p[i++]=subchar[0];
+ p[i++]=subchar[1];
+ break;
+ default:
+ throw new IllegalArgumentException();
+ }
+ return super.cbFromUWriteSub(encoder, source, target, offsets); // NOTE(review): the bytes assembled in p are not handed over - confirm
+ default:
+ return super.cbFromUWriteSub(encoder, source, target, offsets);
+ }
+ }
+ }
+
+
+ /**
+  * Creates a decoder for this charset.
+  * @return a new CharsetDecoderMBCS constructed with this charset
+  */
+ public CharsetDecoder newDecoder() {
+     final CharsetDecoder decoder = new CharsetDecoderMBCS(this);
+     return decoder;
+ }
+
+ /**
+  * Creates an encoder for this charset.
+  * @return a new CharsetEncoderMBCS constructed with this charset
+  */
+ public CharsetEncoder newEncoder() {
+     final CharsetEncoder encoder = new CharsetEncoderMBCS(this);
+     return encoder;
+ }
+
+}
diff --git a/src/com/ibm/icu/charset/CharsetProviderICU.java b/src/com/ibm/icu/charset/CharsetProviderICU.java
new file mode 100644
index 0000000..6832184
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetProviderICU.java
@@ -0,0 +1,248 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.charset;
+
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+
+/**
+ * A concrete subclass of CharsetProvider for loading and providing charset converters
+ * in ICU.
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+public final class CharsetProviderICU extends CharsetProvider{
+
+ /**
+ * Default constructor
+ * @draft ICU 3.6
+ * @provisional This API might change or be removed in a future release.
+ */
+ public CharsetProviderICU() {
+ }
+
+ /**
+ * Constructs a charset for the given charset name.
+ * Implements the abstract method of super class.
+ * @param charsetName charset name
+ * @return charset objet for the given charset name, null if unsupported
+ * @stable ICU 3.6
+ */
+ public final Charset charsetForName(String charsetName){
+ try{
+ // get the canonical name
+ String icuCanonicalName = getICUCanonicalName(charsetName);
+
+ // create the converter object and return it
+ if(icuCanonicalName==null || icuCanonicalName.length()==0){
+ // this would make the Charset API to throw
+ // unsupported encoding exception
+ return null;
+ }
+ return getCharset(icuCanonicalName);
+ }catch(UnsupportedCharsetException ex){
+ }catch(IOException ex){
+ }
+ return null;
+ }
+ /**
+ * Gets the canonical name of the converter as defined by Java
+ * @param enc converter name
+ * @return canonical name of the converter
+ * @internal ICU 3.6
+ * @deprecated This API is for internal ICU use only
+ */
+ public static final String getICUCanonicalName(String enc)
+ throws UnsupportedCharsetException{
+ String canonicalName = null;
+ String ret = null;
+ try{
+ if(enc!=null){
+ if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){
+ ret = canonicalName;
+ }else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){
+ ret = canonicalName;
+ }else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){
+ ret = canonicalName;
+ }else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){
+ /* we have some aliases in the form x-blah .. match those first */
+ ret = canonicalName;
+ }else if(enc.indexOf("x-")==0){
+ /* TODO: Match with getJavaCanonicalName method */
+ /*
+ char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
+ strcpy(temp, encName+2);
+ */
+ ret = enc.substring(2);
+ }else{
+ /* unsupported encoding */
+ ret = "";
+ }
+ }
+ return ret;
+ }catch(IOException ex){
+ throw new UnsupportedCharsetException(enc);
+ }
+ }
+ private static final Charset getCharset(String icuCanonicalName) throws IOException{
+ String[] aliases = (String[])getAliases(icuCanonicalName);
+ String canonicalName = getJavaCanonicalName(icuCanonicalName);
+ return (CharsetICU.getCharset(icuCanonicalName,canonicalName, aliases));
+ }
+ /**
+ * Gets the canonical name of the converter as defined by Java
+ * @param icuCanonicalName converter name
+ * @return canonical name of the converter
+ * @internal ICU 3.6
+ */
+ private static String getJavaCanonicalName(String icuCanonicalName){
+ /*
+ If a charset listed in the IANA Charset Registry is supported by an implementation
+ of the Java platform then its canonical name must be the name listed in the registry.
+ Many charsets are given more than one name in the registry, in which case the registry
+ identifies one of the names as MIME-preferred. If a charset has more than one registry
+ name then its canonical name must be the MIME-preferred name and the other names in
+ the registry must be valid aliases. If a supported charset is not listed in the IANA
+ registry then its canonical name must begin with one of the strings "X-" or "x-".
+ */
+ if(icuCanonicalName==null ){
+ return null;
+ }
+ try{
+ String cName = null;
+ /* find out the alias with MIME tag */
+ if((cName=UConverterAlias.getStandardName(icuCanonicalName, "MIME"))!=null){
+ /* find out the alias with IANA tag */
+ }else if((cName=UConverterAlias.getStandardName(icuCanonicalName, "IANA"))!=null){
+ }else {
+ /*
+ check to see if an alias already exists with x- prefix, if yes then
+ make that the canonical name
+ */
+ int aliasNum = UConverterAlias.countAliases(icuCanonicalName);
+ String name;
+ for(int i=0;i<aliasNum;i++){
+ name = UConverterAlias.getAlias(icuCanonicalName, i);
+ if(name!=null && name.indexOf("x-")==0){
+ cName = name;
+ break;
+ }
+ }
+ /* last resort just append x- to any of the alias and
+ make it the canonical name */
+ if((cName==null || cName.length()==0)){
+ name = UConverterAlias.getStandardName(icuCanonicalName, "UTR22");
+ if(name==null && icuCanonicalName.indexOf(",")!=-1){
+ name = UConverterAlias.getAlias(icuCanonicalName, 1);
+ }
+ /* if there is no UTR22 canonical name .. then just return itself*/
+ if(name==null){
+ name = icuCanonicalName;
+ }
+ cName = "x-"+ name;
+ }
+ }
+ return cName;
+ }catch (IOException ex){
+
+ }
+ return null;
+ }
+
+ /**
+ * Gets the aliases associated with the converter name
+ * @param encName converter name
+ * @return converter names as elements in an object array
+ * @internal ICU 3.6
+ */
+ private static final String[] getAliases(String encName)throws IOException{
+ String[] ret = null;
+ int aliasNum = 0;
+ int i=0;
+ int j=0;
+ String aliasArray[/*50*/] = new String[50];
+
+ if(encName != null){
+ aliasNum = UConverterAlias.countAliases(encName);
+ for(i=0,j=0;i<aliasNum;i++){
+ String name = UConverterAlias.getAlias(encName,i);
+ if(name.indexOf('+')==-1 && name.indexOf(',')==-1){
+ aliasArray[j++]= name;
+ }
+ }
+ ret = new String[j];
+ for(;--j>=0;) {
+ ret[j] = aliasArray[j];
+ }
+
+ }
+ return (ret);
+
+ }
+
+ private static final void putCharsets(Map map){
+ int num = UConverterAlias.countAvailable();
+ for(int i=0;i<num;i++) {
+ String name = UConverterAlias.getAvailableName(i);
+ try {
+ Charset cs = getCharset(name);
+ map.put(cs, getJavaCanonicalName(name));
+ }catch(UnsupportedCharsetException ex){
+ }catch (IOException e) {
+ }
+ // add only charsets that can be created!
+ }
+ }
+
+ /**
+ * Returns an iterator for the available charsets.
+ * Implements the abstract method of super class.
+ * @return Iterator the charset name iterator
+ * @stable ICU 3.6
+ */
+ public final Iterator charsets(){
+ HashMap map = new HashMap();
+ putCharsets(map);
+ return map.keySet().iterator();
+ }
+
+ /**
+ * Gets the canonical names of available converters
+ * @return Object[] names as an object array
+ * @internal ICU 3.6
+ * @deprecated This API is for internal ICU use only
+ */
+ public static final Object[] getAvailableNames(){
+ HashMap map = new HashMap();
+ putCharsets(map);
+ return map.values().toArray();
+ }
+
+ /**
+ * Return all names available
+ * @return String[] an arrya of all available names
+ * @internal ICU 3.6
+ * @deprecated This API is for internal ICU use only
+ */
+ public static final String[] getAllNames(){
+ int num = UConverterAlias.countAvailable();
+ String[] names = new String[num];
+ for(int i=0;i<num;i++) {
+ names[i] = UConverterAlias.getAvailableName(i);
+ }
+ return names;
+ }
+}
diff --git a/src/com/ibm/icu/charset/CharsetUTF16.java b/src/com/ibm/icu/charset/CharsetUTF16.java
new file mode 100644
index 0000000..f77cee5
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetUTF16.java
@@ -0,0 +1,440 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+
+class CharsetUTF16 extends CharsetICU {
+
+ protected byte[] fromUSubstitution = new byte[]{(byte)0xff, (byte)0xfd};
+
+ /**
+  * Creates the charset and records its size limits: between 2 and 4 bytes
+  * per char, and at most 1 char per byte.
+  * @param icuCanonicalName passed through to the superclass
+  * @param javaCanonicalName passed through to the superclass
+  * @param aliases passed through to the superclass
+  */
+ public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+     super(icuCanonicalName, javaCanonicalName, aliases);
+     minBytesPerChar = 2;
+     maxBytesPerChar = 4;
+     maxCharsPerByte = 1;
+ }
+ /**
+  * Decoder for the big-endian UTF-16 byte serialization (high byte of each
+  * 16-bit code unit first). Bytes of an incomplete code unit or of a lead
+  * surrogate still waiting for its trail are kept in toUBytesArray/toULength
+  * between calls; toUnicodeStatus carries one extra byte (flagged with 0x100)
+  * when toUBytesArray is occupied by an unmatched lead surrogate.
+  */
+ class CharsetDecoderUTF16 extends CharsetDecoderICU{
+
+     public CharsetDecoderUTF16(CharsetICU cs) {
+         super(cs);
+     }
+
+     /**
+      * Converts big-endian UTF-16 bytes from source into chars in target.
+      *
+      * @param source bytes to decode; on return its position is past every byte consumed
+      * @param target receives the decoded UTF-16 code units
+      * @param offsets if non-null, receives one source offset per char written
+      *                (-1 for chars completed from bytes saved by an earlier call)
+      * @param flush not used by this implementation
+      * @return UNDERFLOW when the input was consumed, OVERFLOW when target is full
+      *         (a pending trail surrogate is then parked in charErrorBufferArray),
+      *         or a malformed-input result whose length is the number of bytes of
+      *         the unpaired surrogate held in toUBytesArray
+      */
+     protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+         CoderResult cr = CoderResult.UNDERFLOW;
+         if(!source.hasRemaining() && toUnicodeStatus==0) {
+             /* no input, nothing to do */
+             return cr;
+         }
+         if(!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+
+         int sourceIndex=0, count=0, length, sourceArrayIndex;
+         char c=0, trail;
+         length = source.remaining();
+         sourceArrayIndex = source.position();
+
+         /* complete a partial UChar or pair from the last call */
+         if(toUnicodeStatus!=0) {
+             /*
+              * special case: single byte from a previous buffer,
+              * where the byte turned out not to belong to a trail surrogate
+              * and the preceding, unmatched lead surrogate was put into toUBytes[]
+              * for error handling
+              */
+             toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
+             toULength=1;
+             toUnicodeStatus=0;
+         }
+         if((count=toULength)!=0) {
+             byte[] pArray=toUBytesArray;
+             int pArrayIndex = toUBytesBegin;
+             /*
+              * while instead of do/while: with only a toUnicodeStatus byte pending
+              * and an empty source there is nothing to read yet
+              */
+             while(length>0) {
+                 pArray[count++]=source.get(sourceArrayIndex++);
+                 ++sourceIndex;
+                 --length;
+                 if(count==2) {
+                     c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     if(!UTF16.isSurrogate(c)) {
+                         /* output the BMP code point */
+                         target.put(c);
+                         if(offsets!=null) {
+                             offsets.put(-1);
+                         }
+                         count=0;
+                         c=0;
+                         break;
+                     } else if(UTF16.isLeadSurrogate(c)) {
+                         /* continue collecting bytes for the trail surrogate */
+                         c=0; /* avoid unnecessary surrogate handling below */
+                     } else {
+                         /* fall through to error handling for an unmatched trail surrogate */
+                         break;
+                     }
+                 } else if(count==4) {
+                     c=(char)(((pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     trail=(char)(((pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     if(UTF16.isTrailSurrogate(trail)) {
+                         /* output the surrogate pair */
+                         target.put(c);
+                         if(offsets!=null) {
+                             offsets.put(-1);
+                         }
+                         if(target.remaining()>=1) {
+                             target.put(trail);
+                             if(offsets!=null) {
+                                 offsets.put(-1);
+                             }
+                         } else /* targetCapacity==1 */ {
+                             charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                             charErrorBufferLength=1;
+                             /*
+                              * the pair is fully consumed: clear the saved bytes and
+                              * record the bytes read, otherwise the next call would
+                              * decode the same pair a second time
+                              */
+                             toULength=0;
+                             source.position(sourceArrayIndex);
+                             return CoderResult.OVERFLOW;
+                         }
+                         count=0;
+                         c=0;
+                         break;
+                     } else {
+                         /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+
+                         /* back out reading the code unit after it */
+                         if((sourceArrayIndex-source.position())>=2) {
+                             /* both bytes of that code unit came from this buffer */
+                             sourceArrayIndex-=2;
+                         } else {
+                             /*
+                              * if the trail unit's first byte was in a previous buffer, then
+                              * we need to put it into a special place because toUBytes[] will be
+                              * used for the lead unit's bytes
+                              */
+                             toUnicodeStatus=0x100|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK);
+                             --sourceArrayIndex;
+                         }
+                         toULength=2;
+                         /*
+                          * return right away: falling through would overwrite toULength
+                          * and re-examine the source at the backed-out position
+                          */
+                         source.position(sourceArrayIndex);
+                         return CoderResult.malformedForLength(toULength);
+                     }
+                 }
+             }
+             toULength=(byte)count;
+         }
+
+         /* copy an even number of bytes for complete UChars */
+         count=2*target.remaining();
+         if(count>length) {
+             count=length&~1;
+         }
+         if(c==0 && count>0) {
+             length-=count;
+             count>>=1;
+             //targetCapacity-=count;
+             if(offsets==null) {
+                 do {
+                     c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     sourceArrayIndex+=2;
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put(c);
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                         UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                     ) {
+                         sourceArrayIndex+=2;
+                         --count;
+                         target.put(c);
+                         target.put(trail);
+                     } else {
+                         break;
+                     }
+                 } while(--count>0);
+             } else {
+                 do {
+                     c=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     sourceArrayIndex+=2;
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put(c);
+                         offsets.put(sourceIndex);
+                         sourceIndex+=2;
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                         UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                     ) {
+                         sourceArrayIndex+=2;
+                         --count;
+                         target.put(c);
+                         target.put(trail);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         sourceIndex+=4;
+                     } else {
+                         break;
+                     }
+                 } while(--count>0);
+             }
+
+             if(count==0) {
+                 /* done with the loop for complete UChars */
+                 c=0;
+             } else {
+                 /* keep c for surrogate handling, trail will be set there */
+                 length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+             }
+         }
+
+         if(c!=0) {
+             /*
+              * c is a surrogate, and
+              * - source or target too short
+              * - or the surrogate is unmatched
+              */
+             toUBytesArray[toUBytesBegin+0]=(byte)(c>>>8);
+             toUBytesArray[toUBytesBegin+1]=(byte)c;
+             toULength=2;
+
+             if(UTF16.isLeadSurrogate(c)) {
+                 if(length>=2) {
+                     if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
+                         /* output the surrogate pair, will overflow (see conditions comment above) */
+                         sourceArrayIndex+=2;
+                         length-=2;
+                         target.put(c);
+                         if(offsets!=null) {
+                             offsets.put(sourceIndex);
+                         }
+                         charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                         charErrorBufferLength=1;
+                         toULength=0;
+                         cr = CoderResult.OVERFLOW;
+                     } else {
+                         /* unmatched lead surrogate; the length is the byte count held in toUBytes[] */
+                         cr = CoderResult.malformedForLength(toULength);
+                     }
+                 } else {
+                     /* see if the trail surrogate is in the next buffer */
+                 }
+             } else {
+                 /* unmatched trail surrogate; the length is the byte count held in toUBytes[] */
+                 cr = CoderResult.malformedForLength(toULength);
+             }
+         }
+
+
+         /* check for a remaining source byte */
+         if(!cr.isError()){
+             if(length>0) {
+                 if(!target.hasRemaining()) {
+                     cr = CoderResult.OVERFLOW;
+                 } else {
+                     /* it must be length==1 because otherwise the above would have copied more */
+                     toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
+                 }
+             }
+         }
+         source.position(sourceArrayIndex);
+
+         return cr;
+     }
+
+ }
+ /**
+  * Encoder for the big-endian UTF-16 byte serialization (high byte of each
+  * 16-bit code unit first). A lead surrogate that ends a buffer is kept in
+  * fromUChar32 until the next call supplies its trail surrogate.
+  */
+ class CharsetEncoderUTF16 extends CharsetEncoderICU{
+
+     public CharsetEncoderUTF16(CharsetICU cs) {
+         super(cs, fromUSubstitution);
+         implReset();
+     }
+
+     /* fromUnicodeStatus value meaning that the byte order mark has not been written yet */
+     private final static int NEED_TO_WRITE_BOM = 1;
+
+     /** Resets the encoder and re-arms the byte order mark. */
+     protected void implReset() {
+         super.implReset();
+         fromUnicodeStatus = NEED_TO_WRITE_BOM;
+     }
+
+     /**
+      * Converts chars from source into big-endian UTF-16 bytes in target.
+      *
+      * @param source chars to encode; on return its position is past every char consumed
+      * @param target receives the encoded bytes
+      * @param offsets if non-null, receives one source offset per byte written
+      * @param flush not used by this implementation
+      * @return UNDERFLOW when the input was consumed, OVERFLOW (directly or via
+      *         fromUWriteBytes) when target is full, or a malformed-input result
+      *         of length 1 for an unpaired surrogate, which is left in fromUChar32
+      */
+     protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+         CoderResult cr = CoderResult.UNDERFLOW;
+         if(!source.hasRemaining()) {
+             /* no input, nothing to do */
+             return cr;
+         }
+         char c;
+         /* write the BOM if necessary */
+         if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
+             byte bom[]={ (byte)0xfe, (byte)0xff };
+             cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
+             if(cr.isError()){
+                 return cr;
+             }
+             fromUnicodeStatus=0;
+         }
+
+         if(!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+
+         int sourceIndex = 0;
+         char trail = 0;
+         int length = source.remaining();
+         int sourceArrayIndex = source.position();
+
+         /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+
+         if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex)) && target.remaining()>=4) {
+             /* the last buffer ended with a lead surrogate, output the surrogate pair */
+             ++sourceArrayIndex;
+             --length;
+             target.put((byte)(c>>>8));
+             target.put((byte)c);
+             target.put((byte)(trail>>>8));
+             target.put((byte)trail);
+             if(offsets!=null && offsets.remaining()>=4) {
+                 offsets.put(-1);
+                 offsets.put(-1);
+                 offsets.put(-1);
+                 offsets.put(-1);
+             }
+             sourceIndex=1;
+             fromUChar32=c=0;
+         }
+         byte overflow[/*4*/] = new byte[4];
+
+         if(c==0) {
+             /* copy an even number of bytes for complete UChars */
+             int count=2*length;
+             int targetCapacity = target.remaining();
+             if(count>targetCapacity) {
+                 count=targetCapacity&~1;
+             }
+             /* count is even */
+             targetCapacity-=count;
+             count>>=1;
+             length-=count;
+
+             if(offsets==null) {
+                 while(count>0) {
+                     c= source.get(sourceArrayIndex++);
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put((byte)(c>>>8));
+                         target.put((byte)c);
+
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                         ++sourceArrayIndex;
+                         --count;
+                         target.put((byte)(c>>>8));
+                         target.put((byte)c);
+                         target.put((byte)(trail>>>8));
+                         target.put((byte)trail);
+                     } else {
+                         break;
+                     }
+                     --count;
+                 }
+             } else {
+                 while(count>0) {
+                     c=source.get(sourceArrayIndex++);
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put((byte)(c>>>8));
+                         target.put((byte)c);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex++);
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                         ++sourceArrayIndex;
+                         --count;
+                         target.put((byte)(c>>>8));
+                         target.put((byte)c);
+                         target.put((byte)(trail>>>8));
+                         target.put((byte)trail);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         sourceIndex+=2;
+                     } else {
+                         break;
+                     }
+                     --count;
+                 }
+             }
+
+             if(count==0) {
+                 /* done with the loop for complete UChars */
+                 if(length>0 && targetCapacity>0) {
+                     /*
+                      * there is more input and some target capacity -
+                      * it must be targetCapacity==1 because otherwise
+                      * the above would have copied more;
+                      * prepare for overflow output
+                      */
+                     if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
+                         overflow[0]=(byte)(c>>>8);
+                         overflow[1]=(byte)c;
+                         length=2; /* 2 bytes to output */
+                         c=0;
+                     /* } else { keep c for surrogate handling, length will be set there */
+                     }
+                 } else {
+                     length=0;
+                     c=0;
+                 }
+             } else {
+                 /* keep c for surrogate handling, length will be set there */
+                 targetCapacity+=2*count;
+             }
+         } else {
+             length=0; /* from here on, length counts the bytes in overflow[] */
+         }
+
+         if(c!=0) {
+             /*
+              * c is a surrogate, and
+              * - source or target too short
+              * - or the surrogate is unmatched
+              */
+             length=0;
+             if(UTF16.isLeadSurrogate(c)) {
+                 if(sourceArrayIndex<source.limit()) {
+                     if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                         /* output the surrogate pair, will overflow (see conditions comment above) */
+                         ++sourceArrayIndex;
+                         overflow[0]=(byte)(c>>>8);
+                         overflow[1]=(byte)c;
+                         overflow[2]=(byte)(trail>>>8);
+                         overflow[3]=(byte)trail;
+                         length=4; /* 4 bytes to output */
+                         c=0;
+                     } else {
+                         /*
+                          * unmatched lead surrogate: one malformed char.
+                          * The argument is a length, not a buffer index; an index
+                          * of 0 (lead carried over from the previous buffer) would
+                          * make malformedForLength throw.
+                          */
+                         cr = CoderResult.malformedForLength(1);
+                     }
+                 } else {
+                     /* see if the trail surrogate is in the next buffer */
+                 }
+             } else {
+                 /* unmatched trail surrogate: one malformed char */
+                 cr = CoderResult.malformedForLength(1);
+             }
+             fromUChar32=c;
+         }
+         source.position(sourceArrayIndex);
+         if(length>0) {
+             /* output length bytes with overflow (length>targetCapacity>0) */
+             cr = fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
+         }
+         return cr;
+     }
+ }
+ /** Creates a new big-endian UTF-16 decoder bound to this charset. */
+ public CharsetDecoder newDecoder() {
+     final CharsetDecoder decoder = new CharsetDecoderUTF16(this);
+     return decoder;
+ }
+
+ /** Creates a new big-endian UTF-16 encoder bound to this charset. */
+ public CharsetEncoder newEncoder() {
+     final CharsetEncoder encoder = new CharsetEncoderUTF16(this);
+     return encoder;
+ }
+
+}
diff --git a/src/com/ibm/icu/charset/CharsetUTF16LE.java b/src/com/ibm/icu/charset/CharsetUTF16LE.java
new file mode 100644
index 0000000..bb2be61
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetUTF16LE.java
@@ -0,0 +1,444 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+
+/**
+ * @author Niti Hantaweepant
+ */
+class CharsetUTF16LE extends CharsetICU {
+
+ protected byte[] fromUSubstitution = new byte[]{(byte)0xfd, (byte)0xff};
+
+ /**
+  * Creates the charset and records its size limits: between 2 and 4 bytes
+  * per char, and at most 1 char per byte.
+  * @param icuCanonicalName passed through to the superclass
+  * @param javaCanonicalName passed through to the superclass
+  * @param aliases passed through to the superclass
+  */
+ public CharsetUTF16LE(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+     super(icuCanonicalName, javaCanonicalName, aliases);
+     minBytesPerChar = 2;
+     maxBytesPerChar = 4;
+     maxCharsPerByte = 1;
+ }
+ /**
+  * Decoder for the little-endian UTF-16 byte serialization (low byte of each
+  * 16-bit code unit first). Bytes of an incomplete code unit or of a lead
+  * surrogate still waiting for its trail are kept in toUBytesArray/toULength
+  * between calls; toUnicodeStatus carries one extra byte (flagged with 0x100)
+  * when toUBytesArray is occupied by an unmatched lead surrogate.
+  */
+ class CharsetDecoderUTF16LE extends CharsetDecoderICU{
+
+     public CharsetDecoderUTF16LE(CharsetICU cs) {
+         super(cs);
+     }
+
+     /**
+      * Converts little-endian UTF-16 bytes from source into chars in target.
+      *
+      * @param source bytes to decode; on return its position is past every byte consumed
+      * @param target receives the decoded UTF-16 code units
+      * @param offsets if non-null, receives one source offset per char written
+      *                (-1 for chars completed from bytes saved by an earlier call)
+      * @param flush not used by this implementation
+      * @return UNDERFLOW when the input was consumed, OVERFLOW when target is full
+      *         (a pending trail surrogate is then parked in charErrorBufferArray),
+      *         or a malformed-input result whose length is the number of bytes of
+      *         the unpaired surrogate held in toUBytesArray
+      */
+     protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+         CoderResult cr = CoderResult.UNDERFLOW;
+         if(!source.hasRemaining() && toUnicodeStatus==0) {
+             /* no input, nothing to do */
+             return cr;
+         }
+         if(!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+
+         int sourceIndex=0, count=0, length, sourceArrayIndex;
+         char c=0, trail;
+         length = source.remaining();
+         sourceArrayIndex = source.position();
+
+         /* complete a partial UChar or pair from the last call */
+         if(toUnicodeStatus!=0) {
+             /*
+              * special case: single byte from a previous buffer,
+              * where the byte turned out not to belong to a trail surrogate
+              * and the preceding, unmatched lead surrogate was put into toUBytes[]
+              * for error handling
+              */
+             toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
+             toULength=1;
+             toUnicodeStatus=0;
+         }
+         if((count=toULength)!=0) {
+             byte[] pArray=toUBytesArray;
+             int pArrayIndex = toUBytesBegin;
+             /*
+              * while instead of do/while: with only a toUnicodeStatus byte pending
+              * and an empty source there is nothing to read yet
+              */
+             while(length>0) {
+                 pArray[count++]=source.get(sourceArrayIndex++);
+                 ++sourceIndex;
+                 --length;
+                 if(count==2) {
+                     c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     if(!UTF16.isSurrogate(c)) {
+                         /* output the BMP code point */
+                         target.put(c);
+                         if(offsets!=null) {
+                             offsets.put(-1);
+                         }
+                         count=0;
+                         c=0;
+                         break;
+                     } else if(UTF16.isLeadSurrogate(c)) {
+                         /* continue collecting bytes for the trail surrogate */
+                         c=0; /* avoid unnecessary surrogate handling below */
+                     } else {
+                         /* fall through to error handling for an unmatched trail surrogate */
+                         break;
+                     }
+                 } else if(count==4) {
+                     c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     trail=(char)(((pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     if(UTF16.isTrailSurrogate(trail)) {
+                         /* output the surrogate pair */
+                         target.put(c);
+                         if(offsets!=null) {
+                             offsets.put(-1);
+                         }
+                         if(target.remaining()>=1) {
+                             target.put(trail);
+                             if(offsets!=null) {
+                                 offsets.put(-1);
+                             }
+                         } else /* targetCapacity==1 */ {
+                             charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                             charErrorBufferLength=1;
+                             /*
+                              * the pair is fully consumed: clear the saved bytes and
+                              * record the bytes read, otherwise the next call would
+                              * decode the same pair a second time
+                              */
+                             toULength=0;
+                             source.position(sourceArrayIndex);
+                             return CoderResult.OVERFLOW;
+                         }
+                         count=0;
+                         c=0;
+                         break;
+                     } else {
+                         /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+
+                         /* back out reading the code unit after it */
+                         if((sourceArrayIndex-source.position())>=2) {
+                             /* both bytes of that code unit came from this buffer */
+                             sourceArrayIndex-=2;
+                         } else {
+                             /*
+                              * if the trail unit's first byte was in a previous buffer, then
+                              * we need to put it into a special place because toUBytes[] will be
+                              * used for the lead unit's bytes
+                              */
+                             toUnicodeStatus=0x100|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK);
+                             --sourceArrayIndex;
+                         }
+                         toULength=2;
+                         /*
+                          * return right away: falling through would overwrite toULength
+                          * and re-examine the source at the backed-out position
+                          */
+                         source.position(sourceArrayIndex);
+                         return CoderResult.malformedForLength(toULength);
+                     }
+                 }
+             }
+             toULength=(byte)count;
+         }
+
+         /* copy an even number of bytes for complete UChars */
+         count=2*target.remaining();
+         if(count>length) {
+             count=length&~1;
+         }
+         if(c==0 && count>0) {
+             length-=count;
+             count>>=1;
+             //targetCapacity-=count;
+             if(offsets==null) {
+                 do {
+                     c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     sourceArrayIndex+=2;
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put(c);
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                         UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                     ) {
+                         sourceArrayIndex+=2;
+                         --count;
+                         target.put(c);
+                         target.put(trail);
+                     } else {
+                         break;
+                     }
+                 } while(--count>0);
+             } else {
+                 do {
+                     c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                     sourceArrayIndex+=2;
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put(c);
+                         offsets.put(sourceIndex);
+                         sourceIndex+=2;
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                         UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                     ) {
+                         sourceArrayIndex+=2;
+                         --count;
+                         target.put(c);
+                         target.put(trail);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         sourceIndex+=4;
+                     } else {
+                         break;
+                     }
+                 } while(--count>0);
+             }
+
+             if(count==0) {
+                 /* done with the loop for complete UChars */
+                 c=0;
+             } else {
+                 /* keep c for surrogate handling, trail will be set there */
+                 length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+             }
+         }
+
+         if(c!=0) {
+             /*
+              * c is a surrogate, and
+              * - source or target too short
+              * - or the surrogate is unmatched
+              */
+
+             toUBytesArray[toUBytesBegin+0]=(byte)c;
+             toUBytesArray[toUBytesBegin+1]=(byte)(c>>>8);
+             toULength=2;
+
+             if(UTF16.isLeadSurrogate(c)) {
+                 if(length>=2) {
+                     if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
+                         /* output the surrogate pair, will overflow (see conditions comment above) */
+                         sourceArrayIndex+=2;
+                         length-=2;
+                         target.put(c);
+                         if(offsets!=null) {
+                             offsets.put(sourceIndex);
+                         }
+                         charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                         charErrorBufferLength=1;
+                         toULength=0;
+                         cr = CoderResult.OVERFLOW;
+                     } else {
+                         /* unmatched lead surrogate; the length is the byte count held in toUBytes[] */
+                         cr = CoderResult.malformedForLength(toULength);
+                     }
+                 } else {
+                     /* see if the trail surrogate is in the next buffer */
+                 }
+             } else {
+                 /* unmatched trail surrogate; the length is the byte count held in toUBytes[] */
+                 cr = CoderResult.malformedForLength(toULength);
+             }
+         }
+
+
+         /* check for a remaining source byte */
+         if(!cr.isError()){
+             if(length>0) {
+                 if(!target.hasRemaining()) {
+                     cr = CoderResult.OVERFLOW;
+                 } else {
+                     /* it must be length==1 because otherwise the above would have copied more */
+                     toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
+                 }
+             }
+         }
+         source.position(sourceArrayIndex);
+
+         return cr;
+     }
+
+ }
+ /**
+  * Encoder for the little-endian UTF-16 byte serialization (low byte of each
+  * 16-bit code unit first). A lead surrogate that ends a buffer is kept in
+  * fromUChar32 until the next call supplies its trail surrogate.
+  */
+ class CharsetEncoderUTF16LE extends CharsetEncoderICU{
+
+     public CharsetEncoderUTF16LE(CharsetICU cs) {
+         super(cs, fromUSubstitution);
+         implReset();
+     }
+
+     /* fromUnicodeStatus value meaning that the byte order mark has not been written yet */
+     private final static int NEED_TO_WRITE_BOM = 1;
+
+     /** Resets the encoder and re-arms the byte order mark. */
+     protected void implReset() {
+         super.implReset();
+         fromUnicodeStatus = NEED_TO_WRITE_BOM;
+     }
+
+     /**
+      * Converts chars from source into little-endian UTF-16 bytes in target.
+      *
+      * @param source chars to encode; on return its position is past every char consumed
+      * @param target receives the encoded bytes
+      * @param offsets if non-null, receives one source offset per byte written
+      * @param flush not used by this implementation
+      * @return UNDERFLOW when the input was consumed, OVERFLOW (directly or via
+      *         fromUWriteBytes) when target is full, or a malformed-input result
+      *         of length 1 for an unpaired surrogate, which is left in fromUChar32
+      */
+     protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
+         CoderResult cr = CoderResult.UNDERFLOW;
+         if(!source.hasRemaining()) {
+             /* no input, nothing to do */
+             return cr;
+         }
+         char c;
+         /* write the BOM if necessary */
+         if(fromUnicodeStatus==NEED_TO_WRITE_BOM && writeBOM) {
+             byte bom[]={ (byte)0xff, (byte)0xfe };
+             cr = fromUWriteBytes(this,bom, 0, bom.length, target, offsets, -1);
+             if(cr.isError()){
+                 return cr;
+             }
+             fromUnicodeStatus=0;
+         }
+
+         if(!target.hasRemaining()) {
+             return CoderResult.OVERFLOW;
+         }
+
+         int sourceIndex = 0;
+         char trail = 0;
+         int length = source.remaining();
+         int sourceArrayIndex = source.position();
+
+         /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+
+         if((c=(char)fromUChar32)!=0 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex)) && target.remaining()>=4) {
+             /* the last buffer ended with a lead surrogate, output the surrogate pair */
+             ++sourceArrayIndex;
+             --length;
+             target.put((byte)c);
+             target.put((byte)(c>>>8));
+             target.put((byte)trail);
+             target.put((byte)(trail>>>8));
+             if(offsets!=null && offsets.remaining()>=4) {
+                 offsets.put(-1);
+                 offsets.put(-1);
+                 offsets.put(-1);
+                 offsets.put(-1);
+             }
+             sourceIndex=1;
+             fromUChar32=c=0;
+         }
+         byte overflow[/*4*/] = new byte[4];
+
+         if(c==0) {
+             /* copy an even number of bytes for complete UChars */
+             int count=2*length;
+             int targetCapacity = target.remaining();
+             if(count>targetCapacity) {
+                 count=targetCapacity&~1;
+             }
+             /* count is even */
+             targetCapacity-=count;
+             count>>=1;
+             length-=count;
+
+             if(offsets==null) {
+                 while(count>0) {
+                     c= source.get(sourceArrayIndex++);
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put((byte)c);
+                         target.put((byte)(c>>>8));
+
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                         ++sourceArrayIndex;
+                         --count;
+                         target.put((byte)c);
+                         target.put((byte)(c>>>8));
+                         target.put((byte)trail);
+                         target.put((byte)(trail>>>8));
+                     } else {
+                         break;
+                     }
+                     --count;
+                 }
+             } else {
+                 while(count>0) {
+                     c=source.get(sourceArrayIndex++);
+                     if(!UTF16.isSurrogate(c)) {
+                         target.put((byte)c);
+                         target.put((byte)(c>>>8));
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex++);
+                     } else if(UTF16.isLeadSurrogate(c) && count>=2 && UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                         ++sourceArrayIndex;
+                         --count;
+                         target.put((byte)c);
+                         target.put((byte)(c>>>8));
+                         target.put((byte)trail);
+                         target.put((byte)(trail>>>8));
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         offsets.put(sourceIndex);
+                         sourceIndex+=2;
+                     } else {
+                         break;
+                     }
+                     --count;
+                 }
+             }
+
+             if(count==0) {
+                 /* done with the loop for complete UChars */
+                 if(length>0 && targetCapacity>0) {
+                     /*
+                      * there is more input and some target capacity -
+                      * it must be targetCapacity==1 because otherwise
+                      * the above would have copied more;
+                      * prepare for overflow output
+                      */
+                     if(!UTF16.isSurrogate(c=source.get(sourceArrayIndex++))) {
+                         overflow[0]=(byte)c;
+                         overflow[1]=(byte)(c>>>8);
+                         length=2; /* 2 bytes to output */
+                         c=0;
+                     /* } else { keep c for surrogate handling, length will be set there */
+                     }
+                 } else {
+                     length=0;
+                     c=0;
+                 }
+             } else {
+                 /* keep c for surrogate handling, length will be set there */
+                 targetCapacity+=2*count;
+             }
+         } else {
+             length=0; /* from here on, length counts the bytes in overflow[] */
+         }
+
+         if(c!=0) {
+             /*
+              * c is a surrogate, and
+              * - source or target too short
+              * - or the surrogate is unmatched
+              */
+             length=0;
+             if(UTF16.isLeadSurrogate(c)) {
+                 if(sourceArrayIndex<source.limit()) {
+                     if(UTF16.isTrailSurrogate(trail=source.get(sourceArrayIndex))) {
+                         /* output the surrogate pair, will overflow (see conditions comment above) */
+                         ++sourceArrayIndex;
+                         overflow[0]=(byte)c;
+                         overflow[1]=(byte)(c>>>8);
+                         overflow[2]=(byte)trail;
+                         overflow[3]=(byte)(trail>>>8);
+                         length=4; /* 4 bytes to output */
+                         c=0;
+                     } else {
+                         /*
+                          * unmatched lead surrogate: one malformed char.
+                          * The argument is a length, not a buffer index; an index
+                          * of 0 (lead carried over from the previous buffer) would
+                          * make malformedForLength throw.
+                          */
+                         cr = CoderResult.malformedForLength(1);
+                     }
+                 } else {
+                     /* see if the trail surrogate is in the next buffer */
+                 }
+             } else {
+                 /* unmatched trail surrogate: one malformed char */
+                 cr = CoderResult.malformedForLength(1);
+             }
+             fromUChar32=c;
+         }
+         source.position(sourceArrayIndex);
+         if(length>0) {
+             /* output length bytes with overflow (length>targetCapacity>0) */
+             cr = fromUWriteBytes(this, overflow, 0, length, target, offsets, sourceIndex);
+         }
+         return cr;
+     }
+ }
+ /** Creates a new little-endian UTF-16 decoder bound to this charset. */
+ public CharsetDecoder newDecoder() {
+     final CharsetDecoder decoder = new CharsetDecoderUTF16LE(this);
+     return decoder;
+ }
+
+ /** Creates a new little-endian UTF-16 encoder bound to this charset. */
+ public CharsetEncoder newEncoder() {
+     final CharsetEncoder encoder = new CharsetEncoderUTF16LE(this);
+     return encoder;
+ }
+
+}
diff --git a/src/com/ibm/icu/charset/CharsetUTF32.java b/src/com/ibm/icu/charset/CharsetUTF32.java
new file mode 100644
index 0000000..a42996b
--- /dev/null
+++ b/src/com/ibm/icu/charset/CharsetUTF32.java
@@ -0,0 +1,309 @@
+/**
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*
+*******************************************************************************
+*/
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+import com.ibm.icu.text.UTF16;
+/**
+ * @author Niti Hantaweepant
+ */
+class CharsetUTF32 extends CharsetICU {
+
+ protected byte[] fromUSubstitution = new byte[]{(byte)0, (byte)0, (byte)0xff, (byte)0xfd};
+
+ public CharsetUTF32(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+ super(icuCanonicalName, javaCanonicalName, aliases);
+ maxBytesPerChar = 4;
+ minBytesPerChar = 4;
+ maxCharsPerByte = 1;
+ }
+ class CharsetDecoderUTF32 extends CharsetDecoderICU{
+
+ public CharsetDecoderUTF32(CharsetICU cs) {
+ super(cs);
+ }
+
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+ CoderResult cr = CoderResult.UNDERFLOW;
+
+ int sourceArrayIndex = source.position();
+ int ch, i;
+
+ donefornow:
+ {
+ /* UTF-8 returns here for only non-offset, this needs to change.*/
+ if (toUnicodeStatus != 0 && target.hasRemaining()) {
+ i = toULength; /* res