main/classes/collate/src/com/ibm/icu/text/Collator.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
 package com.ibm.icu.text;

 import java.util.Comparator;
 import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Locale;
 import java.util.MissingResourceException;
 import java.util.Set;

 import com.ibm.icu.impl.ICUDebug;
 import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.coll.CollationData;
 import com.ibm.icu.impl.coll.CollationRoot;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.util.Freezable;
 import com.ibm.icu.util.ICUException;
 import com.ibm.icu.util.ULocale;
 import com.ibm.icu.util.ULocale.Category;
 import com.ibm.icu.util.UResourceBundle;
 import com.ibm.icu.util.VersionInfo;

 /**
 * {@icuenhanced java.text.Collator}.{@icu _usage_}
 *
 * <p>Collator performs locale-sensitive string comparison. A concrete
 * subclass, RuleBasedCollator, allows customization of the collation
 * ordering by the use of rule sets.</p>
 *
 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link Freezable}.
 *
 * <p>Following the <a href=http://www.unicode.org>Unicode
 * Consortium</a>'s specifications for the
 * <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
 * Algorithm (UCA)</a>, there are 5 different levels of strength used
 * in comparisons:
 *
 * <ul>
 * <li>PRIMARY strength: Typically, this is used to denote differences between
 *     base characters (for example, "a" &lt; "b").
 *     It is the strongest difference. For example, dictionaries are divided
 *     into different sections by base character.
 * <li>SECONDARY strength: Accents in the characters are considered secondary
 *     differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other
 *     differences
 *     between letters can also be considered secondary differences, depending
 *     on the language. A secondary difference is ignored when there is a
 *     primary difference anywhere in the strings.
 * <li>TERTIARY strength: Upper and lower case differences in characters are
 *     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
 *     "a&ograve;"). In addition, a variant of a letter differs from the base
 *     form on the tertiary strength (such as "A" and "&#9398;"). Another
 *     example is the
 *     difference between large and small Kana. A tertiary difference is ignored
 *     when there is a primary or secondary difference anywhere in the strings.
 * <li>QUATERNARY strength: When punctuation is ignored
 *     (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
 *     Ignoring Punctuations in the User Guide</a>) at PRIMARY to TERTIARY
 *     strength, an additional strength level can
 *     be used to distinguish words with and without punctuation (for example,
 *     "ab" &lt; "a-b" &lt; "aB").
 *     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
 *     difference. The QUATERNARY strength should only be used if ignoring
 *     punctuation is required.
 * <li>IDENTICAL strength:
 *     When all other strengths are equal, the IDENTICAL strength is used as a
 *     tiebreaker. The Unicode code point values of the NFD form of each string
 *     are compared, just in case there is no difference.
 *     For example, Hebrew cantillation marks are only distinguished at this
 *     strength. This strength should be used sparingly, because two strings that
 *     differ only in their code point values are an extremely rare occurrence.
 *     Using this strength substantially decreases the performance for both
 *     comparison and collation key generation APIs. This strength also
 *     increases the size of the collation key.
 * </ul>
 *
 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
 * the canonical decomposition mode and one that does not use any decomposition.
 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
 * is not supported here. If the canonical
 * decomposition mode is set, the Collator handles un-normalized text properly,
 * producing the same results as if the text were normalized in NFD. If
 * canonical decomposition is turned off, it is the user's responsibility to
 * ensure that all text is already in the appropriate form before performing
 * a comparison or before getting a CollationKey.</p>
 *
 * <p>For more information about the collation service see the
 * <a href="http://userguide.icu-project.org/collation">User Guide</a>.</p>
 *
 * <p>Examples of use
 * <pre>
 * // Get the Collator for US English and set its strength to PRIMARY
 * Collator usCollator = Collator.getInstance(Locale.US);
 * usCollator.setStrength(Collator.PRIMARY);
 * if (usCollator.compare("abc", "ABC") == 0) {
 *     System.out.println("Strings are equivalent");
 * }
 *
 * The following example shows how to compare two strings using the
 * Collator for the default locale.
 *
 * // Compare two strings in the default locale
 * Collator myCollator = Collator.getInstance();
 * myCollator.setDecomposition(NO_DECOMPOSITION);
 * if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
 *     System.out.println("&agrave;&#92;u0325 is not equals to a&#92;u0325&#768; without decomposition");
 *     myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
 *     if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
 *         System.out.println("Error: &agrave;&#92;u0325 should be equals to a&#92;u0325&#768; with decomposition");
 *     }
 *     else {
 *         System.out.println("&agrave;&#92;u0325 is equals to a&#92;u0325&#768; with decomposition");
 *     }
 * }
 * else {
 *     System.out.println("Error: &agrave;&#92;u0325 should be not equals to a&#92;u0325&#768; without decomposition");
 * }
 * </pre>
 * </p>
 * @see RuleBasedCollator
 * @see CollationKey
 * @author Syn Wee Quek
 * @stable ICU 2.8
 */
 public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable
 {
     // public data members ---------------------------------------------------

     /**
      * Strongest collator strength value. Typically used to denote differences
      * between base characters. See class documentation for more explanation.
      * @see #setStrength
      * @see #getStrength
      * @stable ICU 2.8
      */
     public final static int PRIMARY = 0;

     /**
      * Second level collator strength value.
      * Accents in the characters are considered secondary differences.
      * Other differences between letters can also be considered secondary
      * differences, depending on the language.
      * See class documentation for more explanation.
      * @see #setStrength
      * @see #getStrength
      * @stable ICU 2.8
      */
     public final static int SECONDARY = 1;

     /**
      * Third level collator strength value.
      * Upper and lower case differences in characters are distinguished at this
      * strength level. In addition, a variant of a letter differs from the base
      * form on the tertiary level.
      * See class documentation for more explanation.
      * @see #setStrength
      * @see #getStrength
      * @stable ICU 2.8
      */
     public final static int TERTIARY = 2;

     /**
      * {@icu} Fourth level collator strength value.
      * When punctuation is ignored
      * (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
      * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
      * strength, an additional strength level can
      * be used to distinguish words with and without punctuation.
      * See class documentation for more explanation.
      * @see #setStrength
      * @see #getStrength
      * @stable ICU 2.8
      */
     public final static int QUATERNARY = 3;

     /**
      * Smallest Collator strength value. When all other strengths are equal,
      * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
      * values of the NFD form of each string are compared, just in case there
      * is no difference.
      * See class documentation for more explanation.
      *
      * <p>Note that this value is different from the JDK's, and that it is not
      * numerically contiguous with the other strength constants.</p>
      * @see #setStrength
      * @see #getStrength
      * @stable ICU 2.8
      */
     public final static int IDENTICAL = 15;

     /**
      * {@icunote} This is for backwards compatibility with Java APIs only.  It
      * should not be used, IDENTICAL should be used instead.  ICU's
      * collation does not support Java's FULL_DECOMPOSITION mode.
      * This constant is defined as an alias of {@link #IDENTICAL} and has the same value.
      * @stable ICU 3.4
      */
     public final static int FULL_DECOMPOSITION = IDENTICAL;

     /**
      * Decomposition mode value. With NO_DECOMPOSITION set, Strings
      * will not be decomposed for collation. This is the default
      * decomposition setting unless otherwise specified by the locale
      * used to create the Collator.
      *
      * <p><strong>Note</strong> this value is different from the JDK's.</p>
      * @see #CANONICAL_DECOMPOSITION
      * @see #getDecomposition
      * @see #setDecomposition
      * @stable ICU 2.8
      */
     public final static int NO_DECOMPOSITION = 16;

     /**
      * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
      * characters that are canonical variants according to the Unicode standard
      * will be decomposed for collation.
      *
      * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
      * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
      * Unicode Technical Report #15</a>.
      * </p>
      * @see #NO_DECOMPOSITION
      * @see #getDecomposition
      * @see #setDecomposition
      * @stable ICU 2.8
      */
     public final static int CANONICAL_DECOMPOSITION = 17;

     /**
      * Reordering codes for non-script groups that can be reordered under collation.
      *
      * @see #getReorderCodes
      * @see #setReorderCodes
      * @see #getEquivalentReorderCodes
      * @stable ICU 4.8
      */
     public static interface ReorderCodes {
         // Fields declared in an interface are implicitly public, static and final,
         // so the modifiers are not repeated on each constant.

         /**
          * Special code that selects the default reordering codes of a locale.
          * Has the same numeric value as UScript.INVALID_CODE.
          * @stable ICU 4.8
          */
         int DEFAULT = -1;

         /**
          * Special code that selects "no reordering codes".
          * Shares its numeric value (UScript.UNKNOWN) with {@link #OTHERS}.
          * @stable ICU 4.8
          */
         int NONE = UScript.UNKNOWN;

         /**
          * Special code that stands for every code used for reordering that is neither one of
          * the ReorderCodes groups nor listed explicitly in a reordering.
          * Shares its numeric value (UScript.UNKNOWN) with {@link #NONE}.
          * @stable ICU 4.8
          */
         int OTHERS = UScript.UNKNOWN;

         /**
          * Group of characters with the space property.
          * Equivalent to the rule value "space".
          * @stable ICU 4.8
          */
         int SPACE = 0x1000;

         /**
          * First entry in the enumeration of reordering groups; meant for range checks
          * and for enumerating the reorder codes.
          * @stable ICU 4.8
          */
         int FIRST = SPACE;

         /**
          * Group of characters with the punctuation property.
          * Equivalent to the rule value "punct".
          * @stable ICU 4.8
          */
         int PUNCTUATION = 0x1001;

         /**
          * Group of characters with the symbol property.
          * Equivalent to the rule value "symbol".
          * @stable ICU 4.8
          */
         int SYMBOL = 0x1002;

         /**
          * Group of characters with the currency property.
          * Equivalent to the rule value "currency".
          * @stable ICU 4.8
          */
         int CURRENCY = 0x1003;

         /**
          * Group of characters with the digit property.
          * Equivalent to the rule value "digit".
          * @stable ICU 4.8
          */
         int DIGIT = 0x1004;

         /**
          * Exclusive upper limit of the reorder codes; meant for range checks
          * and for enumerating the reorder codes.
          * @stable ICU 4.8
          */
         int LIMIT = 0x1005;
     }

     // public methods --------------------------------------------------------

     /**
      * Compares the equality of two Collator objects. Collator objects are equal if they have the same
      * collation (sorting &amp; searching) behavior.
      *
      * <p>The base class checks for null and for equal types.
      * Subclasses should override.
      *
      * @param obj the Collator to compare to.
      * @return true if this Collator has exactly the same collation behavior as obj, false otherwise.
      * @stable ICU 2.8
      */
     @Override
     public boolean equals(Object obj) {
         // Subclasses: Call this method and then add more specific checks.
         if (this == obj) {
             // Same instance: trivially equal.
             return true;
         }
         if (obj == null) {
             return false;
         }
         // The base class can only vouch for the runtime types being identical.
         return getClass() == obj.getClass();
     }

     // public setters --------------------------------------------------------

     /**
      * Guard used by the setters: rejects the modification when this Collator is frozen.
      *
      * @throws UnsupportedOperationException if {@code isFrozen()} returns true
      */
     private void checkNotFrozen() {
         if (!isFrozen()) {
             return;
         }
         throw new UnsupportedOperationException("Attempt to modify frozen Collator");
     }

     /**
      * Sets this Collator's strength attribute. The strength attribute
      * determines the minimum level of difference considered significant
      * during comparison.</p>
      *
      * <p>The base class method does nothing. Subclasses should override it if appropriate.
      *
      * <p>See the Collator class description for an example of use.</p>
      * @param newStrength the new strength value.
      * @see #getStrength
      * @see #PRIMARY
      * @see #SECONDARY
      * @see #TERTIARY
      * @see #QUATERNARY
      * @see #IDENTICAL
      * @throws IllegalArgumentException if the new strength value is not valid.
      * @stable ICU 2.8
      */
     public void setStrength(int newStrength)
     {
         // The base implementation only rejects the call when this Collator is frozen;
         // newStrength itself is not validated or stored here.
         checkNotFrozen();
     }

     /**
      * @return this, for chaining
      * @internal Used in UnicodeTools
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     public Collator setStrength2(int newStrength)
     {
         // Same effect as setStrength(newStrength); returns this so calls can be chained.
         setStrength(newStrength);
         return this;
     }

     /**
      * Sets the decomposition mode of this Collator.  Setting this
      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
      * Collator to handle un-normalized text properly, producing the
      * same results as if the text were normalized. If
      * NO_DECOMPOSITION is set, it is the user's responsibility to
      * ensure that all text is already in the appropriate form before
      * a comparison or before getting a CollationKey. Adjusting
      * decomposition mode allows the user to select between faster and
      * more complete collation behavior.</p>
      *
      * <p>Since a great many of the world's languages do not require
      * text normalization, most locales set NO_DECOMPOSITION as the
      * default decomposition mode.</p>
      *
      * <p>The base class method does nothing. Subclasses should override it if appropriate.
      *
      * <p>See getDecomposition for a description of decomposition
      * mode.</p>
      *
      * @param decomposition the new decomposition mode
      * @see #getDecomposition
      * @see #NO_DECOMPOSITION
      * @see #CANONICAL_DECOMPOSITION
      * @throws IllegalArgumentException If the given value is not a valid
      *            decomposition mode.
      * @stable ICU 2.8
      */
     public void setDecomposition(int decomposition)
     {
         // The base implementation only rejects the call when this Collator is frozen;
         // the decomposition argument is not validated or stored here.
         checkNotFrozen();
     }

     /**
      * Sets the reordering codes for this collator.
      * Collation reordering allows scripts and some other groups of characters
      * to be moved relative to each other. This reordering is done on top of
      * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
      * at the start and/or the end of the collation order. These groups are specified using
      * UScript codes and {@link Collator.ReorderCodes} entries.
      *
      * <p>By default, reordering codes specified for the start of the order are placed in the
      * order given after several special non-script blocks. These special groups of characters
      * are space, punctuation, symbol, currency, and digit. These special groups are represented with
      * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
      * these special non-script groups if those special groups are explicitly specified in the reordering.
      *
      * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS}
      * stands for any script that is not explicitly
      * mentioned in the list of reordering codes given. Anything that is after OTHERS
      * will go at the very end of the reordering in the order given.
      *
      * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT}
      * will reset the reordering for this collator
      * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
      * was specified when this collator was created from resource data or from rules. The
      * DEFAULT code <b>must</b> be the sole code supplied when it is used.
      * If not, then an {@link IllegalArgumentException} will be thrown.
      *
      * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE}
      * will remove any reordering for this collator.
      * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
      * NONE code <b>must</b> be the sole code supplied when it is used.
      *
      * @param order the reordering codes to apply to this collator; if this is null or an empty array
      * then this clears any existing reordering
      * @see #getReorderCodes
      * @see #getEquivalentReorderCodes
      * @see Collator.ReorderCodes
      * @see UScript
      * @stable ICU 4.8
      */
     public void setReorderCodes(int... order)
     {
         // The base implementation unconditionally throws, regardless of the codes passed
         // (including null or an empty array); a subclass has to override this method
         // to actually support reordering.
         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
     }

     // public getters --------------------------------------------------------

     /**
      * Returns the Collator for the current default locale.
      * The default locale is determined by java.util.Locale.getDefault().
      * @return the Collator for the default locale (for example, en_US) if it
      *         is created successfully. Otherwise if there is no Collator
      *         associated with the current locale, the root collator
      *         will be returned.
      * @see java.util.Locale#getDefault()
      * @see #getInstance(Locale)
      * @stable ICU 2.8
      */
     public static final Collator getInstance()
     {
         // Delegates to the ULocale-based getInstance with ICU's ULocale.getDefault().
         // NOTE(review): the Javadoc refers to java.util.Locale.getDefault(); presumably
         // ULocale.getDefault() is derived from it -- confirm against ULocale.
         return getInstance(ULocale.getDefault());
     }

     /**
      * Clones the collator.
      * @stable ICU 2.6
      * @return a clone of this collator.
      */
     @Override
     public Object clone() throws CloneNotSupportedException {
         // Collator declares no superclass, so this is Object.clone(); the override
         // exists to widen the method's visibility to public.
         return super.clone();
     }

     // begin registry stuff

     /**
      * A factory used with registerFactory to register multiple collators and provide
      * display names for them.  If standard locale display names are sufficient,
      * Collator instances may be registered instead.
      * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
      * ULocale instead of Locale.  Instead of overriding createCollator(Locale),
      * new implementations should override createCollator(ULocale).  Note that
      * one of these two methods <b>MUST</b> be overridden or else an infinite
      * loop will occur.
      * @stable ICU 2.6
      */
     public static abstract class CollatorFactory {
         /**
          * Return true if this factory will be visible.  Default is true.
          * If not visible, the locales supported by this factory will not
          * be listed by getAvailableLocales.
          *
          * @return true if this factory is visible
          * @stable ICU 2.6
          */
         public boolean visible() {
             return true;
         }

         /**
          * Return an instance of the appropriate collator.  If the locale
          * is not supported, return null.
          * <b>Note:</b> as of ICU4J 3.2, implementations should override
          * this method instead of createCollator(Locale).
          * @param loc the locale for which this collator is to be created.
          * @return the newly created collator.
          * @stable ICU 3.2
          */
         public Collator createCollator(ULocale loc) {
             return createCollator(loc.toLocale());
         }

         /**
          * Return an instance of the appropriate collator.  If the locale
          * is not supported, return null.
          * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
          * createCollator(ULocale) instead of this method, and inherit this
          * method's implementation.  This method is no longer abstract
          * and instead delegates to createCollator(ULocale).
          * @param loc the locale for which this collator is to be created.
          * @return the newly created collator.
          * @stable ICU 2.6
          */
          public Collator createCollator(Locale loc) {
             return createCollator(ULocale.forLocale(loc));
         }

         /**
          * Return the name of the collator for the objectLocale, localized for the displayLocale.
          * If objectLocale is not visible or not defined by the factory, return null.
          * @param objectLocale the locale identifying the collator
          * @param displayLocale the locale for which the display name of the collator should be localized
          * @return the display name
          * @stable ICU 2.6
          */
         public String getDisplayName(Locale objectLocale, Locale displayLocale) {
             return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
         }

         /**
          * Return the name of the collator for the objectLocale, localized for the displayLocale.
          * If objectLocale is not visible or not defined by the factory, return null.
          * @param objectLocale the locale identifying the collator
          * @param displayLocale the locale for which the display name of the collator should be localized
          * @return the display name
          * @stable ICU 3.2
          */
         public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
             if (visible()) {
                 Set<String> supported = getSupportedLocaleIDs();
                 String name = objectLocale.getBaseName();
                 if (supported.contains(name)) {
                     return objectLocale.getDisplayName(displayLocale);
                 }
             }
             return null;
         }

         /**
          * Return an unmodifiable collection of the locale names directly
          * supported by this factory.
          *
          * @return the set of supported locale IDs.
          * @stable ICU 2.6
          */
         public abstract Set<String> getSupportedLocaleIDs();

         /**
          * Empty default constructor.
          * @stable ICU 2.6
          */
         protected CollatorFactory() {
         }
     }

     /**
      * Package-private abstraction over the collator service/registry operations.
      * The concrete implementation is the class named
      * "com.ibm.icu.text.CollatorServiceShim", which getShim() loads reflectively.
      * NOTE(review): the per-method notes below are inferred from the method names and
      * signatures only -- confirm against the implementing class.
      */
     static abstract class ServiceShim {
         // Presumably returns the collator for locale l.
         abstract Collator getInstance(ULocale l);
         // Presumably registers collator c for locale l; the returned Object looks like
         // the key later passed to unregister(Object) -- confirm.
         abstract Object registerInstance(Collator c, ULocale l);
         // Presumably registers factory f and returns its registration key -- confirm.
         abstract Object registerFactory(CollatorFactory f);
         // Presumably removes the registration identified by key k -- confirm.
         abstract boolean unregister(Object k);
         abstract Locale[] getAvailableLocales(); // TODO remove
         abstract ULocale[] getAvailableULocales();
         // Presumably the display name of locale ol localized for locale dl -- confirm.
         abstract String getDisplayName(ULocale ol, ULocale dl);
     }

     private static ServiceShim shim;

     /**
      * Returns the service shim, creating and caching it on first use.
      *
      * <p>The class "com.ibm.icu.text.CollatorServiceShim" is looked up by name and
      * instantiated reflectively; the result is stored in the static {@code shim} field.
      *
      * @return the cached (or newly created) shim
      * @throws MissingResourceException rethrown unchanged if raised while creating the shim
      * @throws ICUException wrapping any other exception raised while creating the shim
      */
     private static ServiceShim getShim() {
         if (shim != null) {
             return shim;
         }
         // Note: creating the shim without synchronization is safe even on
         // loose-memory-model configurations, because the shim instance itself has no
         // state -- it all lives in the class init.  At worst two shim instances get
         // created, and they share the same state, so that's ok.
         try {
             Class<?> shimClass = Class.forName("com.ibm.icu.text.CollatorServiceShim");
             shim = (ServiceShim) shimClass.newInstance();
         } catch (MissingResourceException missing) {
             ///CLOVER:OFF
             throw missing;
             ///CLOVER:ON
         } catch (Exception failure) {
             ///CLOVER:OFF
             if (DEBUG) {
                 failure.printStackTrace();
             }
             throw new ICUException(failure);
             ///CLOVER:ON
         }
         return shim;
     }

     /**
      * Simpler/faster methods for ASCII than ones based on Unicode data.
      * TODO: There should be code like this somewhere already??
      */
     private static final class ASCII {
         /**
          * Tests two character sequences for equality, treating each ASCII uppercase
          * letter 'A'-'Z' as equal to its lowercase counterpart. No other characters
          * are case-folded.
          *
          * @param left the first sequence
          * @param right the second sequence
          * @return true if both have the same length and match position by position
          */
         static boolean equalIgnoreCase(CharSequence left, CharSequence right) {
             final int count = left.length();
             if (right.length() != count) {
                 return false;
             }
             for (int pos = 0; pos < count; pos++) {
                 final char a = left.charAt(pos);
                 final char b = right.charAt(pos);
                 if (a != b && !differsOnlyByAsciiCase(a, b)) {
                     return false;
                 }
             }
             return true;
         }

         /**
          * Returns true if one of the two (distinct) chars is an ASCII uppercase letter
          * and the other is its lowercase form (uppercase + 0x20).
          */
         private static boolean differsOnlyByAsciiCase(char a, char b) {
             if ('A' <= a && a <= 'Z') {
                 return a + 0x20 == b;
             }
             if ('A' <= b && b <= 'Z') {
                 return b + 0x20 == a;
             }
             return false;
         }
     }

     /**
      * Parses a "yes"/"no" keyword value, ignoring ASCII case.
      *
      * @param keyword the keyword name, used only in the error message
      * @param s the keyword value to parse
      * @return true for "yes", false for "no"
      * @throws IllegalArgumentException if s is neither "yes" nor "no"
      */
     private static final boolean getYesOrNo(String keyword, String s) {
         final boolean isYes = ASCII.equalIgnoreCase(s, "yes");
         if (isYes || ASCII.equalIgnoreCase(s, "no")) {
             return isYes;
         }
         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
     }

     /**
      * Looks up a keyword value in a list of allowed values, ignoring ASCII case.
      *
      * @param keyword the keyword name, used only in the error message
      * @param s the keyword value to look up
      * @param values the allowed values
      * @return the index in values of the first entry that matches s
      * @throws IllegalArgumentException if no entry matches
      */
     private static final int getIntValue(String keyword, String s, String... values) {
         int index = 0;
         for (String candidate : values) {
             if (ASCII.equalIgnoreCase(s, candidate)) {
                 return index;
             }
             ++index;
         }
         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
     }

     /**
      * Maps one of the group names "space", "punct", "symbol", "currency", "digit"
      * (ASCII case-insensitive) to the corresponding Collator.ReorderCodes value.
      * The names are listed in the numeric order of the codes, starting at
      * ReorderCodes.FIRST, so the list index is the offset from FIRST.
      *
      * @param keyword the keyword name, used only in the error message
      * @param s the group name to map
      * @return the reorder code for the named group
      * @throws IllegalArgumentException if s is not one of the listed names
      */
     private static final int getReorderCode(String keyword, String s) {
         // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
         // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
         // Avoid introducing synonyms/aliases.
         final int groupIndex =
                 getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit");
         return Collator.ReorderCodes.FIRST + groupIndex;
     }

     /**
      * Sets collation attributes according to locale keywords. See
      * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
      *
      * Using "alias" keywords and values where defined:
      * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
      * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
      *
      * <p>Keywords handled: colStrength, colBackwards, colCaseLevel, colCaseFirst,
      * colAlternate, colNormalization, colNumeric, colReorder and kv.
      * The keywords colHiraganaQuaternary and variableTop are rejected.
      *
      * @param loc the locale whose keyword values are read
      * @param coll the collator that receives strength, decomposition, reorder codes
      *            and max-variable settings
      * @param rbc the RuleBasedCollator that receives the remaining attributes; may be null,
      *            in which case any keyword that needs it is rejected.
      *            NOTE(review): presumably coll itself when it is a RuleBasedCollator --
      *            confirm against the caller.
      * @throws UnsupportedOperationException for a rejected keyword, or for a keyword that
      *            is only settable on a RuleBasedCollator while rbc is null
      * @throws IllegalArgumentException if a keyword value is not one of the allowed values,
      *            or if colReorder lists too many codes
      */
     private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) {
         // Check for collation keywords that were already deprecated
         // before any were supported in createInstance() (except for "collation").
         String value = loc.getKeywordValue("colHiraganaQuaternary");
         if (value != null) {
             throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary");
         }
         value = loc.getKeywordValue("variableTop");
         if (value != null) {
             throw new UnsupportedOperationException("locale keyword vt/variableTop");
         }
         // Parse known collation keywords, ignore others.
         value = loc.getKeywordValue("colStrength");
         if (value != null) {
             // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
             int strength = getIntValue("colStrength", value,
                     "primary", "secondary", "tertiary", "quaternary", "identical");
             // List indexes 0..3 equal PRIMARY..QUATERNARY; index 4 ("identical") must be
             // mapped explicitly because IDENTICAL is not numerically contiguous.
             coll.setStrength(strength <= Collator.QUATERNARY ? strength : Collator.IDENTICAL);
         }
         value = loc.getKeywordValue("colBackwards");
         if (value != null) {
             if (rbc != null) {
                 rbc.setFrenchCollation(getYesOrNo("colBackwards", value));
             } else {
                 throw new UnsupportedOperationException(
                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
             }
         }
         value = loc.getKeywordValue("colCaseLevel");
         if (value != null) {
             if (rbc != null) {
                 rbc.setCaseLevel(getYesOrNo("colCaseLevel", value));
             } else {
                 // Fixed: this message used to name kb/colBackwards (copy-paste error).
                 throw new UnsupportedOperationException(
                         "locale keyword kc/colCaseLevel only settable for RuleBasedCollator");
             }
         }
         value = loc.getKeywordValue("colCaseFirst");
         if (value != null) {
             if (rbc != null) {
                 int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper");
                 if (cf == 0) {
                     rbc.setLowerCaseFirst(false);
                     rbc.setUpperCaseFirst(false);
                 } else if (cf == 1) {
                     rbc.setLowerCaseFirst(true);
                 } else /* cf == 2 */ {
                     rbc.setUpperCaseFirst(true);
                 }
             } else {
                 throw new UnsupportedOperationException(
                         "locale keyword kf/colCaseFirst only settable for RuleBasedCollator");
             }
         }
         value = loc.getKeywordValue("colAlternate");
         if (value != null) {
             if (rbc != null) {
                 // Index 0 is "non-ignorable", index 1 is "shifted".
                 rbc.setAlternateHandlingShifted(
                         getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0);
             } else {
                 throw new UnsupportedOperationException(
                         "locale keyword ka/colAlternate only settable for RuleBasedCollator");
             }
         }
         value = loc.getKeywordValue("colNormalization");
         if (value != null) {
             coll.setDecomposition(getYesOrNo("colNormalization", value) ?
                     Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION);
         }
         value = loc.getKeywordValue("colNumeric");
         if (value != null) {
             if (rbc != null) {
                 rbc.setNumericCollation(getYesOrNo("colNumeric", value));
             } else {
                 throw new UnsupportedOperationException(
                         "locale keyword kn/colNumeric only settable for RuleBasedCollator");
             }
         }
         value = loc.getKeywordValue("colReorder");
         if (value != null) {
             // Upper bound on the number of codes: every script code plus every special group.
             int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST];
             int codesLength = 0;
             int scriptNameStart = 0;
             // The value is a '-'-separated list of script codes and group names.
             for (;;) {
                 if (codesLength == codes.length) {
                     throw new IllegalArgumentException(
                             "too many script codes for colReorder locale keyword: " + value);
                 }
                 // Find the end of the current list item.
                 int limit = scriptNameStart;
                 while (limit < value.length() && value.charAt(limit) != '-') { ++limit; }
                 String scriptName = value.substring(scriptNameStart, limit);
                 int code;
                 if (scriptName.length() == 4) {
                     // Strict parsing, accept only 4-letter script codes, not long names.
                     code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName);
                 } else {
                     // None of the special group names has exactly four letters.
                     code = getReorderCode("colReorder", scriptName);
                 }
                 codes[codesLength++] = code;
                 if (limit == value.length()) { break; }
                 scriptNameStart = limit + 1;
             }
             // Defensive check: the loop above stores at least one code before it can break.
             if (codesLength == 0) {
                 throw new IllegalArgumentException("no script codes for colReorder locale keyword");
             }
             // Trim the buffer to the number of codes actually parsed.
             int[] args = new int[codesLength];
             System.arraycopy(codes, 0, args, 0, codesLength);
             coll.setReorderCodes(args);
         }
         value = loc.getKeywordValue("kv");
         if (value != null) {
             coll.setMaxVariable(getReorderCode("kv", value));
         }
     }

     /**
      * {@icu} Returns the Collator for the desired locale.
      *
      * <p>For some languages, multiple collation types are available;
      * for example, "de@collation=phonebook".
      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
      * in the old locale extension syntax ("el@colCaseFirst=upper")
      * or in language tag syntax ("el-u-kf-upper").
      * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
      *
      * @param locale the desired locale.
      * @return Collator for the desired locale if it is created successfully.
      *         Otherwise if there is no Collator
      *         associated with the current locale, the root collator will
      *         be returned.
      * @see java.util.Locale
      * @see java.util.ResourceBundle
      * @see #getInstance(Locale)
      * @see #getInstance()
      * @stable ICU 3.0
      */
     public static final Collator getInstance(ULocale locale) {
         // A null request falls back to the default locale.
         final ULocale requested = (locale != null) ? locale : ULocale.getDefault();
         // Fetching from the service cache is faster than instantiation.
         final Collator result = getShim().getInstance(requested);
         // Name and base name differ when the locale carries keywords.
         final boolean hasKeywords = !requested.getName().equals(requested.getBaseName());
         if (hasKeywords) {
             RuleBasedCollator ruleBased = null;
             if (result instanceof RuleBasedCollator) {
                 ruleBased = (RuleBasedCollator) result;
             }
             setAttributesFromKeywords(requested, result, ruleBased);
         }
         return result;
     }

     /**
      * Returns the Collator for the desired locale.
      *
      * <p>For some languages, multiple collation types are available;
      * for example, "de-u-co-phonebk".
      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
      * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale})
      * or in language tag syntax ("el-u-kf-upper").
      * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
      *
      * @param locale the desired locale.
      * @return Collator for the desired locale if it is created successfully.
      *         Otherwise if there is no Collator
      *         associated with the current locale, the root collator will
      *         be returned.
      * @see java.util.Locale
      * @see java.util.ResourceBundle
      * @see #getInstance(ULocale)
      * @see #getInstance()
      * @stable ICU 2.8
      */
     public static final Collator getInstance(Locale locale) {
         // Convert to the ICU locale type, then delegate to the ULocale overload.
         final ULocale icuLocale = ULocale.forLocale(locale);
         return getInstance(icuLocale);
     }

     /**
      * {@icu} Registers a collator as the default collator for the provided locale.  The
      * collator should not be modified after it is registered.
      *
      * <p>Because ICU may choose to cache Collator objects internally, this must
      * be called at application startup, prior to any calls to
      * Collator.getInstance to avoid undefined behavior.
      *
      * @param collator the collator to register
      * @param locale the locale for which this is the default collator
      * @return an object that can be used to unregister the registered collator.
      *
      * @stable ICU 3.2
      */
     public static final Object registerInstance(Collator collator, ULocale locale) {
         // The shim hands back the key that can later be used to unregister.
         final Object registryKey = getShim().registerInstance(collator, locale);
         return registryKey;
     }

     /**
      * {@icu} Registers a collator factory.
      *
      * <p>Because ICU may choose to cache Collator objects internally, this must
      * be called at application startup, prior to any calls to
      * Collator.getInstance to avoid undefined behavior.
      *
      * @param factory the factory to register
      * @return an object that can be used to unregister the registered factory.
      *
      * @stable ICU 2.6
      */
     public static final Object registerFactory(CollatorFactory factory) {
         // The shim hands back the key that can later be used to unregister.
         final Object registryKey = getShim().registerFactory(factory);
         return registryKey;
     }

     /**
      * {@icu} Unregisters a collator previously registered using registerInstance.
      * @param registryKey the object previously returned by registerInstance.
      * @return true if the collator was successfully unregistered.
      * @stable ICU 2.6
      */
     public static final boolean unregister(Object registryKey) {
         // When no shim is set, report failure; otherwise let the shim decide.
         return shim != null && shim.unregister(registryKey);
     }

     /**
      * Returns the set of locales, as Locale objects, for which collators
      * are installed.  Note that Locale objects do not support RFC 3066.
      * @return the list of locales in which collators are installed.
      * This list includes any that have been registered, in addition to
      * those that are installed with ICU4J.
      * @stable ICU 2.4
      */
     public static Locale[] getAvailableLocales() {
         // TODO make this wrap getAvailableULocales later
         if (shim != null) {
             return shim.getAvailableLocales();
         }
         // No shim set: list the locales directly from the collation resource bundles.
         return ICUResourceBundle.getAvailableLocales(
                 ICUResourceBundle.ICU_COLLATION_BASE_NAME,
                 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
     }

     /**
      * {@icu} Returns the set of locales, as ULocale objects, for which collators
      * are installed.  ULocale objects support RFC 3066.
      * @return the list of locales in which collators are installed.
      * This list includes any that have been registered, in addition to
      * those that are installed with ICU4J.
      * @stable ICU 3.0
      */
     public static final ULocale[] getAvailableULocales() {
         if (shim != null) {
             return shim.getAvailableULocales();
         }
         // No shim set: list the locales directly from the collation resource bundles.
         return ICUResourceBundle.getAvailableULocales(
                 ICUResourceBundle.ICU_COLLATION_BASE_NAME,
                 ICUResourceBundle.ICU_DATA_CLASS_LOADER);
     }

     /**
      * The list of keywords for this service.  This must be kept in sync with
      * the resource data.
      * @since ICU 3.0
      */
     private static final String[] KEYWORDS = { "collation" };

     /**
      * The resource name for this service.  Note that this is not the same as
      * the keyword for this service.
      * @since ICU 3.0
      */
     private static final String RESOURCE = "collations";

     /**
      * The resource bundle base name for this service.
      * @since ICU 3.0
      */
     private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;

     /**
      * {@icu} Returns an array of all possible keywords that are relevant to
      * collation. At this point, the only recognized keyword for this
      * service is "collation".
      * @return an array of valid collation keywords.
      * @see #getKeywordValues
      * @stable ICU 3.0
      */
     public static final String[] getKeywords() {
         // Return a copy: arrays are mutable, and KEYWORDS is shared static state
         // that getKeywordValues() validates its argument against. Handing out the
         // internal array would let any caller corrupt it for everyone else.
         return KEYWORDS.clone();
     }

     /**
      * {@icu} Given a keyword, returns an array of all values for
      * that keyword that are currently in use.
      * @param keyword one of the keywords returned by getKeywords.
      * @see #getKeywords
      * @stable ICU 3.0
      */
     public static final String[] getKeywordValues(String keyword) {
         // Only the single keyword listed in KEYWORDS is accepted.
         final boolean isKnownKeyword = keyword.equals(KEYWORDS[0]);
         if (isKnownKeyword) {
             return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
         }
         throw new IllegalArgumentException("Invalid keyword: " + keyword);
     }

     /**
      * {@icu} Given a key and a locale, returns an array of string values in a preferred
      * order that would make a difference. These are all and only those values where
      * the open (creation) of the service with the locale formed from the input locale
      * plus input keyword and that value has different behavior than creation with the
      * input locale alone.
      * @param key           one of the keys supported by this service.  For now, only
      *                      "collation" is supported.
      * @param locale        the locale
      * @param commonlyUsed  if set to true it will return only commonly used values
      *                      with the given locale in preferred order.  Otherwise,
      *                      it will return all the available values for the locale.
      * @return an array of string values for the given key and the locale.
      * @stable ICU 4.2
      */
     public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
                                                            boolean commonlyUsed) {
         // Note: The parameters key and commonlyUsed are actually not used.
         // They are in the method signature for consistency
         // with other locale services.

         // Read available collation values from collation bundles,
         // starting at the requested locale and walking up the parent chain.
         String baseLoc = locale.getBaseName();
         LinkedList<String> values = new LinkedList<String>();

         UResourceBundle bundle = UResourceBundle.getBundleInstance(
                 ICUResourceBundle.ICU_COLLATION_BASE_NAME, baseLoc);

         String defcoll = null;
         while (bundle != null) {
             UResourceBundle collations = bundle.get("collations");
             Enumeration<String> collEnum = collations.getKeys();
             while (collEnum.hasMoreElements()) {
                 String collkey = collEnum.nextElement();
                 if (collkey.equals("default")) {
                     if (defcoll == null) {
                         // Keep the first default found (the most specific bundle wins).
                         defcoll = collations.getString("default");
                     }
                 } else if (!collkey.startsWith("private-") && !values.contains(collkey)) {
                     values.add(collkey);
                 }
             }
             bundle = ((ICUResourceBundle)bundle).getParent();
         }

         // Reordering: the default collation type, when one was found, goes first.
         // Building the result from the list (rather than pre-sizing an array and
         // writing result[0] unconditionally) avoids ArrayIndexOutOfBoundsException
         // when no values were collected, when no default was found, or when the
         // default names a type that is not among the collected keys.
         if (defcoll != null) {
             values.remove(defcoll);
             values.addFirst(defcoll);
         }
         return values.toArray(new String[values.size()]);
     }

     /**
      * {@icu} Returns the functionally equivalent locale for the given
      * requested locale, with respect to given keyword, for the
      * collation service.  If two locales return the same result, then
      * collators instantiated for these locales will behave
      * equivalently.  The converse is not always true; two collators
      * may in fact be equivalent, but return different results, due to
      * internal details.  The return result has no other meaning than
      * that stated above, and implies nothing as to the relationship
      * between the two locales.  This is intended for use by
      * applications who wish to cache collators, or otherwise reuse
      * collators when possible.  The functional equivalent may change
      * over time.  For more information, please see the <a
      * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
      * Locales and Services</a> section of the ICU User Guide.
      * @param keyword a particular keyword as enumerated by
      * getKeywords.
      * @param locID The requested locale
      * @param isAvailable If non-null, isAvailable[0] will receive an
      * output boolean that indicates whether the requested locale was
      * 'available' to the collation service. If non-null, isAvailable
      * must have length &gt;= 1.
      * @return the locale
      * @stable ICU 3.0
      */
     public static final ULocale getFunctionalEquivalent(String keyword,
                                                         ULocale locID,
                                                         boolean[] isAvailable) {
         // Delegate to the shared resource-bundle lookup for the collation data.
         final ULocale equivalent = ICUResourceBundle.getFunctionalEquivalent(
                 BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE,
                 keyword, locID, isAvailable, true);
         return equivalent;
     }

     /**
      * {@icu} Returns the functionally equivalent locale for the given
      * requested locale, with respect to given keyword, for the
      * collation service.
      * @param keyword a particular keyword as enumerated by
      * getKeywords.
      * @param locID The requested locale
      * @return the locale
      * @see #getFunctionalEquivalent(String,ULocale,boolean[])
      * @stable ICU 3.0
      */
     public static final ULocale getFunctionalEquivalent(String keyword,
                                                         ULocale locID) {
         // No availability report requested: pass null for the output array.
         final boolean[] noAvailabilityOutput = null;
         return getFunctionalEquivalent(keyword, locID, noAvailabilityOutput);
     }

     /**
      * {@icu} Returns the name of the collator for the objectLocale, localized for the
      * displayLocale.
      * @param objectLocale the locale of the collator
      * @param displayLocale the locale for the collator's display name
      * @return the display name
      * @stable ICU 2.6
      */
     public static String getDisplayName(Locale objectLocale, Locale displayLocale) {
         // Both JDK locales are converted to ULocale for the service shim.
         return getShim().getDisplayName(
                 ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
     }

     /**
      * {@icu} Returns the name of the collator for the objectLocale, localized for the
      * displayLocale.
      * @param objectLocale the locale of the collator
      * @param displayLocale the locale for the collator's display name
      * @return the display name
      * @stable ICU 3.2
      */
     public static String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
         // Straight delegation to the service shim.
         final String displayName = getShim().getDisplayName(objectLocale, displayLocale);
         return displayName;
     }

     /**
      * {@icu} Returns the name of the collator for the objectLocale, localized for the
      * default <code>DISPLAY</code> locale.
      * @param objectLocale the locale of the collator
      * @return the display name
      * @see com.ibm.icu.util.ULocale.Category#DISPLAY
      * @stable ICU 2.6
      */
     public static String getDisplayName(Locale objectLocale) {
         // Localize for the default DISPLAY-category locale.
         return getShim().getDisplayName(
                 ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY));
     }

     /**
      * {@icu} Returns the name of the collator for the objectLocale, localized for the
      * default <code>DISPLAY</code> locale.
      * @param objectLocale the locale of the collator
      * @return the display name
      * @see com.ibm.icu.util.ULocale.Category#DISPLAY
      * @stable ICU 3.2
      */
     public static String getDisplayName(ULocale objectLocale) {
         // Localize for the default DISPLAY-category locale.
         return getShim().getDisplayName(
                 objectLocale, ULocale.getDefault(Category.DISPLAY));
     }

     /**
      * Returns this Collator's strength attribute. The strength attribute
      * determines the minimum level of difference considered significant.
      * {@icunote} This can return QUATERNARY strength, which is not supported by the
      * JDK version.
      * <p>
      * See the Collator class description for more details.
      * </p>
      * <p>The base class method always returns {@link #TERTIARY}.
      * Subclasses should override it if appropriate.
      *
      * @return this Collator's current strength attribute.
      * @see #setStrength
      * @see #PRIMARY
      * @see #SECONDARY
      * @see #TERTIARY
      * @see #QUATERNARY
      * @see #IDENTICAL
      * @stable ICU 2.8
      */
     public int getStrength() {
         // Fixed answer in the base class.
         return TERTIARY;
     }

     /**
      * Returns the decomposition mode of this Collator. The decomposition mode
      * determines how Unicode composed characters are handled.
      * <p>
      * See the Collator class description for more details.
      * </p>
      * <p>The base class method always returns {@link #NO_DECOMPOSITION}.
      * Subclasses should override it if appropriate.
      *
      * @return the decomposition mode
      * @see #setDecomposition
      * @see #NO_DECOMPOSITION
      * @see #CANONICAL_DECOMPOSITION
      * @stable ICU 2.8
      */
     public int getDecomposition() {
         // Fixed answer in the base class.
         return NO_DECOMPOSITION;
     }

     // public other methods -------------------------------------------------

     /**
      * Compares the equality of two text Strings using
      * this Collator's rules, strength and decomposition mode.  Convenience method.
      * @param source the source string to be compared.
      * @param target the target string to be compared.
      * @return true if the strings are equal according to the collation
      *         rules, otherwise false.
      * @see #compare
      * @throws NullPointerException thrown if either argument is null.
      * @stable ICU 2.8
      */
     public boolean equals(String source, String target) {
         // Equality here means compare() reports no ordering difference.
         final int order = compare(source, target);
         return order == 0;
     }

     /**
      * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
      * in this collator.
      * @return a UnicodeSet object containing all the
      *         code points and sequences that may sort differently than
      *         in the root collator.
      * @stable ICU 2.4
      */
     public UnicodeSet getTailoredSet() {
         // This implementation returns the whole range 0..0x10FFFF.
         final int firstCodePoint = 0;
         final int lastCodePoint = 0x10FFFF;
         return new UnicodeSet(firstCodePoint, lastCodePoint);
     }

     /**
      * Compares the source text String to the target text String according to
      * this Collator's rules, strength and decomposition mode.
      * Returns an integer less than,
      * equal to or greater than zero depending on whether the source String is
      * less than, equal to or greater than the target String. See the Collator
      * class description for an example of use.
      *
      * @param source the source String.
      * @param target the target String.
      * @return Returns an integer value. Value is less than zero if source is
      *         less than target, value is zero if source and target are equal,
      *         value is greater than zero if source is greater than target.
      * @see CollationKey
      * @see #getCollationKey
      * @throws NullPointerException thrown if either argument is null.
      * @stable ICU 2.8
      */
     public abstract int compare(String source, String target);

     /**
      * Compares the source Object to the target Object.
      * @param source the source Object.
      * @param target the target Object.
      * @return Returns an integer value. Value is less than zero if source is
      *         less than target, value is zero if source and target are equal,
      *         value is greater than zero if source is greater than target.
      * @throws ClassCastException thrown if either argument cannot be cast to CharSequence.
      * @stable ICU 4.2
      */
     public int compare(Object source, Object target) {
         // Both arguments are cast first; anything that is not a CharSequence
         // fails here with ClassCastException.
         final CharSequence left = (CharSequence) source;
         final CharSequence right = (CharSequence) target;
         return doCompare(left, right);
     }

     /**
      * Compares two CharSequences.
      * The base class just calls compare(left.toString(), right.toString()).
      * Subclasses should instead implement this method and have the String API call this method.
      * @internal
      * @deprecated This API is ICU internal only.
      */
     @Deprecated
     protected int doCompare(CharSequence left, CharSequence right) {
         // Base implementation: convert to Strings and use the String comparison.
         final String leftText = left.toString();
         final String rightText = right.toString();
         return compare(leftText, rightText);
     }

     /**
      * Transforms the String into a CollationKey suitable for efficient
      * repeated comparison.  The resulting key depends on the collator's
      * rules, strength and decomposition mode.
      *
      * <p>Note that collation keys are often less efficient than simply doing comparison.
      * For more details, see the ICU User Guide.
      *
      * <p>See the CollationKey class documentation for more information.</p>
      * @param source the string to be transformed into a CollationKey.
      * @return the CollationKey for the given String based on this Collator's
      *         collation rules. If the source String is null, a null
      *         CollationKey is returned.
      * @see CollationKey
      * @see #compare(String, String)
      * @see #getRawCollationKey
      * @stable ICU 2.8
      */
     public abstract CollationKey getCollationKey(String source);

     /**
      * {@icu} Returns the simpler form of a CollationKey for the String source following
      * the rules of this Collator and stores the result into the user provided argument
      * key.  If key has an internal byte array of length that's too small for the result,
      * the internal byte array will be grown to the exact required size.
      *
      * <p>Note that collation keys are often less efficient than simply doing comparison.
      * For more details, see the ICU User Guide.
      *
      * @param source the text String to be transformed into a RawCollationKey
      * @param key the RawCollationKey that receives the result, or null to have
      *            a new instance created
      * @return If key is null, a new instance of RawCollationKey will be
      *         created and returned, otherwise the user provided key will be
      *         returned.
      * @see #compare(String, String)
      * @see #getCollationKey
      * @see RawCollationKey
      * @stable ICU 2.8
      */
     public abstract RawCollationKey getRawCollationKey(String source,
                                                        RawCollationKey key);

     /**
      * {@icu} Sets the variable top to the top of the specified reordering group.
      * The variable top determines the highest-sorting character
      * which is affected by the alternate handling behavior.
      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
      *
      * <p>The base class implementation throws an UnsupportedOperationException.
      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
      * @return this
      * @see #getMaxVariable
      * @stable ICU 53
      */
     public Collator setMaxVariable(int group) {
         // The base class does not support this operation.
         final String message = "Needs to be implemented by the subclass.";
         throw new UnsupportedOperationException(message);
     }

     /**
      * {@icu} Returns the maximum reordering group whose characters are affected by
      * the alternate handling behavior.
      *
      * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION.
      * @return the maximum variable reordering group.
      * @see #setMaxVariable
      * @stable ICU 53
      */
     public int getMaxVariable() {
         // Fixed answer in the base class.
         final int defaultGroup = Collator.ReorderCodes.PUNCTUATION;
         return defaultGroup;
     }

     /**
      * {@icu} Sets the variable top to the primary weight of the specified string.
      *
      * <p>Beginning with ICU 53, the variable top is pinned to
      * the top of one of the supported reordering groups,
      * and it must not be beyond the last of those groups.
      * See {@link #setMaxVariable(int)}.
      *
      * @param varTop one or more (if contraction) characters to which the
      *               variable top should be set
      * @return variable top primary weight
      * @exception IllegalArgumentException
      *                is thrown if varTop argument is not a valid variable top element. A variable top element is
      *                invalid when
      *                <ul>
      *                <li>it is a contraction that does not exist in the Collation order
      *                <li>the variable top is beyond
      *                    the last reordering group supported by setMaxVariable()
      *                <li>the varTop argument is null or zero in length.
      *                </ul>
      * @see #getVariableTop
      * @see RuleBasedCollator#setAlternateHandlingShifted
      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
      */
     @Deprecated
     public abstract int setVariableTop(String varTop);

     /**
      * {@icu} Gets the variable top value of a Collator.
      *
      * @return the variable top primary weight
      * @see #getMaxVariable
      * @see #setVariableTop(int)
      * @stable ICU 2.6
      */
     public abstract int getVariableTop();

     /**
      * {@icu} Sets the variable top to the specified primary weight.
      *
      * <p>Beginning with ICU 53, the variable top is pinned to
      * the top of one of the supported reordering groups,
      * and it must not be beyond the last of those groups.
      * See {@link #setMaxVariable(int)}.
      *
      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
      * @see #getVariableTop
      * @see #setVariableTop(String)
      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
      */
     @Deprecated
     public abstract void setVariableTop(int varTop);

     /**
      * {@icu} Returns the version of this collator object.
      *
      * <p>This method is abstract; concrete subclasses must implement it.
      *
      * @return the version object associated with this collator
      * @stable ICU 2.8
      */
     public abstract VersionInfo getVersion();

     /**
      * {@icu} Returns the UCA version of this collator object.
      *
      * <p>This method is abstract; concrete subclasses must implement it.
      *
      * @return the UCA version object associated with this collator
      * @stable ICU 2.8
      */
     public abstract VersionInfo getUCAVersion();

     /**
      * Retrieves the reordering codes for this collator.
      * These reordering codes are a combination of UScript codes and ReorderCodes.
      * @return a copy of the reordering codes for this collator;
      * if none are set then returns an empty array
      * @see #setReorderCodes
      * @see #getEquivalentReorderCodes
      * @see Collator.ReorderCodes
      * @see UScript
      * @stable ICU 4.8
      */
     public int[] getReorderCodes() {
         // The base class does not support this operation.
         final String message = "Needs to be implemented by the subclass.";
         throw new UnsupportedOperationException(message);
     }

     /**
      * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
      * codes are grouped and must reorder together.
      * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
      * for example Hiragana and Katakana.
      *
      * @param reorderCode The reorder code to determine equivalence for.
      * @return the set of all reorder codes in the same group as the given reorder code.
      * @see #setReorderCodes
      * @see #getReorderCodes
      * @see Collator.ReorderCodes
      * @see UScript
      * @stable ICU 4.8
      */
     public static int[] getEquivalentReorderCodes(int reorderCode) {
         // The grouping is looked up in the root collation data.
         return CollationRoot.getData().getEquivalentScripts(reorderCode);
     }


     // Freezable interface implementation -------------------------------------------------

     /**
      * Determines whether the object has been frozen or not.
      *
      * <p>An unfrozen Collator is mutable and not thread-safe.
      * A frozen Collator is immutable and thread-safe.
      *
      * <p>The base class method always returns false.
      *
      * @return true if this collator is frozen; this base class implementation
      *         always returns false
      * @stable ICU 4.8
      */
     public boolean isFrozen() {
         return false;
     }

     /**
      * Freezes the collator.
      * @return the collator itself.
      * @stable ICU 4.8
      */
     public Collator freeze() {
         // The base class does not support this operation.
         final String message = "Needs to be implemented by the subclass.";
         throw new UnsupportedOperationException(message);
     }

     /**
      * Provides for the clone operation. Any clone is initially unfrozen.
      * @stable ICU 4.8
      */
     public Collator cloneAsThawed() {
         // The base class does not support this operation.
         final String message = "Needs to be implemented by the subclass.";
         throw new UnsupportedOperationException(message);
     }

     /**
      * Empty default constructor to make javadocs happy
      * @stable ICU 2.4
      */
     protected Collator() {
         // Intentionally empty.
     }

     // Debug switch for this class, initialized from ICUDebug.enabled("collator").
     private static final boolean DEBUG = ICUDebug.enabled("collator");

     // -------- BEGIN ULocale boilerplate --------

     /**
      * {@icu} Returns the locale that was used to create this object, or null.
      * This may differ from the locale requested at the time of
      * this object's creation.  For example, if an object is created
      * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
      * drawn from <tt>en</tt> (the <i>actual</i> locale), and
      * <tt>en_US</tt> may be the most specific locale that exists (the
      * <i>valid</i> locale).
      *
      * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
      * contains a partial preview implementation.  The <i>actual</i>
      * locale is returned correctly, but the <i>valid</i> locale is
      * not, in most cases.
      *
      * <p>The base class method always returns {@link ULocale#ROOT}.
      * Subclasses should override it if appropriate.
      *
      * @param type type of information requested, either {@link
      * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
      * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
      * @return the information specified by <i>type</i>, or null if
      * this object was not constructed from locale data.
      * @see com.ibm.icu.util.ULocale
      * @see com.ibm.icu.util.ULocale#VALID_LOCALE
      * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
      * @draft ICU 2.8 (retain)
      * @provisional This API might change or be removed in a future release.
      */
     public ULocale getLocale(ULocale.Type type) {
         // The type argument is not consulted here: this implementation
         // answers ULocale.ROOT for every requested type.
         return ULocale.ROOT;
     }

     /**
      * Set information about the locales that were used to create this
      * object.  If the object was not constructed from locale data,
      * both arguments should be set to null.  Otherwise, neither
      * should be null.  The actual locale must be at the same level or
      * less specific than the valid locale.  This method is intended
      * for use by factories or other entities that create objects of
      * this class.
      *
      * <p>The base class method does nothing. Subclasses should override it if appropriate.
      *
      * @param valid the most specific locale containing any resource
      * data, or null
      * @param actual the locale containing data used to construct this
      * object, or null
      * @see com.ibm.icu.util.ULocale
      * @see com.ibm.icu.util.ULocale#VALID_LOCALE
      * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
      */
     void setLocale(ULocale valid, ULocale actual) {}  // intentionally empty: both arguments are ignored here

     // -------- END ULocale boilerplate --------
 }