| /** |
| ******************************************************************************* |
| * Copyright (C) 1996-2014, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ******************************************************************************* |
| */ |
| package com.ibm.icu.text; |
| |
| import java.lang.reflect.InvocationTargetException; |
| import java.lang.reflect.Method; |
| import java.text.CharacterIterator; |
| import java.text.ParseException; |
| import java.util.Arrays; |
| import java.util.concurrent.locks.Lock; |
| import java.util.concurrent.locks.ReentrantLock; |
| |
| import com.ibm.icu.impl.Normalizer2Impl; |
| import com.ibm.icu.impl.Normalizer2Impl.ReorderingBuffer; |
| import com.ibm.icu.impl.Utility; |
| import com.ibm.icu.impl.coll.BOCSU; |
| import com.ibm.icu.impl.coll.Collation; |
| import com.ibm.icu.impl.coll.CollationCompare; |
| import com.ibm.icu.impl.coll.CollationData; |
| import com.ibm.icu.impl.coll.CollationFastLatin; |
| import com.ibm.icu.impl.coll.CollationIterator; |
| import com.ibm.icu.impl.coll.CollationKeys; |
| import com.ibm.icu.impl.coll.CollationKeys.SortKeyByteSink; |
| import com.ibm.icu.impl.coll.CollationLoader; |
| import com.ibm.icu.impl.coll.CollationRoot; |
| import com.ibm.icu.impl.coll.CollationSettings; |
| import com.ibm.icu.impl.coll.CollationTailoring; |
| import com.ibm.icu.impl.coll.ContractionsAndExpansions; |
| import com.ibm.icu.impl.coll.FCDUTF16CollationIterator; |
| import com.ibm.icu.impl.coll.SharedObject; |
| import com.ibm.icu.impl.coll.TailoredSet; |
| import com.ibm.icu.impl.coll.UTF16CollationIterator; |
| import com.ibm.icu.util.ULocale; |
| import com.ibm.icu.util.VersionInfo; |
| |
| /** |
| * <p> |
| * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule |
| * sets. RuleBasedCollator is designed to be fully compliant to the <a |
| * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651. |
| * </p> |
| * |
| * <p> |
| * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User |
| * Guide</a> for more information about the collation service before using this class. |
| * </p> |
| * |
| * <p> |
 * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
 * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
 * argument locale. If a customized collation ordering or customized attributes are required, use the
 * RuleBasedCollator(String) constructor with the appropriate rules. The customized RuleBasedCollator will base its
 * ordering on the CLDR root collation, while re-adjusting the attributes and orders of the characters in the specified
 * rule accordingly.
| * </p> |
| * |
| * <p> |
 * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
 * is not available, the order eventually falls back to the
 * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
| * </p> |
| * |
| * <p> |
| * For information about the collation rule syntax and details about customization, please refer to the <a |
| * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the |
| * User Guide. |
| * </p> |
| * |
| * <p> |
| * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J: |
| * |
| * <ul> |
| * <li>According to the JDK documentation: <i> |
| * <p> |
| * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range |
| * \U0E40-\U0E44 precedes a Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the range |
| * \U0EC0-\U0EC4 precedes a Lao consonant of the range \U0E81-\U0EAE then the vowel is placed after the |
| * consonant for collation purposes. |
| * </p> |
| * <p> |
| * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on. |
| * </p> |
| * </i> |
| * <p> |
| * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly |
| * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored. |
| * </p> |
| * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported. |
| * </ul> |
| * <p> |
| * <strong>Examples</strong> |
| * </p> |
| * <p> |
| * Creating Customized RuleBasedCollators: <blockquote> |
| * |
| * <pre> |
| * String simple = "& a < b < c < d"; |
| * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple); |
| * |
| * String norwegian = "& a , A < b , B < c , C < d , D < e , E " |
| * + "< f , F < g , G < h , H < i , I < j , " |
| * + "J < k , K < l , L < m , M < n , N < " |
| * + "o , O < p , P < q , Q < r , R < s , S < " |
| * + "t , T < u , U < v , V < w , W < x , X " |
| * + "< y , Y < z , Z < \u00E5 = a\u030A " |
| * + ", \u00C5 = A\u030A ; aa , AA < \u00E6 " |
| * + ", \u00C6 < \u00F8 , \u00D8"; |
| * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian); |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * Concatenating rules to combine <code>Collator</code>s: <blockquote> |
| * |
| * <pre> |
| * // Create an en_US Collator object |
| * RuleBasedCollator en_USCollator = (RuleBasedCollator) |
| * Collator.getInstance(new Locale("en", "US", "")); |
| * // Create a da_DK Collator object |
| * RuleBasedCollator da_DKCollator = (RuleBasedCollator) |
| * Collator.getInstance(new Locale("da", "DK", "")); |
| * // Combine the two |
| * // First, get the collation rules from en_USCollator |
| * String en_USRules = en_USCollator.getRules(); |
| * // Second, get the collation rules from da_DKCollator |
| * String da_DKRules = da_DKCollator.getRules(); |
| * RuleBasedCollator newCollator = |
| * new RuleBasedCollator(en_USRules + da_DKRules); |
| * // newCollator has the combined rules |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to |
| * the existing rule: <blockquote> |
| * |
| * <pre> |
| * // Create a new Collator object with additional rules |
| * String addRules = "& C < ch, cH, Ch, CH"; |
| * RuleBasedCollator myCollator = |
| * new RuleBasedCollator(en_USCollator.getRules() + addRules); |
| * // myCollator contains the new rules |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * How to change the order of non-spacing accents: <blockquote> |
| * |
| * <pre> |
| * // old rule with main accents |
| * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 " |
| * + "; \u0327 ; \u0303 ; \u0304 ; \u0305 " |
| * + "; \u0306 ; \u0307 ; \u0309 ; \u030A " |
| * + "; \u030B ; \u030C ; \u030D ; \u030E " |
| * + "; \u030F ; \u0310 ; \u0311 ; \u0312 " |
| * + "< a , A ; ae, AE ; \u00e6 , \u00c6 " |
| * + "< b , B < c, C < e, E & C < d , D"; |
| * // change the order of accent characters |
| * String addOn = "& \u0300 ; \u0308 ; \u0302"; |
| * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); |
| * </pre> |
| * |
| * </blockquote> |
| * |
| * Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese |
| * characters in the Japanese <code>Collator</code>: <blockquote> |
| * |
| * <pre> |
| * // get en_US Collator rules |
| * RuleBasedCollator en_USCollator |
| * = (RuleBasedCollator)Collator.getInstance(Locale.US); |
| * // add a few Japanese characters to sort before English characters |
| * // suppose the last character before the first base letter 'a' in |
| * // the English collation rule is \u2212 |
| * String jaString = "& \u2212 < \u3041, \u3042 < \u3043, " |
| * + "\u3044"; |
| * RuleBasedCollator myJapaneseCollator |
| * = new RuleBasedCollator(en_USCollator.getRules() + jaString); |
| * </pre> |
| * |
| * </blockquote> |
| * </p> |
| * <p> |
| * This class is not subclassable |
| * </p> |
| * |
| * @author Syn Wee Quek |
| * @stable ICU 2.8 |
| */ |
| public final class RuleBasedCollator extends Collator { |
| // public constructors --------------------------------------------------- |
| |
/**
 * <p>
 * Creates a collator customized by the given rule string.
 * The collator is based on the CLDR root collation, with the attributes and
 * character re-ordering that the rules specify applied on top of it.
 * </p>
 * <p>
 * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization">
 * Collation Customization</a> for details on the rule syntax.
 * </p>
 *
 * @param rules
 *            the collation rules to build the collation table from; must not be null.
 * @exception ParseException
 *                thrown when the argument rules have an invalid syntax.
 *                An IOException is thrown when an error occurred while reading internal data.
 * @exception IllegalArgumentException
 *                thrown when rules is null.
 * @stable ICU 2.8
 */
public RuleBasedCollator(String rules) throws Exception {
    // Reject a missing rule string before touching any state.
    if (null == rules) {
        throw new IllegalArgumentException("Collation rules can not be null");
    }

    // The valid locale is set to root before the tailoring is built.
    validLocale = ULocale.ROOT;
    internalBuildTailoring(rules);
}
| |
| /** |
| * Implements from-rule constructors. |
| * @param rules rule string |
| * @throws Exception |
| */ |
| private final void internalBuildTailoring(String rules) throws Exception { |
| CollationTailoring base = CollationRoot.getRoot(); |
| // Most code using Collator does not need to build a Collator from rules. |
| // By using reflection, most code will not have a static dependency on the builder code. |
| // CollationBuilder builder = new CollationBuilder(base); |
| ClassLoader classLoader = getClass().getClassLoader(); |
| CollationTailoring t; |
| try { |
| Class<?> builderClass = classLoader.loadClass("com.ibm.icu.impl.coll.CollationBuilder"); |
| Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base); |
| // builder.parseAndBuild(rules); |
| Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class); |
| t = (CollationTailoring)parseAndBuild.invoke(builder, rules); |
| } catch(InvocationTargetException e) { |
| throw (Exception)e.getTargetException(); |
| } |
| CollationSettings ts = t.settings.readOnly(); |
| char[] fastLatinPrimaries = new char[CollationFastLatin.LATIN_LIMIT]; |
| int fastLatinOptions = CollationFastLatin.getOptions(t.data, ts, fastLatinPrimaries); |
| if(fastLatinOptions != ts.fastLatinOptions || |
| (fastLatinOptions >= 0 && |
| !Arrays.equals(fastLatinPrimaries, ts.fastLatinPrimaries))) { |
| CollationSettings ownedSettings = t.settings.copyOnWrite(); |
| ownedSettings.fastLatinOptions = CollationFastLatin.getOptions( |
| t.data, ownedSettings, |
| ownedSettings.fastLatinPrimaries); |
| } |
| t.actualLocale = null; |
| adoptTailoring(t); |
| } |
| |
| // public methods -------------------------------------------------------- |
| |
| /** |
| * Clones the RuleBasedCollator |
| * |
| * @return a new instance of this RuleBasedCollator object |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public Object clone() throws CloneNotSupportedException { |
| if (isFrozen()) { |
| return this; |
| } |
| return cloneAsThawed(); |
| } |
| |
| private final void initMaxExpansions() { |
| synchronized(tailoring) { |
| if (tailoring.maxExpansions == null) { |
| tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data); |
| } |
| } |
| } |
| |
| /** |
| * Return a CollationElementIterator for the given String. |
| * |
| * @see CollationElementIterator |
| * @stable ICU 2.8 |
| */ |
| public CollationElementIterator getCollationElementIterator(String source) { |
| initMaxExpansions(); |
| return new CollationElementIterator(source, this); |
| } |
| |
| /** |
| * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be |
| * preserved since a new copy will be created for use. |
| * |
| * @see CollationElementIterator |
| * @stable ICU 2.8 |
| */ |
| public CollationElementIterator getCollationElementIterator(CharacterIterator source) { |
| initMaxExpansions(); |
| CharacterIterator newsource = (CharacterIterator) source.clone(); |
| return new CollationElementIterator(newsource, this); |
| } |
| |
| /** |
| * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be |
| * preserved since a new copy will be created for use. |
| * |
| * @see CollationElementIterator |
| * @stable ICU 2.8 |
| */ |
| public CollationElementIterator getCollationElementIterator(UCharacterIterator source) { |
| initMaxExpansions(); |
| return new CollationElementIterator(source, this); |
| } |
| |
| // Freezable interface implementation ------------------------------------------------- |
| |
| /** |
| * Determines whether the object has been frozen or not. |
| * @stable ICU 4.8 |
| */ |
| @Override |
| public boolean isFrozen() { |
| return frozenLock != null; |
| } |
| |
| /** |
| * Freezes the collator. |
| * @return the collator itself. |
| * @stable ICU 4.8 |
| */ |
| @Override |
| public Collator freeze() { |
| if (!isFrozen()) { |
| frozenLock = new ReentrantLock(); |
| if (collationBuffer == null) { |
| collationBuffer = new CollationBuffer(data); |
| } |
| } |
| return this; |
| } |
| |
| /** |
| * Provides for the clone operation. Any clone is initially unfrozen. |
| * @stable ICU 4.8 |
| */ |
| @Override |
| public RuleBasedCollator cloneAsThawed() { |
| try { |
| RuleBasedCollator result = (RuleBasedCollator) super.clone(); |
| // since all collation data in the RuleBasedCollator do not change |
| // we can safely assign the result.fields to this collator |
| // except in cases where we can't |
| result.settings = settings.clone(); |
| result.collationBuffer = null; |
| result.frozenLock = null; |
| return result; |
| } catch (CloneNotSupportedException e) { |
| // Clone is implemented |
| return null; |
| } |
| } |
| |
| // public setters -------------------------------------------------------- |
| |
| private void checkNotFrozen() { |
| if (isFrozen()) { |
| throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator"); |
| } |
| } |
| |
| private final CollationSettings getOwnedSettings() { |
| return settings.copyOnWrite(); |
| } |
| |
| private final CollationSettings getDefaultSettings() { |
| return tailoring.settings.readOnly(); |
| } |
| |
| /** |
| * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator |
| * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a |
| * correct JIS collation order, distinguishing between Katakana and Hiragana characters. |
| * |
| * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. |
| * Since ICU 50, this attribute is not settable any more via API functions. |
| * Since CLDR 25/ICU 53, explicit quaternary relations are used |
| * to achieve the same Japanese sort order. |
| * |
| * @param flag |
| * true if Hiragana Quaternary mode is to be on, false otherwise |
| * @see #setHiraganaQuaternaryDefault |
| * @see #isHiraganaQuaternary |
| * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. |
| */ |
| @Deprecated |
| public void setHiraganaQuaternary(boolean flag) { |
| checkNotFrozen(); |
| } |
| |
| /** |
| * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See |
| * setHiraganaQuaternary(boolean) for more details. |
| * |
| * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. |
| * Since ICU 50, this attribute is not settable any more via API functions. |
| * Since CLDR 25/ICU 53, explicit quaternary relations are used |
| * to achieve the same Japanese sort order. |
| * |
| * @see #setHiraganaQuaternary(boolean) |
| * @see #isHiraganaQuaternary |
| * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. |
| */ |
| @Deprecated |
| public void setHiraganaQuaternaryDefault() { |
| checkNotFrozen(); |
| } |
| |
| /** |
| * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The |
| * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case |
| * characters first. |
| * |
| * @param upperfirst |
| * true to sort uppercase characters before lowercase characters, false to sort lowercase characters |
| * before uppercase characters |
| * @see #isLowerCaseFirst |
| * @see #isUpperCaseFirst |
| * @see #setLowerCaseFirst |
| * @see #setCaseFirstDefault |
| * @stable ICU 2.8 |
| */ |
| public void setUpperCaseFirst(boolean upperfirst) { |
| checkNotFrozen(); |
| if (upperfirst == isUpperCaseFirst()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The |
| * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper |
| * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences. |
| * |
| * @param lowerfirst |
| * true for sorting lower cased characters before upper cased characters, false to ignore case |
| * preferences. |
| * @see #isLowerCaseFirst |
| * @see #isUpperCaseFirst |
| * @see #setUpperCaseFirst |
| * @see #setCaseFirstDefault |
| * @stable ICU 2.8 |
| */ |
| public void setLowerCaseFirst(boolean lowerfirst) { |
| checkNotFrozen(); |
| if (lowerfirst == isLowerCaseFirst()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See |
| * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details. |
| * |
| * @see #isLowerCaseFirst |
| * @see #isUpperCaseFirst |
| * @see #setLowerCaseFirst(boolean) |
| * @see #setUpperCaseFirst(boolean) |
| * @stable ICU 2.8 |
| */ |
| public final void setCaseFirstDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setCaseFirstDefault(defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See |
| * setAlternateHandling(boolean) for more details. |
| * |
| * @see #setAlternateHandlingShifted(boolean) |
| * @see #isAlternateHandlingShifted() |
| * @stable ICU 2.8 |
| */ |
| public void setAlternateHandlingDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setAlternateHandlingDefault(defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See |
| * setCaseLevel(boolean) for more details. |
| * |
| * @see #setCaseLevel(boolean) |
| * @see #isCaseLevel |
| * @stable ICU 2.8 |
| */ |
| public void setCaseLevelDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See |
| * setDecomposition(int) for more details. |
| * |
| * @see #getDecomposition |
| * @see #setDecomposition(int) |
| * @stable ICU 2.8 |
| */ |
| public void setDecompositionDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See |
| * setFrenchCollation(boolean) for more details. |
| * |
| * @see #isFrenchCollation |
| * @see #setFrenchCollation(boolean) |
| * @stable ICU 2.8 |
| */ |
| public void setFrenchCollationDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See |
| * setStrength(int) for more details. |
| * |
| * @see #setStrength(int) |
| * @see #getStrength |
| * @stable ICU 2.8 |
| */ |
| public void setStrengthDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setStrengthDefault(defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Method to set numeric collation to its default value. |
| * |
| * @see #getNumericCollation |
| * @see #setNumericCollation |
| * @stable ICU 2.8 |
| */ |
| public void setNumericCollationDefault() { |
| checkNotFrozen(); |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false, |
| * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted |
| * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture"> |
| * French collation</a> for more information. |
| * |
| * @param flag |
| * true to set the French collation on, false to set it off |
| * @stable ICU 2.8 |
| * @see #isFrenchCollation |
| * @see #setFrenchCollationDefault |
| */ |
| public void setFrenchCollation(boolean flag) { |
| checkNotFrozen(); |
| if(flag == isFrenchCollation()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition |
| * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This |
| * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false, |
| * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all |
| * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior |
| * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the |
| * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order. |
| * |
| * @param shifted |
| * true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior. |
| * @see #isAlternateHandlingShifted |
| * @see #setAlternateHandlingDefault |
| * @stable ICU 2.8 |
| */ |
| public void setAlternateHandlingShifted(boolean shifted) { |
| checkNotFrozen(); |
| if(shifted == isAlternateHandlingShifted()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setAlternateHandlingShifted(shifted); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * <p> |
| * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known |
| * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level |
| * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value |
| * is false, which means the case level is not generated. The contents of the case level are affected by the case |
| * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable |
| * case level. |
| * </p> |
| * <p> |
| * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case |
| * level</a> for more information. |
| * </p> |
| * |
| * @param flag |
| * true if case level sorting is required, false otherwise |
| * @stable ICU 2.8 |
| * @see #setCaseLevelDefault |
| * @see #isCaseLevel |
| */ |
| public void setCaseLevel(boolean flag) { |
| checkNotFrozen(); |
| if(flag == isCaseLevel()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the decomposition mode of this Collator. Setting this |
| * decomposition attribute with CANONICAL_DECOMPOSITION allows the |
| * Collator to handle un-normalized text properly, producing the |
| * same results as if the text were normalized. If |
| * NO_DECOMPOSITION is set, it is the user's responsibility to |
| * insure that all text is already in the appropriate form before |
| * a comparison or before getting a CollationKey. Adjusting |
| * decomposition mode allows the user to select between faster and |
| * more complete collation behavior.</p> |
| * |
| * <p>Since a great many of the world's languages do not require |
| * text normalization, most locales set NO_DECOMPOSITION as the |
| * default decomposition mode.</p> |
| * |
| * The default decompositon mode for the Collator is |
| * NO_DECOMPOSITON, unless specified otherwise by the locale used |
| * to create the Collator.</p> |
| * |
| * <p>See getDecomposition for a description of decomposition |
| * mode.</p> |
| * |
| * @param decomposition the new decomposition mode |
| * @see #getDecomposition |
| * @see #NO_DECOMPOSITION |
| * @see #CANONICAL_DECOMPOSITION |
| * @throws IllegalArgumentException If the given value is not a valid |
| * decomposition mode. |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public void setDecomposition(int decomposition) |
| { |
| checkNotFrozen(); |
| boolean flag; |
| switch(decomposition) { |
| case NO_DECOMPOSITION: |
| flag = false; |
| break; |
| case CANONICAL_DECOMPOSITION: |
| flag = true; |
| break; |
| default: |
| throw new IllegalArgumentException("Wrong decomposition mode."); |
| } |
| if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference |
| * considered significant during comparison. |
| * |
| * <p>See the Collator class description for an example of use. |
| * |
| * @param newStrength |
| * the new strength value. |
| * @see #getStrength |
| * @see #setStrengthDefault |
| * @see #PRIMARY |
| * @see #SECONDARY |
| * @see #TERTIARY |
| * @see #QUATERNARY |
| * @see #IDENTICAL |
| * @exception IllegalArgumentException |
| * If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public void setStrength(int newStrength) { |
| checkNotFrozen(); |
| if(newStrength == getStrength()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setStrength(newStrength); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * {@icu} Sets the variable top to the top of the specified reordering group. |
| * The variable top determines the highest-sorting character |
| * which is affected by the alternate handling behavior. |
| * If that attribute is set to NON_IGNORABLE, then the variable top has no effect. |
| * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION, |
| * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY; |
| * or Collator.ReorderCodes.DEFAULT to restore the default max variable group |
| * @return this |
| * @see #getMaxVariable |
| * @draft ICU 53 |
| * @provisional This API might change or be removed in a future release. |
| */ |
| @Override |
| public RuleBasedCollator setMaxVariable(int group) { |
| // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1. |
| int value; |
| if(group == Collator.ReorderCodes.DEFAULT) { |
| value = -1; // UCOL_DEFAULT |
| } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) { |
| value = group - Collator.ReorderCodes.FIRST; |
| } else { |
| throw new IllegalArgumentException("illegal max variable group " + group); |
| } |
| int oldValue = settings.readOnly().getMaxVariable(); |
| if(value == oldValue) { |
| return this; |
| } |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(settings.readOnly() == defaultSettings) { |
| if(value < 0) { // UCOL_DEFAULT |
| return this; |
| } |
| } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| |
| if(group == Collator.ReorderCodes.DEFAULT) { |
| group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable(); |
| } |
| long varTop = data.getLastPrimaryForGroup(group); |
| assert(varTop != 0); |
| ownedSettings.setMaxVariable(value, defaultSettings.options); |
| ownedSettings.variableTop = varTop; |
| setFastLatinOptions(ownedSettings); |
| return this; |
| } |
| |
| /** |
| * {@icu} Returns the maximum reordering group whose characters are affected by |
| * the alternate handling behavior. |
| * @return the maximum variable reordering group. |
| * @see #setMaxVariable |
| * @draft ICU 53 |
| * @provisional This API might change or be removed in a future release. |
| */ |
| @Override |
| public int getMaxVariable() { |
| return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable(); |
| } |
| |
| /** |
| * {@icu} Sets the variable top to the primary weight of the specified string. |
| * |
| * <p>Beginning with ICU 53, the variable top is pinned to |
| * the top of one of the supported reordering groups, |
| * and it must not be beyond the last of those groups. |
| * See {@link #setMaxVariable(int)}. |
| * |
| * @param varTop |
| * one or more (if contraction) characters to which the variable top should be set |
| * @return variable top primary weight |
| * @exception IllegalArgumentException |
| * is thrown if varTop argument is not a valid variable top element. A variable top element is |
| * invalid when |
| * <ul> |
| * <li>it is a contraction that does not exist in the Collation order |
| * <li>the variable top is beyond |
| * the last reordering group supported by setMaxVariable() |
| * <li>when the varTop argument is null or zero in length. |
| * </ul> |
| * @see #getVariableTop |
| * @see RuleBasedCollator#setAlternateHandlingShifted |
| * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead. |
| */ |
| @Override |
| public int setVariableTop(String varTop) { |
| checkNotFrozen(); |
| if (varTop == null || varTop.length() == 0) { |
| throw new IllegalArgumentException("Variable top argument string can not be null or zero in length."); |
| } |
| boolean numeric = settings.readOnly().isNumeric(); |
| long ce1, ce2; |
| if(settings.readOnly().dontCheckFCD()) { |
| UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0); |
| ce1 = ci.nextCE(); |
| ce2 = ci.nextCE(); |
| } else { |
| FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0); |
| ce1 = ci.nextCE(); |
| ce2 = ci.nextCE(); |
| } |
| if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) { |
| throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element"); |
| } |
| internalSetVariableTop(ce1 >>> 32); |
| return (int)settings.readOnly().variableTop; |
| } |
| |
| /** |
| * {@icu} Sets the variable top to the specified primary weight. |
| * |
| * <p>Beginning with ICU 53, the variable top is pinned to |
| * the top of one of the supported reordering groups, |
| * and it must not be beyond the last of those groups. |
| * See {@link #setMaxVariable(int)}. |
| * |
| * @param varTop primary weight, as returned by setVariableTop or getVariableTop |
| * @see #getVariableTop |
| * @see #setVariableTop(String) |
| * @deprecated ICU 53 Call setMaxVariable() instead. |
| */ |
| @Override |
| public void setVariableTop(int varTop) { |
| checkNotFrozen(); |
| internalSetVariableTop(varTop & 0xffffffffL); |
| } |
| |
| private void internalSetVariableTop(long varTop) { |
| if(varTop != settings.readOnly().variableTop) { |
| // Pin the variable top to the end of the reordering group which contains it. |
| // Only a few special groups are supported. |
| int group = data.getGroupForPrimary(varTop); |
| if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) { |
| throw new IllegalArgumentException("The variable top must be a primary weight in " + |
| "the space/punctuation/symbols/currency symbols range"); |
| } |
| long v = data.getLastPrimaryForGroup(group); |
| assert(v != 0 && v >= varTop); |
| varTop = v; |
| if(varTop != settings.readOnly().variableTop) { |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST, |
| getDefaultSettings().options); |
| ownedSettings.variableTop = varTop; |
| setFastLatinOptions(ownedSettings); |
| } |
| } |
| } |
| |
| /** |
| * {@icu} When numeric collation is turned on, this Collator makes |
| * substrings of digits sort according to their numeric values. |
| * |
| * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest |
| * digit substring that can be treated as a single unit is |
| * 254 digits (not counting leading zeros). If a digit substring is |
| * longer than that, the digits beyond the limit will be treated as a |
| * separate digit substring. |
| * |
| * <p>A "digit" in this sense is a code point with General_Category=Nd, |
| * which does not include circled numbers, roman numerals, etc. |
| * Only a contiguous digit substring is considered, that is, |
| * non-negative integers without separators. |
| * There is no support for plus/minus signs, decimals, exponents, etc. |
| * |
| * @param flag |
| * true to turn numeric collation on and false to turn it off |
| * @see #getNumericCollation |
| * @see #setNumericCollationDefault |
| * @stable ICU 2.8 |
| */ |
| public void setNumericCollation(boolean flag) { |
| checkNotFrozen(); |
| // sort substrings of digits as numbers |
| if(flag == getNumericCollation()) { return; } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setFlag(CollationSettings.NUMERIC, flag); |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| /** |
| * Sets the reordering codes for this collator. |
| * Collation reordering allows scripts and some other defined blocks of characters |
| * to be moved relative to each other as a block. This reordering is done on top of |
| * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed |
| * at the start and/or the end of the collation order. |
| * <p>By default, reordering codes specified for the start of the order are placed in the |
| * order given after a group of “special” non-script blocks. These special groups of characters |
| * are space, punctuation, symbol, currency, and digit. These special groups are represented with |
| * {@link Collator.ReorderCodes}. Script groups can be intermingled with |
| * these special non-script blocks if those special blocks are explicitly specified in the reordering. |
| * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS} stands for any script that is not explicitly |
| * mentioned in the list of reordering codes given. Anything that is after {@link Collator.ReorderCodes#OTHERS OTHERS} |
| * will go at the very end of the reordering in the order given. |
| * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT} will reset the reordering for this collator |
| * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that |
| * was specified when this collator was created from resource data or from rules. The |
| * {@link Collator.ReorderCodes#DEFAULT DEFAULT} code <b>must</b> be the sole code supplied when it used. If not |
| * that will result in an {@link IllegalArgumentException} being thrown. |
| * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE} will remove any reordering for this collator. |
| * The result of setting no reordering will be to have the DUCET/CLDR reordering used. The |
| * {@link Collator.ReorderCodes#NONE NONE} code <b>must</b> be the sole code supplied when it used. |
| * @param order the reordering codes to apply to this collator; if this is null or an empty array |
| * then this clears any existing reordering |
| * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts) |
| * @see #getReorderCodes |
| * @see Collator#getEquivalentReorderCodes |
| * @stable ICU 4.8 |
| */ |
| @Override |
| public void setReorderCodes(int... order) { |
| checkNotFrozen(); |
| if(order == null ? |
| settings.readOnly().reorderCodes.length == 0 : |
| Arrays.equals(order, settings.readOnly().reorderCodes)) { |
| return; |
| } |
| int length = (order != null) ? order.length : 0; |
| CollationSettings defaultSettings = getDefaultSettings(); |
| if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) { |
| if(settings.readOnly() != defaultSettings) { |
| CollationSettings ownedSettings = getOwnedSettings(); |
| ownedSettings.setReordering(defaultSettings.reorderCodes, |
| defaultSettings.reorderTable); |
| setFastLatinOptions(ownedSettings); |
| } |
| return; |
| } |
| CollationSettings ownedSettings = getOwnedSettings(); |
| if(length == 0) { |
| ownedSettings.resetReordering(); |
| } else { |
| byte[] reorderTable = new byte[256]; |
| data.makeReorderTable(order, reorderTable); |
| ownedSettings.setReordering(order.clone(), reorderTable); |
| } |
| setFastLatinOptions(ownedSettings); |
| } |
| |
| private void setFastLatinOptions(CollationSettings ownedSettings) { |
| ownedSettings.fastLatinOptions = CollationFastLatin.getOptions( |
| data, ownedSettings, ownedSettings.fastLatinPrimaries); |
| } |
| |
| // public getters -------------------------------------------------------- |
| |
| /** |
| * Gets the collation tailoring rules for this RuleBasedCollator. |
| * Equivalent to String getRules(false). |
| * |
| * @return the collation tailoring rules |
| * @see #getRules(boolean) |
| * @stable ICU 2.8 |
| */ |
| public String getRules() { |
| return tailoring.rules; |
| } |
| |
| /** |
| * Returns current rules. |
| * The argument defines whether full rules (root collation + tailored) rules are returned |
| * or just the tailoring. |
| * |
| * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order. |
| * They are almost never used or useful at runtime and can be removed from the data. |
| * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide: |
| * Collation Customization, Building on Existing Locales</a> |
| * |
| * <p>{@link #getRules()} should normally be used instead. |
| * @param fullrules |
| * true if the rules that defines the full set of collation order is required, otherwise false for |
| * returning only the tailored rules |
| * @return the current rules that defines this Collator. |
| * @see #getRules() |
| * @stable ICU 2.6 |
| */ |
| public String getRules(boolean fullrules) { |
| if (!fullrules) { |
| return tailoring.rules; |
| } |
| return CollationLoader.getRootRules() + tailoring.rules; |
| } |
| |
| /** |
| * Get a UnicodeSet that contains all the characters and sequences tailored in this collator. |
| * |
| * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently |
| * than in the root collator. |
| * @stable ICU 2.4 |
| */ |
| @Override |
| public UnicodeSet getTailoredSet() { |
| UnicodeSet tailored = new UnicodeSet(); |
| if(data.base != null) { |
| new TailoredSet(tailored).forData(data); |
| } |
| return tailored; |
| } |
| |
| /** |
| * Gets unicode sets containing contractions and/or expansions of a collator |
| * |
| * @param contractions |
| * if not null, set to contain contractions |
| * @param expansions |
| * if not null, set to contain expansions |
| * @param addPrefixes |
| * add the prefix contextual elements to contractions |
| * @throws Exception |
| * Throws an exception if any errors occurs. |
| * @stable ICU 3.4 |
| */ |
| public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes) |
| throws Exception { |
| if (contractions != null) { |
| contractions.clear(); |
| } |
| if (expansions != null) { |
| expansions.clear(); |
| } |
| new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data); |
| } |
| |
| /** |
| * Adds the contractions that start with character c to the set. |
| * Ignores prefixes. Used by AlphabeticIndex. |
| * @internal |
| * @deprecated This API is ICU internal only. |
| */ |
| void internalAddContractions(int c, UnicodeSet set) { |
| new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c); |
| } |
| |
| /** |
| * <p> |
| * Get a Collation key for the argument String source from this RuleBasedCollator. |
| * </p> |
| * <p> |
| * General recommendation: <br> |
| * If comparison are to be done to the same String multiple times, it would be more efficient to generate |
| * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If the each |
| * Strings are compared to only once, using the method RuleBasedCollator.compare(String, String) will have a better |
| * performance. |
| * </p> |
| * <p> |
| * See the class documentation for an explanation about CollationKeys. |
| * </p> |
| * |
| * @param source |
| * the text String to be transformed into a collation key. |
| * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source |
| * String is null, a null CollationKey is returned. |
| * @see CollationKey |
| * @see #compare(String, String) |
| * @see #getRawCollationKey |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public CollationKey getCollationKey(String source) { |
| if (source == null) { |
| return null; |
| } |
| CollationBuffer buffer = null; |
| try { |
| buffer = getCollationBuffer(); |
| return getCollationKey(source, buffer); |
| } finally { |
| releaseCollationBuffer(buffer); |
| } |
| } |
| |
| private CollationKey getCollationKey(String source, CollationBuffer buffer) { |
| buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer); |
| return new CollationKey(source, buffer.rawCollationKey); |
| } |
| |
| /** |
| * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the |
| * result into the user provided argument key. If key has a internal byte array of length that's too small for the |
| * result, the internal byte array will be grown to the exact required size. |
| * |
| * @param source the text String to be transformed into a RawCollationKey |
| * @param key output RawCollationKey to store results |
| * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user |
| * provided key will be returned. |
| * @see #getCollationKey |
| * @see #compare(String, String) |
| * @see RawCollationKey |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public RawCollationKey getRawCollationKey(String source, RawCollationKey key) { |
| if (source == null) { |
| return null; |
| } |
| CollationBuffer buffer = null; |
| try { |
| buffer = getCollationBuffer(); |
| return getRawCollationKey(source, key, buffer); |
| } finally { |
| releaseCollationBuffer(buffer); |
| } |
| } |
| |
| private static final class CollationKeyByteSink extends SortKeyByteSink { |
| CollationKeyByteSink(RawCollationKey key) { |
| super(key.bytes); |
| key_ = key; |
| } |
| |
| @Override |
| protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) { |
| // n > 0 && appended_ > capacity_ |
| if (Resize(n, length)) { |
| System.arraycopy(bytes, start, buffer_, length, n); |
| } |
| } |
| |
| @Override |
| protected boolean Resize(int appendCapacity, int length) { |
| int newCapacity = 2 * buffer_.length; |
| int altCapacity = length + 2 * appendCapacity; |
| if (newCapacity < altCapacity) { |
| newCapacity = altCapacity; |
| } |
| if (newCapacity < 200) { |
| newCapacity = 200; |
| } |
| // Do not call key_.ensureCapacity(newCapacity) because we do not |
| // keep key_.size in sync with appended_. |
| // We only set it when we are done. |
| byte[] newBytes = new byte[newCapacity]; |
| System.arraycopy(buffer_, 0, newBytes, 0, length); |
| buffer_ = key_.bytes = newBytes; |
| return true; |
| } |
| |
| private RawCollationKey key_; |
| } |
| |
| private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) { |
| if (key == null) { |
| key = new RawCollationKey(simpleKeyLengthEstimate(source)); |
| } else if (key.bytes == null) { |
| key.bytes = new byte[simpleKeyLengthEstimate(source)]; |
| } |
| CollationKeyByteSink sink = new CollationKeyByteSink(key); |
| writeSortKey(source, sink, buffer); |
| key.size = sink.NumberOfBytesAppended(); |
| return key; |
| } |
| |
| private int simpleKeyLengthEstimate(CharSequence source) { |
| return 2 * source.length() + 10; |
| } |
| |
| private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) { |
| boolean numeric = settings.readOnly().isNumeric(); |
| if(settings.readOnly().dontCheckFCD()) { |
| buffer.leftUTF16CollIter.setText(numeric, s, 0); |
| CollationKeys.writeSortKeyUpToQuaternary( |
| buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(), |
| sink, Collation.PRIMARY_LEVEL, |
| CollationKeys.SIMPLE_LEVEL_FALLBACK, true); |
| } else { |
| buffer.leftFCDUTF16Iter.setText(numeric, s, 0); |
| CollationKeys.writeSortKeyUpToQuaternary( |
| buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(), |
| sink, Collation.PRIMARY_LEVEL, |
| CollationKeys.SIMPLE_LEVEL_FALLBACK, true); |
| } |
| if(settings.readOnly().getStrength() == IDENTICAL) { |
| writeIdenticalLevel(s, sink); |
| } |
| sink.Append(Collation.TERMINATOR_BYTE); |
| } |
| |
| private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) { |
| // NFD quick check |
| int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null); |
| sink.Append(Collation.LEVEL_SEPARATOR_BYTE); |
| // Sync the ByteArrayWrapper size with the key length. |
| sink.key_.size = sink.NumberOfBytesAppended(); |
| int prev = 0; |
| if(nfdQCYesLimit != 0) { |
| prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_); |
| } |
| // Is there non-NFD text? |
| if(nfdQCYesLimit < s.length()) { |
| int destLengthEstimate = s.length() - nfdQCYesLimit; |
| StringBuilder nfd = new StringBuilder(); |
| data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate); |
| BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_); |
| } |
| // Sync the key with the buffer again which got bytes appended and may have been reallocated. |
| sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size); |
| } |
| |
| /** |
| * Returns the CEs for the string. |
| * @param str the string |
| * @internal for tests & tools |
| * @deprecated This API is ICU internal only. |
| */ |
| public long[] internalGetCEs(CharSequence str) { |
| CollationBuffer buffer = null; |
| try { |
| buffer = getCollationBuffer(); |
| boolean numeric = settings.readOnly().isNumeric(); |
| CollationIterator iter; |
| if(settings.readOnly().dontCheckFCD()) { |
| buffer.leftUTF16CollIter.setText(numeric, str, 0); |
| iter = buffer.leftUTF16CollIter; |
| } else { |
| buffer.leftFCDUTF16Iter.setText(numeric, str, 0); |
| iter = buffer.leftFCDUTF16Iter; |
| } |
| int length = iter.fetchCEs() - 1; |
| assert length >= 0 && iter.getCE(length) == Collation.NO_CE; |
| long[] ces = new long[length]; |
| System.arraycopy(iter.getCEs(), 0, ces, 0, length); |
| return ces; |
| } finally { |
| releaseCollationBuffer(buffer); |
| } |
| } |
| |
| /** |
| * Returns this Collator's strength attribute. The strength attribute |
| * determines the minimum level of difference considered significant. |
| * |
| * <p>{@icunote} This can return QUATERNARY strength, which is not supported by the |
| * JDK version. |
| * |
| * <p>See the Collator class description for more details. |
| * |
| * @return this Collator's current strength attribute. |
| * @see #setStrength |
| * @see #PRIMARY |
| * @see #SECONDARY |
| * @see #TERTIARY |
| * @see #QUATERNARY |
| * @see #IDENTICAL |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public int getStrength() { |
| return settings.readOnly().getStrength(); |
| } |
| |
| /** |
| * Returns the decomposition mode of this Collator. The decomposition mode |
| * determines how Unicode composed characters are handled. |
| * |
| * <p>See the Collator class description for more details. |
| * |
| * @return the decomposition mode |
| * @see #setDecomposition |
| * @see #NO_DECOMPOSITION |
| * @see #CANONICAL_DECOMPOSITION |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public int getDecomposition() { |
| return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ? |
| CANONICAL_DECOMPOSITION : NO_DECOMPOSITION; |
| } |
| |
| /** |
| * Return true if an uppercase character is sorted before the corresponding lowercase character. See |
| * setCaseFirst(boolean) for details. |
| * |
| * @see #setUpperCaseFirst |
| * @see #setLowerCaseFirst |
| * @see #isLowerCaseFirst |
| * @see #setCaseFirstDefault |
| * @return true if upper cased characters are sorted before lower cased characters, false otherwise |
| * @stable ICU 2.8 |
| */ |
| public boolean isUpperCaseFirst() { |
| return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK); |
| } |
| |
| /** |
| * Return true if a lowercase character is sorted before the corresponding uppercase character. See |
| * setCaseFirst(boolean) for details. |
| * |
| * @see #setUpperCaseFirst |
| * @see #setLowerCaseFirst |
| * @see #isUpperCaseFirst |
| * @see #setCaseFirstDefault |
| * @return true lower cased characters are sorted before upper cased characters, false otherwise |
| * @stable ICU 2.8 |
| */ |
| public boolean isLowerCaseFirst() { |
| return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST); |
| } |
| |
| /** |
| * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true, |
| * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the |
| * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more |
| * details. |
| * |
| * @return true or false |
| * @see #setAlternateHandlingShifted(boolean) |
| * @see #setAlternateHandlingDefault |
| * @stable ICU 2.8 |
| */ |
| public boolean isAlternateHandlingShifted() { |
| return settings.readOnly().getAlternateHandling(); |
| } |
| |
| /** |
| * Checks if case level is set to true. See setCaseLevel(boolean) for details. |
| * |
| * @return the case level mode |
| * @see #setCaseLevelDefault |
| * @see #isCaseLevel |
| * @see #setCaseLevel(boolean) |
| * @stable ICU 2.8 |
| */ |
| public boolean isCaseLevel() { |
| return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0; |
| } |
| |
| /** |
| * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details. |
| * |
| * @return true if French Collation is set to true, false otherwise |
| * @see #setFrenchCollation(boolean) |
| * @see #setFrenchCollationDefault |
| * @stable ICU 2.8 |
| */ |
| public boolean isFrenchCollation() { |
| return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0; |
| } |
| |
| /** |
| * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details. |
| * |
| * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. |
| * Since ICU 50, this attribute is not settable any more via API functions. |
| * Since CLDR 25/ICU 53, explicit quaternary relations are used |
| * to achieve the same Japanese sort order. |
| * |
| * @return false |
| * @see #setHiraganaQuaternaryDefault |
| * @see #setHiraganaQuaternary(boolean) |
| * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. |
| */ |
| @Deprecated |
| public boolean isHiraganaQuaternary() { |
| return false; |
| } |
| |
| /** |
| * {@icu} Gets the variable top value of a Collator. |
| * |
| * @return the variable top primary weight |
| * @see #getMaxVariable |
| * @stable ICU 2.6 |
| */ |
| @Override |
| public int getVariableTop() { |
| return (int)settings.readOnly().variableTop; |
| } |
| |
| /** |
| * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a |
| * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2' |
| * |
| * @see #setNumericCollation |
| * @see #setNumericCollationDefault |
| * @return true if numeric collation is turned on, false otherwise |
| * @stable ICU 2.8 |
| */ |
| public boolean getNumericCollation() { |
| return (settings.readOnly().options & CollationSettings.NUMERIC) != 0; |
| } |
| |
| /** |
| * Retrieves the reordering codes for this collator. |
| * These reordering codes are a combination of UScript codes and ReorderCodes. |
| * @return a copy of the reordering codes for this collator; |
| * if none are set then returns an empty array |
| * @see #setReorderCodes |
| * @see Collator#getEquivalentReorderCodes |
| * @stable ICU 4.8 |
| */ |
| @Override |
| public int[] getReorderCodes() { |
| return settings.readOnly().reorderCodes.clone(); |
| } |
| |
| // public other methods ------------------------------------------------- |
| |
| /** |
| * {@inheritDoc} |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public boolean equals(Object obj) { |
| if (this == obj) { |
| return true; |
| } |
| if (!super.equals(obj)) { |
| return false; |
| } |
| RuleBasedCollator o = (RuleBasedCollator) obj; |
| if(!settings.readOnly().equals(o.settings.readOnly())) { return false; } |
| if(data == o.data) { return true; } |
| boolean thisIsRoot = data.base == null; |
| boolean otherIsRoot = o.data.base == null; |
| assert(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be == |
| if(thisIsRoot != otherIsRoot) { return false; } |
| if((thisIsRoot || tailoring.rules.length() != 0) && |
| (otherIsRoot || o.tailoring.rules.length() != 0)) { |
| // Shortcut: If both collators have valid rule strings, then compare those. |
| if(tailoring.rules.equals(o.tailoring.rules)) { return true; } |
| } |
| // Different rule strings can result in the same or equivalent tailoring. |
| // The rule strings are optional in ICU resource bundles, although included by default. |
| // cloneBinary() drops the rule string. |
| UnicodeSet thisTailored = getTailoredSet(); |
| UnicodeSet otherTailored = o.getTailoredSet(); |
| if(!thisTailored.equals(otherTailored)) { return false; } |
| // For completeness, we should compare all of the mappings; |
| // or we should create a list of strings, sort it with one collator, |
| // and check if both collators compare adjacent strings the same |
| // (order & strength, down to quaternary); or similar. |
| // Testing equality of collators seems unusual. |
| return true; |
| } |
| |
| /** |
| * Generates a unique hash code for this RuleBasedCollator. |
| * |
| * @return the unique hash code for this Collator |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public int hashCode() { |
| int h = settings.readOnly().hashCode(); |
| if(data.base == null) { return h; } // root collator |
| // Do not rely on the rule string, see comments in operator==(). |
| UnicodeSet set = getTailoredSet(); |
| UnicodeSetIterator iter = new UnicodeSetIterator(set); |
| while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) { |
| h ^= data.getCE32(iter.codepoint); |
| } |
| return h; |
| } |
| |
| /** |
| * Compares the source text String to the target text String according to the collation rules, strength and |
| * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero |
| * depending on whether the source String is less than, equal to or greater than the target String. See the Collator |
| * class description for an example of use. </p> |
| * <p> |
| * General recommendation: <br> |
| * If comparison are to be done to the same String multiple times, it would be more efficient to generate |
| * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed |
| * performance is critical and object instantiation is to be reduced, further optimization may be achieved by |
| * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method |
| * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey |
| * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key |
| * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String, |
| * String) will have a better performance. |
| * </p> |
| * |
| * @param source |
| * the source text String. |
| * @param target |
| * the target text String. |
| * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source |
| * and target are equal, value is greater than zero if source is greater than target. |
| * @see CollationKey |
| * @see #getCollationKey |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public int compare(String source, String target) { |
| return doCompare(source, target); |
| } |
| |
| /** |
| * Abstract iterator for identical-level string comparisons. |
| * Returns FCD code points and handles temporary switching to NFD. |
| * |
| * <p>As with CollationIterator, |
| * Java NFDIterator instances are partially constructed and cached, |
| * and completed when reset for use. |
| * C++ NFDIterator instances are stack-allocated. |
| */ |
| private static abstract class NFDIterator { |
| /** |
| * Partial constructor, must call reset(). |
| */ |
| NFDIterator() {} |
| final void reset() { |
| index = -1; |
| } |
| |
| /** |
| * Returns the next code point from the internal normalization buffer, |
| * or else the next text code point. |
| * Returns -1 at the end of the text. |
| */ |
| final int nextCodePoint() { |
| if(index >= 0) { |
| if(index == decomp.length()) { |
| index = -1; |
| } else { |
| int c = Character.codePointAt(decomp, index); |
| index += Character.charCount(c); |
| return c; |
| } |
| } |
| return nextRawCodePoint(); |
| } |
| /** |
| * @param nfcImpl |
| * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint() |
| * @return the first code point in c's decomposition, |
| * or c itself if it was decomposed already or if it does not decompose |
| */ |
| final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) { |
| if(index >= 0) { return c; } |
| decomp = nfcImpl.getDecomposition(c); |
| if(decomp == null) { return c; } |
| c = Character.codePointAt(decomp, 0); |
| index = Character.charCount(c); |
| return c; |
| } |
| |
| /** |
| * Returns the next text code point in FCD order. |
| * Returns -1 at the end of the text. |
| */ |
| protected abstract int nextRawCodePoint(); |
| |
| private String decomp; |
| private int index; |
| } |
| |
| private static class UTF16NFDIterator extends NFDIterator { |
| UTF16NFDIterator() {} |
| void setText(CharSequence seq, int start) { |
| reset(); |
| s = seq; |
| pos = start; |
| } |
| |
| @Override |
| protected int nextRawCodePoint() { |
| if(pos == s.length()) { return Collation.SENTINEL_CP; } |
| int c = Character.codePointAt(s, pos); |
| pos += Character.charCount(c); |
| return c; |
| } |
| |
| protected CharSequence s; |
| protected int pos; |
| } |
| |
| private static final class FCDUTF16NFDIterator extends UTF16NFDIterator { |
| FCDUTF16NFDIterator() {} |
| void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) { |
| reset(); |
| int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null); |
| if(spanLimit == seq.length()) { |
| s = seq; |
| pos = start; |
| } else { |
| if(str == null) { |
| str = new StringBuilder(); |
| } else { |
| str.setLength(0); |
| } |
| str.append(seq, start, spanLimit); |
| ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start); |
| nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer); |
| s = str; |
| pos = 0; |
| } |
| } |
| |
| private StringBuilder str; |
| } |
| |
| private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) { |
| for(;;) { |
| // Fetch the next FCD code point from each string. |
| int leftCp = left.nextCodePoint(); |
| int rightCp = right.nextCodePoint(); |
| if(leftCp == rightCp) { |
| if(leftCp < 0) { break; } |
| continue; |
| } |
| // If they are different, then decompose each and compare again. |
| if(leftCp < 0) { |
| leftCp = -2; // end of string |
| } else if(leftCp == 0xfffe) { |
| leftCp = -1; // U+FFFE: merge separator |
| } else { |
| leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp); |
| } |
| if(rightCp < 0) { |
| rightCp = -2; // end of string |
| } else if(rightCp == 0xfffe) { |
| rightCp = -1; // U+FFFE: merge separator |
| } else { |
| rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp); |
| } |
| if(leftCp < rightCp) { return Collation.LESS; } |
| if(leftCp > rightCp) { return Collation.GREATER; } |
| } |
| return Collation.EQUAL; |
| } |
| |
| /** |
| * Compares two CharSequences. |
| * @internal |
| * @deprecated This API is ICU internal only. |
| */ |
| @Override |
| protected int doCompare(CharSequence left, CharSequence right) { |
| if(left == right) { |
| return Collation.EQUAL; |
| } |
| |
| // Identical-prefix test. |
| int equalPrefixLength = 0; |
| for(;;) { |
| if(equalPrefixLength == left.length()) { |
| if(equalPrefixLength == right.length()) { return Collation.EQUAL; } |
| break; |
| } else if(equalPrefixLength == right.length() || |
| left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) { |
| break; |
| } |
| ++equalPrefixLength; |
| } |
| |
| CollationSettings roSettings = settings.readOnly(); |
| boolean numeric = roSettings.isNumeric(); |
| if(equalPrefixLength > 0) { |
| if((equalPrefixLength != left.length() && |
| data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) || |
| (equalPrefixLength != right.length() && |
| data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) { |
| // Identical prefix: Back up to the start of a contraction or reordering sequence. |
| while(--equalPrefixLength > 0 && |
| data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {} |
| } |
| // Notes: |
| // - A longer string can compare equal to a prefix of it if only ignorables follow. |
| // - With a backward level, a longer string can compare less-than a prefix of it. |
| |
| // Pass the actual start of each string into the CollationIterators, |
| // plus the equalPrefixLength position, |
| // so that prefix matches back into the equal prefix work. |
| } |
| |
| int result; |
| int fastLatinOptions = roSettings.fastLatinOptions; |
| if(fastLatinOptions >= 0 && |
| (equalPrefixLength == left.length() || |
| left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) && |
| (equalPrefixLength == right.length() || |
| right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) { |
| result = CollationFastLatin.compareUTF16(data.fastLatinTable, |
| roSettings.fastLatinPrimaries, |
| fastLatinOptions, |
| left, right, equalPrefixLength); |
| } else { |
| result = CollationFastLatin.BAIL_OUT_RESULT; |
| } |
| |
| if(result == CollationFastLatin.BAIL_OUT_RESULT) { |
| CollationBuffer buffer = null; |
| try { |
| buffer = getCollationBuffer(); |
| if(roSettings.dontCheckFCD()) { |
| buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength); |
| buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength); |
| result = CollationCompare.compareUpToQuaternary( |
| buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings); |
| } else { |
| buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength); |
| buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength); |
| result = CollationCompare.compareUpToQuaternary( |
| buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings); |
| } |
| } finally { |
| releaseCollationBuffer(buffer); |
| } |
| } |
| if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) { |
| return result; |
| } |
| |
| CollationBuffer buffer = null; |
| try { |
| buffer = getCollationBuffer(); |
| // Compare identical level. |
| Normalizer2Impl nfcImpl = data.nfcImpl; |
| if(roSettings.dontCheckFCD()) { |
| buffer.leftUTF16NFDIter.setText(left, equalPrefixLength); |
| buffer.rightUTF16NFDIter.setText(right, equalPrefixLength); |
| return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter); |
| } else { |
| buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength); |
| buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength); |
| return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter); |
| } |
| } finally { |
| releaseCollationBuffer(buffer); |
| } |
| } |
| |
| // package private constructors ------------------------------------------ |
| |
/**
 * Creates a collator backed by the given tailoring.
 * The collator shares the tailoring's data and gets its own clone of the
 * tailoring's settings reference.
 *
 * @param t the tailoring to use
 * @param vl the valid locale to report from getLocale()
 */
RuleBasedCollator(CollationTailoring t, ULocale vl) {
    this.data = t.data;
    this.settings = t.settings.clone();
    this.tailoring = t;
    this.validLocale = vl;
    // getLocale(ACTUAL_LOCALE) reports tailoring.actualLocale until setLocale() says otherwise.
    this.actualLocaleIsSameAsValid = false;
}
| |
| private void adoptTailoring(CollationTailoring t) { |
| assert(settings == null && data == null && tailoring == null); |
| data = t.data; |
| settings = t.settings.clone(); |
| tailoring = t; |
| validLocale = t.actualLocale; |
| actualLocaleIsSameAsValid = false; |
| } |
| |
| // package private methods ----------------------------------------------- |
| |
| /** |
| * Tests whether a character is "unsafe" for use as a collation starting point. |
| * |
| * @param c code point or code unit |
| * @return true if c is unsafe |
| * @see CollationElementIterator#setOffset(int) |
| */ |
| final boolean isUnsafe(int c) { |
| return data.isUnsafeBackward(c, settings.readOnly().isNumeric()); |
| } |
| |
/**
 * Frozen state of the collator.
 * When isFrozen() is true, getCollationBuffer() acquires this lock and
 * releaseCollationBuffer() releases it, so the shared collationBuffer is
 * handed to one caller at a time.
 */
private Lock frozenLock;
| |
| private static final class CollationBuffer { |
| private CollationBuffer(CollationData data) { |
| leftUTF16CollIter = new UTF16CollationIterator(data); |
| rightUTF16CollIter = new UTF16CollationIterator(data); |
| leftFCDUTF16Iter = new FCDUTF16CollationIterator(data); |
| rightFCDUTF16Iter = new FCDUTF16CollationIterator(data); |
| leftUTF16NFDIter = new UTF16NFDIterator(); |
| rightUTF16NFDIter = new UTF16NFDIterator(); |
| leftFCDUTF16NFDIter = new FCDUTF16NFDIterator(); |
| rightFCDUTF16NFDIter = new FCDUTF16NFDIterator(); |
| } |
| |
| UTF16CollationIterator leftUTF16CollIter; |
| UTF16CollationIterator rightUTF16CollIter; |
| FCDUTF16CollationIterator leftFCDUTF16Iter; |
| FCDUTF16CollationIterator rightFCDUTF16Iter; |
| |
| UTF16NFDIterator leftUTF16NFDIter; |
| UTF16NFDIterator rightUTF16NFDIter; |
| FCDUTF16NFDIterator leftFCDUTF16NFDIter; |
| FCDUTF16NFDIterator rightFCDUTF16NFDIter; |
| |
| RawCollationKey rawCollationKey; |
| } |
| |
| /** |
| * Get the version of this collator object. |
| * |
| * @return the version object associated with this collator |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public VersionInfo getVersion() { |
| VersionInfo version = tailoring.version; |
| int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor(); |
| return VersionInfo.getInstance( |
| version.getMajor() + (rtVersion << 4) + (rtVersion >> 4), |
| version.getMinor(), version.getMilli(), version.getMicro()); |
| } |
| |
| /** |
| * Get the UCA version of this collator object. |
| * |
| * @return the version object associated with this collator |
| * @stable ICU 2.8 |
| */ |
| @Override |
| public VersionInfo getUCAVersion() { |
| VersionInfo v = getVersion(); |
| // Note: This is tied to how the current implementation encodes the UCA version |
| // in the overall getVersion(). |
| // Alternatively, we could load the root collator and get at lower-level data from there. |
| // Either way, it will reflect the input collator's UCA version only |
| // if it is a known implementation. |
| // (C++ comment) It would be cleaner to make this a virtual Collator method. |
| // (In Java, it is virtual.) |
| return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0); |
| } |
| |
// Reusable iterator buffer returned by getCollationBuffer().
// getCollationBuffer() creates it on demand only while the collator is not frozen.
private CollationBuffer collationBuffer;
| |
| private final CollationBuffer getCollationBuffer() { |
| if (isFrozen()) { |
| frozenLock.lock(); |
| } else if (collationBuffer == null) { |
| collationBuffer = new CollationBuffer(data); |
| } |
| return collationBuffer; |
| } |
| |
| private final void releaseCollationBuffer(CollationBuffer buffer) { |
| if (isFrozen()) { |
| frozenLock.unlock(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc} |
| * @draft ICU 53 (retain) |
| * @provisional This API might change or be removed in a future release. |
| */ |
| @Override |
| public ULocale getLocale(ULocale.Type type) { |
| if (type == ULocale.ACTUAL_LOCALE) { |
| return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale; |
| } else if(type == ULocale.VALID_LOCALE) { |
| return validLocale; |
| } else { |
| throw new IllegalArgumentException("unknown ULocale.Type " + type); |
| } |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| void setLocale(ULocale valid, ULocale actual) { |
| // This method is called |
| // by other protected functions that checks and makes sure that |
| // valid and actual are not null before passing |
| assert (valid == null) == (actual == null); |
| // Another check we could do is that the actual locale is at |
| // the same level or less specific than the valid locale. |
| // TODO: Starting with Java 7, use Objects.equals(a, b). |
| if(Utility.objectEquals(actual, tailoring.actualLocale)) { |
| actualLocaleIsSameAsValid = false; |
| } else { |
| assert(Utility.objectEquals(actual, valid)); |
| actualLocaleIsSameAsValid = true; |
| } |
| // Do not modify tailoring.actualLocale: |
| // We cannot be sure that that would be thread-safe. |
| validLocale = valid; |
| } |
| |
// Collation data taken from the tailoring (t.data); doCompare() reads its
// unsafe-backward test, fast-Latin table and nfcImpl.
CollationData data;
// This collator's own clone of the tailoring's settings reference; read via settings.readOnly().
SharedObject.Reference<CollationSettings> settings;  // reference-counted
// The tailoring this collator was built from; supplies version and actualLocale.
CollationTailoring tailoring;  // C++: reference-counted
// Locale returned by getLocale(ULocale.VALID_LOCALE).
private ULocale validLocale;
// Note: No need in Java to track which attributes have been set explicitly.
// int or EnumSet explicitlySetAttributes;

// When true, getLocale(ULocale.ACTUAL_LOCALE) returns validLocale
// instead of tailoring.actualLocale; maintained by setLocale().
private boolean actualLocaleIsSameAsValid;
| } |