| // © 2018 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html#License |
| |
| package com.ibm.icu.lang; |
| |
| import com.ibm.icu.impl.CharacterPropertiesImpl; |
| import com.ibm.icu.text.UnicodeSet; |
| import com.ibm.icu.util.CodePointMap; |
| import com.ibm.icu.util.CodePointTrie; |
| import com.ibm.icu.util.MutableCodePointTrie; |
| |
| /** |
| * Sets and maps for Unicode properties. |
| * The methods here return an object per property: |
| * A set for each ICU-supported binary property with all code points for which the property is true. |
| * A map for each ICU-supported enumerated/catalog/int-valued property |
| * which maps all Unicode code points to their values for that property. |
| * |
| * <p>For details see the method descriptions. |
| * For lookup of property values by code point see class {@link UCharacter}. |
| * |
| * @draft ICU 63 |
| * @provisional This API might change or be removed in a future release. |
| */ |
| public final class CharacterProperties { |
| private CharacterProperties() {} // all-static |
| |
| private static final UnicodeSet sets[] = new UnicodeSet[UProperty.BINARY_LIMIT]; |
| private static final CodePointMap maps[] = new CodePointMap[UProperty.INT_LIMIT - UProperty.INT_START]; |
| |
| private static UnicodeSet makeSet(int property) { |
| UnicodeSet set = new UnicodeSet(); |
| UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(property); |
| int numRanges = inclusions.getRangeCount(); |
| int startHasProperty = -1; |
| |
| for (int i = 0; i < numRanges; ++i) { |
| int rangeEnd = inclusions.getRangeEnd(i); |
| for (int c = inclusions.getRangeStart(i); c <= rangeEnd; ++c) { |
| // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. |
| if (UCharacter.hasBinaryProperty(c, property)) { |
| if (startHasProperty < 0) { |
| // Transition from false to true. |
| startHasProperty = c; |
| } |
| } else if (startHasProperty >= 0) { |
| // Transition from true to false. |
| set.add(startHasProperty, c - 1); |
| startHasProperty = -1; |
| } |
| } |
| } |
| if (startHasProperty >= 0) { |
| set.add(startHasProperty, 0x10FFFF); |
| } |
| |
| return set.freeze(); |
| } |
| |
| private static CodePointMap makeMap(int property) { |
| int nullValue = property == UProperty.SCRIPT ? UScript.UNKNOWN : 0; |
| MutableCodePointTrie mutableTrie = new MutableCodePointTrie(nullValue, nullValue); |
| UnicodeSet inclusions = CharacterPropertiesImpl.getInclusionsForProperty(property); |
| int numRanges = inclusions.getRangeCount(); |
| int start = 0; |
| int value = nullValue; |
| |
| for (int i = 0; i < numRanges; ++i) { |
| int rangeEnd = inclusions.getRangeEnd(i); |
| for (int c = inclusions.getRangeStart(i); c <= rangeEnd; ++c) { |
| // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. |
| int nextValue = UCharacter.getIntPropertyValue(c, property); |
| if (value != nextValue) { |
| if (value != nullValue) { |
| mutableTrie.setRange(start, c - 1, value); |
| } |
| start = c; |
| value = nextValue; |
| } |
| } |
| } |
| if (value != 0) { |
| mutableTrie.setRange(start, 0x10FFFF, value); |
| } |
| |
| CodePointTrie.Type type; |
| if (property == UProperty.BIDI_CLASS || property == UProperty.GENERAL_CATEGORY) { |
| type = CodePointTrie.Type.FAST; |
| } else { |
| type = CodePointTrie.Type.SMALL; |
| } |
| CodePointTrie.ValueWidth valueWidth; |
| // TODO: UCharacterProperty.IntProperty |
| int max = UCharacter.getIntPropertyMaxValue(property); |
| if (max <= 0xff) { |
| valueWidth = CodePointTrie.ValueWidth.BITS_8; |
| } else if (max <= 0xffff) { |
| valueWidth = CodePointTrie.ValueWidth.BITS_16; |
| } else { |
| valueWidth = CodePointTrie.ValueWidth.BITS_32; |
| } |
| return mutableTrie.buildImmutable(type, valueWidth); |
| } |
| |
| /** |
| * Returns a frozen UnicodeSet for a binary property. |
| * Throws an exception if the property number is not one for a binary property. |
| * |
| * <p>The returned set contains all code points for which the property is true. |
| * |
| * @param property {@link UProperty#BINARY_START}..{@link UProperty#BINARY_LIMIT}-1 |
| * @return the property as a set |
| * @see UProperty |
| * @see UCharacter#hasBinaryProperty |
| * @draft ICU 63 |
| * @provisional This API might change or be removed in a future release. |
| */ |
| public static final UnicodeSet getBinaryPropertySet(int property) { |
| if (property < 0 || UProperty.BINARY_LIMIT <= property) { |
| throw new IllegalArgumentException("" + property + |
| " is not a constant for a UProperty binary property"); |
| } |
| synchronized(sets) { |
| UnicodeSet set = sets[property]; |
| if (set == null) { |
| sets[property] = set = makeSet(property); |
| } |
| return set; |
| } |
| } |
| |
| /** |
| * Returns an immutable CodePointMap for an enumerated/catalog/int-valued property. |
| * Throws an exception if the property number is not one for an "int property". |
| * |
| * <p>The returned object maps all Unicode code points to their values for that property. |
| * For documentation of the integer values see {@link UCharacter#getIntPropertyValue(int, int)}. |
| * |
| * <p>The actual type of the returned object differs between properties |
| * and may change over time. |
| * |
| * @param property {@link UProperty#INT_START}..{@link UProperty#INT_LIMIT}-1 |
| * @return the property as a map |
| * @see UProperty |
| * @see UCharacter#getIntPropertyValue |
| * @draft ICU 63 |
| * @provisional This API might change or be removed in a future release. |
| */ |
| public static final CodePointMap getIntPropertyMap(int property) { |
| if (property < UProperty.INT_START || UProperty.INT_LIMIT <= property) { |
| throw new IllegalArgumentException("" + property + |
| " is not a constant for a UProperty int property"); |
| } |
| synchronized(maps) { |
| CodePointMap map = maps[property - UProperty.INT_START]; |
| if (map == null) { |
| maps[property - UProperty.INT_START] = map = makeMap(property); |
| } |
| return map; |
| } |
| } |
| } |