| /* |
| ******************************************************************************* |
| * Copyright (C) 1996-2014, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| */ |
| package com.ibm.icu.dev.util; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashSet; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| |
| import com.ibm.icu.impl.Utility; |
| import com.ibm.icu.text.StringTransform; |
| import com.ibm.icu.text.UTF16; |
| import com.ibm.icu.text.UnicodeSet; |
| import com.ibm.icu.text.UnicodeSetIterator; |
| import com.ibm.icu.util.Freezable; |
| |
| /** |
| * Class for mapping Unicode characters and strings to values, optimized for single code points, |
| * where ranges of code points have the same value. |
| * Much smaller storage than using HashMap, and much faster and more compact than |
| * a list of UnicodeSets. The API design mimics Map<String,T> but can't extend it due to some |
| * necessary changes (much as UnicodeSet mimics Set<String>). Note that nulls are not permitted as values; |
| * that is, a put(x,null) is the same as remove(x).<br> |
| * At this point "" is also not allowed as a key, although that may change. |
| * @author markdavis |
| */ |
| |
| public final class UnicodeMap<T> implements Cloneable, Freezable, StringTransform, Iterable<String> { |
| /** |
| * For serialization |
| */ |
| //private static final long serialVersionUID = -6540936876295804105L; |
| static final boolean ASSERTIONS = false; |
| static final long GROWTH_PERCENT = 200; // 100 is no growth! |
| static final long GROWTH_GAP = 10; // extra bump! |
| |
| private int length; |
| // two parallel arrays to save memory. Wish Java had structs. |
| private int[] transitions; |
| /* package private */ T[] values; |
| |
| private LinkedHashSet<T> availableValues = new LinkedHashSet<T>(); |
| private transient boolean staleAvailableValues; |
| |
| private transient boolean errorOnReset; |
| private volatile transient boolean locked; |
| private int lastIndex; |
| private Map<String,T> stringMap; |
| |
| { clear(); } |
| |
| public UnicodeMap<T> clear() { |
| if (locked) { |
| throw new UnsupportedOperationException("Attempt to modify locked object"); |
| } |
| length = 2; |
| transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0}; |
| values = (T[]) new Object[10]; |
| |
| availableValues.clear(); |
| staleAvailableValues = false; |
| |
| errorOnReset = false; |
| lastIndex = 0; |
| stringMap = null; |
| return this; |
| } |
| |
| /* Boilerplate */ |
| public boolean equals(Object other) { |
| if (other == null) return false; |
| try { |
| UnicodeMap that = (UnicodeMap) other; |
| if (length != that.length) return false; |
| for (int i = 0; i < length-1; ++i) { |
| if (transitions[i] != that.transitions[i]) return false; |
| if (!areEqual(values[i], that.values[i])) return false; |
| } |
| return true; |
| } catch (ClassCastException e) { |
| return false; |
| } |
| } |
| |
| public static boolean areEqual(Object a , Object b) { |
| if (a == b) return true; |
| if (a == null || b == null) return false; |
| return a.equals(b); |
| } |
| |
| public int hashCode() { |
| int result = length; |
| // TODO might want to abbreviate this for speed. |
| for (int i = 0; i < length-1; ++i) { |
| result = 37*result + transitions[i]; |
| result = 37*result + values[i].hashCode(); |
| } |
| return result; |
| } |
| |
| /** |
| * Standard clone. Warning, as with Collections, does not do deep clone. |
| */ |
| public UnicodeMap<T> cloneAsThawed() { |
| UnicodeMap<T> that = new UnicodeMap<T>(); |
| that.length = length; |
| that.transitions = (int[]) transitions.clone(); |
| that.values = (T[]) values.clone(); |
| that.availableValues = new LinkedHashSet<T>(availableValues); |
| that.locked = false; |
| return that; |
| } |
| |
| /* for internal consistency checking */ |
| |
| void _checkInvariants() { |
| if (length < 2 |
| || length > transitions.length |
| || transitions.length != values.length) { |
| throw new IllegalArgumentException("Invariant failed: Lengths bad"); |
| } |
| for (int i = 1; i < length-1; ++i) { |
| if (areEqual(values[i-1], values[i])) { |
| throw new IllegalArgumentException("Invariant failed: values shared at " |
| + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">" |
| + "\t" + Utility.hex(i) + ": <" + values[i] + ">" |
| ); |
| } |
| } |
| if (transitions[0] != 0 || transitions[length-1] != 0x110000) { |
| throw new IllegalArgumentException("Invariant failed: bounds set wrong"); |
| } |
| for (int i = 1; i < length-1; ++i) { |
| if (transitions[i-1] >= transitions[i]) { |
| throw new IllegalArgumentException("Invariant failed: not monotonic" |
| + "\t" + Utility.hex(i-1) + ": " + transitions[i-1] |
| + "\t" + Utility.hex(i) + ": " + transitions[i] |
| ); |
| } |
| } |
| } |
| |
| /** |
| * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1] |
| * Assumes that 0 <= codepoint <= 0x10FFFF |
| * @param codepoint |
| * @return the index |
| */ |
| private int _findIndex(int c) { |
| int lo = 0; |
| int hi = length - 1; |
| int i = (lo + hi) >>> 1; |
| // invariant: c >= list[lo] |
| // invariant: c < list[hi] |
| while (i != lo) { |
| if (c < transitions[i]) { |
| hi = i; |
| } else { |
| lo = i; |
| } |
| i = (lo + hi) >>> 1; |
| } |
| if (ASSERTIONS) _checkFind(c, lo); |
| return lo; |
| } |
| |
| private void _checkFind(int codepoint, int value) { |
| int other = __findIndex(codepoint); |
| if (other != value) { |
| throw new IllegalArgumentException("Invariant failed: binary search" |
| + "\t" + Utility.hex(codepoint) + ": " + value |
| + "\tshould be: " + other); |
| } |
| } |
| |
| private int __findIndex(int codepoint) { |
| for (int i = length-1; i > 0; --i) { |
| if (transitions[i] <= codepoint) return i; |
| } |
| return 0; |
| } |
| |
| /* |
| * Try indexed lookup |
| |
| static final int SHIFT = 8; |
| int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found |
| boolean startsValid = false; |
| private int findIndex(int codepoint) { |
| if (!startsValid) { |
| int start = 0; |
| for (int i = 1; i < length; ++i) { |
| |
| } |
| } |
| for (int i = length-1; i > 0; --i) { |
| if (transitions[i] <= codepoint) return i; |
| } |
| return 0; |
| } |
| */ |
| |
| /** |
| * Remove the items from index through index+count-1. |
| * Logically reduces the size of the internal arrays. |
| * @param index |
| * @param count |
| */ |
| private void _removeAt(int index, int count) { |
| for (int i = index + count; i < length; ++i) { |
| transitions[i-count] = transitions[i]; |
| values[i-count] = values[i]; |
| } |
| length -= count; |
| } |
| /** |
| * Add a gap from index to index+count-1. |
| * The values there are undefined, and must be set. |
| * Logically grows arrays to accomodate. Actual growth is limited |
| * @param index |
| * @param count |
| */ |
| private void _insertGapAt(int index, int count) { |
| int newLength = length + count; |
| int[] oldtransitions = transitions; |
| T[] oldvalues = values; |
| if (newLength > transitions.length) { |
| int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100); |
| transitions = new int[allocation]; |
| values = (T[]) new Object[allocation]; |
| for (int i = 0; i < index; ++i) { |
| transitions[i] = oldtransitions[i]; |
| values[i] = oldvalues[i]; |
| } |
| } |
| for (int i = length - 1; i >= index; --i) { |
| transitions[i+count] = oldtransitions[i]; |
| values[i+count] = oldvalues[i]; |
| } |
| length = newLength; |
| } |
| |
| /** |
| * Associates code point with value. Removes any previous association. |
| * All code that calls this MUST check for frozen first! |
| * @param codepoint |
| * @param value |
| * @return this, for chaining |
| */ |
| private UnicodeMap _put(int codepoint, T value) { |
| // Warning: baseIndex is an invariant; must |
| // be defined such that transitions[baseIndex] < codepoint |
| // at end of this routine. |
| int baseIndex; |
| if (transitions[lastIndex] <= codepoint |
| && codepoint < transitions[lastIndex+1]) { |
| baseIndex = lastIndex; |
| } else { |
| baseIndex = _findIndex(codepoint); |
| } |
| int limitIndex = baseIndex + 1; |
| // cases are (a) value is already set |
| if (areEqual(values[baseIndex], value)) return this; |
| if (locked) { |
| throw new UnsupportedOperationException("Attempt to modify locked object"); |
| } |
| if (errorOnReset && values[baseIndex] != null) { |
| throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint) |
| + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value); |
| } |
| |
| // adjust the available values |
| staleAvailableValues = true; |
| availableValues.add(value); // add if not there already |
| |
| int baseCP = transitions[baseIndex]; |
| int limitCP = transitions[limitIndex]; |
| // we now start walking through the difference case, |
| // based on whether we are at the start or end of range |
| // and whether the range is a single character or multiple |
| |
| if (baseCP == codepoint) { |
| // CASE: At very start of range |
| boolean connectsWithPrevious = |
| baseIndex != 0 && areEqual(value, values[baseIndex-1]); |
| |
| if (limitCP == codepoint + 1) { |
| // CASE: Single codepoint range |
| boolean connectsWithFollowing = |
| baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 |
| |
| if (connectsWithPrevious) { |
| // A1a connects with previous & following, so remove index |
| if (connectsWithFollowing) { |
| _removeAt(baseIndex, 2); |
| } else { |
| _removeAt(baseIndex, 1); // extend previous |
| } |
| --baseIndex; // fix up |
| } else if (connectsWithFollowing) { |
| _removeAt(baseIndex, 1); // extend following backwards |
| transitions[baseIndex] = codepoint; |
| } else { |
| // doesn't connect on either side, just reset |
| values[baseIndex] = value; |
| } |
| } else if (connectsWithPrevious) { |
| // A.1: start of multi codepoint range |
| // if connects |
| ++transitions[baseIndex]; // extend previous |
| } else { |
| // otherwise insert new transition |
| transitions[baseIndex] = codepoint+1; // fix following range |
| _insertGapAt(baseIndex, 1); |
| values[baseIndex] = value; |
| transitions[baseIndex] = codepoint; |
| } |
| } else if (limitCP == codepoint + 1) { |
| // CASE: at end of range |
| // if connects, just back up range |
| boolean connectsWithFollowing = |
| baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 |
| |
| if (connectsWithFollowing) { |
| --transitions[limitIndex]; |
| return this; |
| } else { |
| _insertGapAt(limitIndex, 1); |
| transitions[limitIndex] = codepoint; |
| values[limitIndex] = value; |
| } |
| } else { |
| // CASE: in middle of range |
| // insert gap, then set the new range |
| _insertGapAt(++baseIndex,2); |
| transitions[baseIndex] = codepoint; |
| values[baseIndex] = value; |
| transitions[baseIndex+1] = codepoint + 1; |
| values[baseIndex+1] = values[baseIndex-1]; // copy lower range values |
| } |
| lastIndex = baseIndex; // store for next time |
| return this; |
| } |
| |
| private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) { |
| // TODO optimize |
| for (int i = startCodePoint; i <= endCodePoint; ++i) { |
| _put(i, value); |
| if (ASSERTIONS) _checkInvariants(); |
| } |
| return this; |
| } |
| |
| /** |
| * Sets the codepoint value. |
| * @param codepoint |
| * @param value |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> put(int codepoint, T value) { |
| if (codepoint < 0 || codepoint > 0x10FFFF) { |
| throw new IllegalArgumentException("Codepoint out of range: " + codepoint); |
| } |
| _put(codepoint, value); |
| if (ASSERTIONS) _checkInvariants(); |
| return this; |
| } |
| |
| /** |
| * Sets the codepoint value. |
| * @param codepoint |
| * @param value |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> put(String string, T value) { |
| int v = UnicodeSet.getSingleCodePoint(string); |
| if (v == Integer.MAX_VALUE) { |
| if (locked) { |
| throw new UnsupportedOperationException("Attempt to modify locked object"); |
| } |
| if (value != null) { |
| if (stringMap == null) { |
| stringMap = new TreeMap<String,T>(); |
| } |
| stringMap.put(string, value); |
| staleAvailableValues = true; |
| } else if (stringMap != null) { |
| if (stringMap.remove(string) != null) { |
| staleAvailableValues = true; |
| } |
| } |
| return this; |
| } |
| return put(v, value); |
| } |
| |
| /** |
| * Adds bunch o' codepoints; otherwise like put. |
| * @param codepoints |
| * @param value |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> putAll(UnicodeSet codepoints, T value) { |
| UnicodeSetIterator it = new UnicodeSetIterator(codepoints); |
| while (it.nextRange()) { |
| if (it.string == null) { |
| _putAll(it.codepoint, it.codepointEnd, value); |
| } else { |
| put(it.string, value); |
| } |
| } |
| return this; |
| } |
| |
| /** |
| * Adds bunch o' codepoints; otherwise like add. |
| * @param startCodePoint |
| * @param endCodePoint |
| * @param value |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> putAll(int startCodePoint, int endCodePoint, T value) { |
| if (locked) { |
| throw new UnsupportedOperationException("Attempt to modify locked object"); |
| } |
| if (startCodePoint < 0 || endCodePoint > 0x10FFFF) { |
| throw new IllegalArgumentException("Codepoint out of range: " |
| + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint)); |
| } |
| return _putAll(startCodePoint, endCodePoint, value); |
| } |
| |
| /** |
| * Add all the (main) values from a UnicodeMap |
| * @param unicodeMap the property to add to the map |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> putAll(UnicodeMap<T> unicodeMap) { |
| for (int i = 0; i < unicodeMap.length; ++i) { |
| T value = unicodeMap.values[i]; |
| if (value != null) { |
| _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value); |
| } |
| if (ASSERTIONS) _checkInvariants(); |
| } |
| if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) { |
| if (stringMap == null) { |
| stringMap = new TreeMap<String,T>(); |
| } |
| stringMap.putAll(unicodeMap.stringMap); |
| } |
| return this; |
| } |
| |
| /** |
| * Add all the (main) values from a Unicode property |
| * @param prop the property to add to the map |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> putAllFiltered(UnicodeMap<T> prop, UnicodeSet filter) { |
| // TODO optimize |
| for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) { |
| if (it.codepoint != UnicodeSetIterator.IS_STRING) { |
| T value = prop.getValue(it.codepoint); |
| if (value != null) { |
| _put(it.codepoint, value); |
| } |
| } |
| } |
| // now do the strings |
| for (String key : filter.strings()) { |
| T value = prop.get(key); |
| if (value != null) { |
| put(key, value); |
| } |
| } |
| return this; |
| } |
| |
| /** |
| * Set the currently unmapped Unicode code points to the given value. |
| * @param value the value to set |
| * @return this (for chaining) |
| */ |
| public UnicodeMap<T> setMissing(T value) { |
| // fast path, if value not yet present |
| if (!getAvailableValues().contains(value)) { |
| staleAvailableValues = true; |
| availableValues.add(value); |
| for (int i = 0; i < length; ++i) { |
| if (values[i] == null) values[i] = value; |
| } |
| return this; |
| } else { |
| return putAll(keySet(null), value); |
| } |
| } |
| /** |
| * Returns the keyset consisting of all the keys that would produce the given value. Deposits into |
| * result if it is not null. Remember to clear if you just want |
| * the new values. |
| */ |
| public UnicodeSet keySet(T value, UnicodeSet result) { |
| if (result == null) result = new UnicodeSet(); |
| for (int i = 0; i < length - 1; ++i) { |
| if (areEqual(value, values[i])) { |
| result.add(transitions[i], transitions[i+1]-1); |
| } |
| } |
| if (value != null && stringMap != null) { |
| for (String key : stringMap.keySet()) { |
| T newValue = stringMap.get(key); |
| if (value.equals(newValue)) { |
| result.add((String)key); |
| } |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Returns the keyset consisting of all the keys that would produce the given value. |
| * the new values. |
| */ |
| public UnicodeSet keySet(T value) { |
| return keySet(value,null); |
| } |
| |
| /** |
| * Returns the keyset consisting of all the keys that would produce (non-null) values. |
| */ |
| public UnicodeSet keySet() { |
| UnicodeSet result = new UnicodeSet(); |
| for (int i = 0; i < length - 1; ++i) { |
| if (values[i] != null) { |
| result.add(transitions[i], transitions[i+1]-1); |
| } |
| } |
| if (stringMap != null) { |
| result.addAll(stringMap.keySet()); |
| } |
| return result; |
| } |
| |
| /** |
| * Returns the list of possible values. Deposits each non-null value into |
| * result. Creates result if it is null. Remember to clear result if |
| * you are not appending to existing collection. |
| * @param result |
| * @return result |
| */ |
| public <U extends Collection<T>> U values(U result) { |
| if (staleAvailableValues) { |
| // collect all the current values |
| // retain them in the availableValues |
| Set<T> temp = new HashSet<T>(); |
| for (int i = 0; i < length - 1; ++i) { |
| if (values[i] != null) temp.add(values[i]); |
| } |
| availableValues.retainAll(temp); |
| if (stringMap != null) { |
| availableValues.addAll(stringMap.values()); |
| } |
| staleAvailableValues = false; |
| } |
| if (result == null) { |
| result = (U) new ArrayList<T>(availableValues.size()); |
| } |
| result.addAll(availableValues); |
| return result; |
| } |
| |
| /** |
| * Convenience method |
| */ |
| public Collection<T> values() { |
| return getAvailableValues(null); |
| } |
| /** |
| * Gets the value associated with a given code point. |
| * Returns null, if there is no such value. |
| * @param codepoint |
| * @return the value |
| */ |
| public T get(int codepoint) { |
| if (codepoint < 0 || codepoint > 0x10FFFF) { |
| throw new IllegalArgumentException("Codepoint out of range: " + codepoint); |
| } |
| return values[_findIndex(codepoint)]; |
| } |
| |
| /** |
| * Gets the value associated with a given code point. |
| * Returns null, if there is no such value. |
| * @param codepoint |
| * @return the value |
| */ |
| public T get(String value) { |
| if (UTF16.hasMoreCodePointsThan(value, 1)) { |
| if (stringMap == null) { |
| return null; |
| } |
| return stringMap.get(value); |
| } |
| return getValue(UTF16.charAt(value, 0)); |
| } |
| |
| |
| /** |
| * Change a new string from the source string according to the mappings. |
| * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString() |
| * TODO: extend to strings |
| * @param source |
| * @return |
| */ |
| public String transform(String source) { |
| StringBuffer result = new StringBuffer(); |
| int cp; |
| for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { |
| cp = UTF16.charAt(source, i); |
| T mResult = getValue(cp); |
| if (mResult != null) { |
| result.append(mResult); |
| } else { |
| UTF16.append(result, cp); |
| } |
| } |
| return result.toString(); |
| } |
| |
| /** |
| * Used to add complex values, where the value isn't replaced but in some sense composed |
| * @author markdavis |
| */ |
| public abstract static class Composer<T> { |
| /** |
| * This will be called with either a string or a code point. The result is the new value for that item. |
| * If the codepoint is used, the string is null; if the string is used, the codepoint is -1. |
| * @param a |
| * @param b |
| */ |
| public abstract T compose(int codePoint, String string, T a, T b); |
| } |
| |
| public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) { |
| for (T value : other.getAvailableValues()) { |
| UnicodeSet set = other.keySet(value); |
| composeWith(set, value, composer); |
| } |
| return this; |
| } |
| |
| public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) { |
| for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { |
| int i = it.codepoint; |
| if (i == UnicodeSetIterator.IS_STRING) { |
| String s = it.string; |
| T v1 = getValue(s); |
| T v3 = composer.compose(-1, s, v1, value); |
| if (v1 != v3 && (v1 == null || !v1.equals(v3))) { |
| put(s, v3); |
| } |
| } else { |
| T v1 = getValue(i); |
| T v3 = composer.compose(i, null, v1, value); |
| if (v1 != v3 && (v1 == null || !v1.equals(v3))) { |
| put(i, v3); |
| } |
| } |
| } |
| return this; |
| } |
| |
| public String toString() { |
| return toString(null); |
| } |
| |
| public String toString(Comparator<T> collected) { |
| StringBuffer result = new StringBuffer(); |
| if (collected == null) { |
| for (int i = 0; i < length-1; ++i) { |
| T value = values[i]; |
| if (value == null) continue; |
| int start = transitions[i]; |
| int end = transitions[i+1]-1; |
| result.append(Utility.hex(start)); |
| if (start != end) result.append("-").append(Utility.hex(end)); |
| result.append("=").append(value.toString()).append("\n"); |
| } |
| if (stringMap != null) { |
| for (String s : stringMap.keySet()) { |
| result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n"); |
| } |
| } |
| } else { |
| Set<T> set = values(new TreeSet<T>(collected)); |
| for (Iterator<T> it = set.iterator(); it.hasNext();) { |
| T value = it.next(); |
| UnicodeSet s = keySet(value); |
| result.append(value).append("=").append(s.toString()).append("\n"); |
| } |
| } |
| return result.toString(); |
| } |
| /** |
| * @return Returns the errorOnReset value. |
| */ |
| public boolean getErrorOnReset() { |
| return errorOnReset; |
| } |
| /** |
| * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception. |
| * @param errorOnReset The errorOnReset to set. |
| */ |
| public UnicodeMap<T> setErrorOnReset(boolean errorOnReset) { |
| this.errorOnReset = errorOnReset; |
| return this; |
| } |
| |
| /* (non-Javadoc) |
| * @see com.ibm.icu.dev.test.util.Freezable#isFrozen() |
| */ |
| public boolean isFrozen() { |
| // TODO Auto-generated method stub |
| return locked; |
| } |
| |
| /* (non-Javadoc) |
| * @see com.ibm.icu.dev.test.util.Freezable#lock() |
| */ |
| public UnicodeMap<T> freeze() { |
| locked = true; |
| return this; |
| } |
| |
| /** |
| * Utility to find the maximal common prefix of two strings. |
| * TODO: fix supplemental support |
| */ |
| static public int findCommonPrefix(String last, String s) { |
| int minLen = Math.min(last.length(), s.length()); |
| for (int i = 0; i < minLen; ++i) { |
| if (last.charAt(i) != s.charAt(i)) return i; |
| } |
| return minLen; |
| } |
| |
| /** |
| * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings(). |
| */ |
| public int getRangeCount() { |
| return length-1; |
| } |
| |
| /** |
| * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. |
| */ |
| public int getRangeStart(int range) { |
| return transitions[range]; |
| } |
| |
| /** |
| * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. |
| */ |
| public int getRangeEnd(int range) { |
| return transitions[range+1] - 1; |
| } |
| |
| /** |
| * Get the value for the range. |
| */ |
| public T getRangeValue(int range) { |
| return values[range]; |
| } |
| |
| /** |
| * Get the strings that are not in the ranges. Returns null if there are none. |
| * @return |
| */ |
| public Set<String> getNonRangeStrings() { |
| if (stringMap == null || stringMap.isEmpty()) { |
| return null; |
| } |
| return Collections.unmodifiableSet(stringMap.keySet()); |
| } |
| |
| static final boolean DEBUG_WRITE = false; |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#containsKey(java.lang.Object) |
| */ |
| public boolean containsKey(String key) { |
| return getValue(key) != null; |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#containsKey(java.lang.Object) |
| */ |
| public boolean containsKey(int key) { |
| return getValue(key) != null; |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#containsValue(java.lang.Object) |
| */ |
| public boolean containsValue(T value) { |
| // TODO Optimize |
| return getAvailableValues().contains(value); |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#isEmpty() |
| */ |
| public boolean isEmpty() { |
| return size() == 0; |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#putAll(java.util.Map) |
| */ |
| public UnicodeMap<T> putAll(Map<? extends String, ? extends T> map) { |
| for (String key : map.keySet()) { |
| put(key,map.get(key)); |
| } |
| return this; |
| } |
| |
| /** |
| * Utility for extracting map |
| */ |
| public UnicodeMap<T> putAllIn(Map<? super String, ? super T> map) { |
| for (String key : keySet()) { |
| map.put(key, get(key)); |
| } |
| return this; |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#remove(java.lang.Object) |
| */ |
| public UnicodeMap<T> remove(String key) { |
| return put(key, null); |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#remove(java.lang.Object) |
| */ |
| public UnicodeMap<T> remove(int key) { |
| return put(key, null); |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#size() |
| */ |
| public int size() { |
| int result = stringMap == null ? 0 : stringMap.size(); |
| for (int i = 0; i < length-1; ++i) { |
| T value = values[i]; |
| if (value == null) continue; |
| result += transitions[i+1] - transitions[i]; |
| } |
| return result; |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Map#entrySet() |
| */ |
| public Iterable<Entry<String,T>> entrySet() { |
| return new EntrySetX(); |
| } |
| |
| private class EntrySetX implements Iterable<Entry<String, T>> { |
| public Iterator<Entry<String, T>> iterator() { |
| return new IteratorX(); |
| } |
| public String toString() { |
| StringBuffer b = new StringBuffer(); |
| for (Iterator it = iterator(); it.hasNext();) { |
| Object item = it.next(); |
| b.append(item.toString()).append(' '); |
| } |
| return b.toString(); |
| } |
| } |
| |
| private class IteratorX implements Iterator<Entry<String, T>> { |
| Iterator<String> iterator = keySet().iterator(); |
| |
| /* (non-Javadoc) |
| * @see java.util.Iterator#hasNext() |
| */ |
| public boolean hasNext() { |
| return iterator.hasNext(); |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Iterator#next() |
| */ |
| public Entry<String, T> next() { |
| String key = iterator.next(); |
| return new ImmutableEntry(key, get(key)); |
| } |
| |
| /* (non-Javadoc) |
| * @see java.util.Iterator#remove() |
| */ |
| public void remove() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| } |
| |
| /** |
| * Struct-like class used to iterate over a UnicodeMap in a for loop. |
| * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null; |
| * Caution: The contents may change during the iteration! |
| */ |
| public static class EntryRange<T> { |
| public int codepoint; |
| public int codepointEnd; |
| public String string; |
| public T value; |
| @Override |
| public String toString() { |
| return (string != null ? Utility.hex(string) |
| : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd))) |
| + "=" + value; |
| } |
| } |
| |
| /** |
| * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps. |
| * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration! |
| * The value is guaranteed never to be null. |
| * @return entry range, for for loops |
| */ |
| public Iterable<EntryRange<T>> entryRanges() { |
| return new EntryRanges(); |
| } |
| |
| private class EntryRanges implements Iterable<EntryRange<T>>, Iterator<EntryRange<T>> { |
| private int pos; |
| private EntryRange<T> result = new EntryRange<T>(); |
| private int lastRealRange = values[length-2] == null ? length - 2 : length - 1; |
| private Iterator<Entry<String, T>> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator(); |
| |
| public Iterator<EntryRange<T>> iterator() { |
| return this; |
| } |
| public boolean hasNext() { |
| return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext()); |
| } |
| public EntryRange<T> next() { |
| // a range may be null, but then the next one must not be (except the final range) |
| if (pos < lastRealRange) { |
| T temp = values[pos]; |
| if (temp == null) { |
| temp = values[++pos]; |
| } |
| result.codepoint = transitions[pos]; |
| result.codepointEnd = transitions[pos+1]-1; |
| result.string = null; |
| result.value = temp; |
| ++pos; |
| } else { |
| Entry<String, T> entry = stringIterator.next(); |
| result.codepoint = result.codepointEnd = -1; |
| result.string = entry.getKey(); |
| result.value = entry.getValue(); |
| } |
| return result; |
| } |
| public void remove() { |
| throw new UnsupportedOperationException(); |
| } |
| } |
| |
| /* (non-Javadoc) |
| * @see java.lang.Iterable#iterator() |
| */ |
| public Iterator<String> iterator() { |
| return keySet().iterator(); |
| } |
| |
| /** |
| * Old form for compatibility |
| */ |
| public T getValue(String key) { |
| return get(key); |
| } |
| |
| /** |
| * Old form for compatibility |
| */ |
| public T getValue(int key) { |
| // TODO Auto-generated method stub |
| return get(key); |
| } |
| |
| /** |
| * Old form for compatibility |
| */ |
| public Collection<T> getAvailableValues() { |
| return values(); |
| } |
| |
| /** |
| * Old form for compatibility |
| */ |
| public <U extends Collection<T>> U getAvailableValues(U result) { |
| return values(result); |
| } |
| |
| /** |
| * Old form for compatibility |
| */ |
| public UnicodeSet getSet(T value) { |
| return keySet(value); |
| } |
| |
| /** |
| * Old form for compatibility |
| */ |
| public UnicodeSet getSet(T value, UnicodeSet result) { |
| return keySet(value, result); |
| } |
| |
| // This is to support compressed serialization. It works; just commented out for now as we shift to Generics |
| // TODO Fix once generics are cleaned up. |
| // // TODO Fix to serialize more than just strings. |
| // // Only if all the items are strings will we do the following compression |
| // // Otherwise we'll just use Java Serialization, bulky as it is |
| // public void writeExternal(ObjectOutput out1) throws IOException { |
| // DataOutputCompressor sc = new DataOutputCompressor(out1); |
| // // if all objects are strings |
| // Collection<T> availableVals = getAvailableValues(); |
| // boolean allStrings = allAreString(availableVals); |
| // sc.writeBoolean(allStrings); |
| // Map object_index = new LinkedHashMap(); |
| // if (allAreString(availableVals)) { |
| // sc.writeStringSet(new TreeSet(availableVals), object_index); |
| // } else { |
| // sc.writeCollection(availableVals, object_index); |
| // } |
| // sc.writeUInt(length); |
| // int lastTransition = -1; |
| // int lastValueNumber = 0; |
| // if (DEBUG_WRITE) System.out.println("Trans count: " + length); |
| // for (int i = 0; i < length; ++i) { |
| // int valueNumber = ((Integer)object_index.get(values[i])).intValue(); |
| // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber); |
| // |
| // int deltaTransition = transitions[i] - lastTransition; |
| // lastTransition = transitions[i]; |
| // int deltaValueNumber = valueNumber - lastValueNumber; |
| // lastValueNumber = valueNumber; |
| // |
| // deltaValueNumber <<= 1; // make room for one bit |
| // boolean canCombine = deltaTransition == 1; |
| // if (canCombine) deltaValueNumber |= 1; |
| // sc.writeInt(deltaValueNumber); |
| // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber); |
| // if (!canCombine) { |
| // sc.writeUInt(deltaTransition); |
| // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); |
| // } |
| // } |
| // sc.flush(); |
| // } |
| // |
| // /** |
| // * |
| // */ |
| // private boolean allAreString(Collection<T> availableValues2) { |
| // //if (true) return false; |
| // for (Iterator<T> it = availableValues2.iterator(); it.hasNext();) { |
| // if (!(it.next() instanceof String)) return false; |
| // } |
| // return true; |
| // } |
| // |
| // public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException { |
| // DataInputCompressor sc = new DataInputCompressor(in1); |
| // boolean allStrings = sc.readBoolean(); |
| // T[] valuesList; |
| // availableValues = new LinkedHashSet(); |
| // if (allStrings) { |
| // valuesList = sc.readStringSet(availableValues); |
| // } else { |
| // valuesList = sc.readCollection(availableValues); |
| // } |
| // length = sc.readUInt(); |
| // transitions = new int[length]; |
| // if (DEBUG_WRITE) System.out.println("Trans count: " + length); |
| // values = (T[]) new Object[length]; |
| // int currentTransition = -1; |
| // int currentValue = 0; |
| // int deltaTransition; |
| // for (int i = 0; i < length; ++i) { |
| // int temp = sc.readInt(); |
| // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp); |
| // boolean combined = (temp & 1) != 0; |
| // temp >>= 1; |
| // values[i] = valuesList[currentValue += temp]; |
| // if (!combined) { |
| // deltaTransition = sc.readUInt(); |
| // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); |
| // } else { |
| // deltaTransition = 1; |
| // } |
| // transitions[i] = currentTransition += deltaTransition; // delta value |
| // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue); |
| // } |
| // } |
| |
| public final UnicodeMap<T> removeAll(UnicodeSet set) { |
| return putAll(set, null); |
| } |
| |
| public final UnicodeMap<T> removeAll(UnicodeMap<T> reference) { |
| return removeRetainAll(reference, true); |
| } |
| |
| public final UnicodeMap<T> retainAll(UnicodeSet set) { |
| UnicodeSet toNuke = new UnicodeSet(); |
| // TODO Optimize |
| for (EntryRange<T> ae : entryRanges()) { |
| if (ae.string != null) { |
| if (!set.contains(ae.string)) { |
| toNuke.add(ae.string); |
| } |
| } else { |
| for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { |
| if (!set.contains(i)) { |
| toNuke.add(i); |
| } |
| } |
| } |
| } |
| return putAll(toNuke, null); |
| } |
| |
| public final UnicodeMap<T> retainAll(UnicodeMap<T> reference) { |
| return removeRetainAll(reference, false); |
| } |
| |
| private final UnicodeMap<T> removeRetainAll(UnicodeMap<T> reference, boolean remove) { |
| UnicodeSet toNuke = new UnicodeSet(); |
| // TODO Optimize |
| for (EntryRange<T> ae : entryRanges()) { |
| if (ae.string != null) { |
| if (ae.value.equals(reference.get(ae.string)) == remove) { |
| toNuke.add(ae.string); |
| } |
| } else { |
| for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { |
| if (ae.value.equals(reference.get(i)) == remove) { |
| toNuke.add(i); |
| } |
| } |
| } |
| } |
| return putAll(toNuke, null); |
| } |
| |
| /** |
| * Returns the keys that consist of multiple code points. |
| * @return |
| */ |
| public Set<String> stringKeys() { |
| return stringMap.keySet(); |
| } |
| } |