blob: ef09edc2b0108f8d96d01171010153269c9b3741 [file] [log] [blame]
/*
********************************************************************************
* Copyright (C) 2010-2014, Google, International Business Machines Corporation *
* and others. All Rights Reserved. *
********************************************************************************
*/
package com.ibm.icu.lang;
/**
* A number of utilities for dealing with CharSequences and related classes.
* For accessing codepoints with a CharSequence, also see
* <ul>
* <li>{@link java.lang.Character#codePointAt(CharSequence, int)}</li>
* <li>{@link java.lang.Character#codePointBefore(CharSequence, int)}</li>
* <li>{@link java.lang.Character#codePointCount(CharSequence, int, int)}</li>
* <li>{@link java.lang.Character#charCount(int)}</li>
* <li>{@link java.lang.Character#offsetByCodePoints(CharSequence, int, int)}</li>
* <li>{@link java.lang.Character#toChars(int, char[], int)}</li>
* <li>{@link java.lang.Character#toCodePoint(char, char)}</li>
* </ul>
* @author markdavis
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public class CharSequences {
// TODO
// compareTo(a, b);
// compareToIgnoreCase(a, b)
// contentEquals(a, b)
// contentEqualsIgnoreCase(a, b)
// contains(a, b) => indexOf >= 0
// endsWith(a, b)
// startsWith(a, b)
// lastIndexOf(a, b, fromIndex)
// indexOf(a, ch, fromIndex)
// lastIndexOf(a, ch, fromIndex);
// s.trim() => UnicodeSet.trim(CharSequence s); return a subsequence starting with the first character not in the set to the last character not in the set.
// add UnicodeSet.split(CharSequence s);
/**
* Find the longest n such that a[aIndex,n] = b[bIndex,n], and n is on a character boundary.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int matchAfter(CharSequence a, CharSequence b, int aIndex, int bIndex) {
int i = aIndex, j = bIndex;
int alen = a.length();
int blen = b.length();
for (; i < alen && j < blen; ++i, ++j) {
char ca = a.charAt(i);
char cb = b.charAt(j);
if (ca != cb) {
break;
}
}
// if we failed a match make sure that we didn't match half a character
int result = i - aIndex;
if (result != 0 && !onCharacterBoundary(a, i) && !onCharacterBoundary(b, j)) {
--result; // backup
}
return result;
}
/**
* Count the code point length. Unpaired surrogates count as 1.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public int codePointLength(CharSequence s) {
return Character.codePointCount(s, 0, s.length());
// int length = s.length();
// int result = length;
// for (int i = 1; i < length; ++i) {
// char ch = s.charAt(i);
// if (0xDC00 <= ch && ch <= 0xDFFF) {
// char ch0 = s.charAt(i-1);
// if (0xD800 <= ch && ch <= 0xDbFF) {
// --result;
// }
// }
// }
}
/**
* Utility function for comparing codepoint to string without generating new
* string.
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final boolean equals(int codepoint, CharSequence other) {
if (other == null) {
return false;
}
switch (other.length()) {
case 1: return codepoint == other.charAt(0);
case 2: return codepoint > 0xFFFF && codepoint == Character.codePointAt(other, 0);
default: return false;
}
}
/**
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final boolean equals(CharSequence other, int codepoint) {
return equals(codepoint, other);
}
/**
* Utility to compare a string to a code point.
* Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
* and comparing, but much faster (no object creation).
* Actually, there is one difference; a null compares as less.
* Note that this (=String) order is UTF-16 order -- *not* code point order.
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int compare(CharSequence string, int codePoint) {
if (codePoint < Character.MIN_CODE_POINT || codePoint > Character.MAX_CODE_POINT) {
throw new IllegalArgumentException();
}
int stringLength = string.length();
if (stringLength == 0) {
return -1;
}
char firstChar = string.charAt(0);
int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;
if (offset < 0) { // BMP codePoint
int result = firstChar - codePoint;
if (result != 0) {
return result;
}
return stringLength - 1;
}
// non BMP
char lead = (char)((offset >>> 10) + Character.MIN_HIGH_SURROGATE);
int result = firstChar - lead;
if (result != 0) {
return result;
}
if (stringLength > 1) {
char trail = (char)((offset & 0x3ff) + Character.MIN_LOW_SURROGATE);
result = string.charAt(1) - trail;
if (result != 0) {
return result;
}
}
return stringLength - 2;
}
/**
* Utility to compare a string to a code point.
* Same results as turning the code point into a string and comparing, but much faster (no object creation).
* Actually, there is one difference; a null compares as less.
* Note that this (=String) order is UTF-16 order -- *not* code point order.
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int compare(int codepoint, CharSequence a) {
return -compare(a, codepoint);
}
/**
* Return the value of the first code point, if the string is exactly one code point. Otherwise return Integer.MAX_VALUE.
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int getSingleCodePoint(CharSequence s) {
int length = s.length();
if (length < 1 || length > 2) {
return Integer.MAX_VALUE;
}
int result = Character.codePointAt(s, 0);
return (result < 0x10000) == (length == 1) ? result : Integer.MAX_VALUE;
}
/**
* Utility function for comparing objects that may be null
* string.
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static final <T extends Object> boolean equals(T a, T b) {
return a == null ? b == null
: b == null ? false
: a.equals(b);
}
/**
* Utility for comparing the contents of CharSequences
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int compare(CharSequence a, CharSequence b) {
int alength = a.length();
int blength = b.length();
int min = alength <= blength ? alength : blength;
for (int i = 0; i < min; ++i) {
int diff = a.charAt(i) - b.charAt(i);
if (diff != 0) {
return diff;
}
}
return alength - blength;
}
/**
* Utility for comparing the contents of CharSequences
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static boolean equalsChars(CharSequence a, CharSequence b) {
// do length test first for fast path
return a.length() == b.length() && compare(a,b) == 0;
}
/**
* Are we on a character boundary?
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static boolean onCharacterBoundary(CharSequence s, int i) {
return i <= 0
|| i >= s.length()
|| !Character.isHighSurrogate(s.charAt(i-1))
|| !Character.isLowSurrogate(s.charAt(i));
}
/**
* Find code point in string.
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int indexOf(CharSequence s, int codePoint) {
int cp;
for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
cp = Character.codePointAt(s, i);
if (cp == codePoint) {
return i;
}
}
return -1;
}
/**
* Utility function for simplified, more robust loops, such as:
* <pre>
* for (int codePoint : CharSequences.codePoints(string)) {
* doSomethingWith(codePoint);
* }
* </pre>
*
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static int[] codePoints(CharSequence s) {
int[] result = new int[s.length()]; // in the vast majority of cases, the length is the same
int j = 0;
for (int i = 0; i < s.length(); ++i) {
char cp = s.charAt(i);
if (cp >= 0xDC00 && cp <= 0xDFFF && i != 0 ) { // hand-code for speed
char last = (char) result[j-1];
if (last >= 0xD800 && last <= 0xDBFF) {
// Note: j-1 is safe, because j can only be zero if i is zero. But i!=0 in this block.
result[j-1] = Character.toCodePoint(last, cp);
continue;
}
}
result[j++] = cp;
}
if (j == result.length) {
return result;
}
int[] shortResult = new int[j];
System.arraycopy(result, 0, shortResult, 0, j);
return shortResult;
}
private CharSequences() {
}
}