|  | /* | 
|  | ******************************************************************************* | 
|  | * Copyright (C) 1999, International Business Machines Corporation and         * | 
|  | * others. All Rights Reserved.                                                * | 
|  | ******************************************************************************* | 
|  | * | 
|  | * File unistr.cpp | 
|  | * | 
|  | * Modification History: | 
|  | * | 
|  | *   Date        Name        Description | 
|  | *   09/25/98    stephen     Creation. | 
|  | *   04/20/99    stephen     Overhauled per 4/16 code review. | 
|  | *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX | 
|  | *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from | 
|  | *                           Replaceable. | 
|  | ******************************************************************************* | 
|  | */ | 
|  |  | 
|  | #include "unicode/utypes.h" | 
|  | #include "unicode/putil.h" | 
|  | #include "unicode/locid.h" | 
|  | #include "cstring.h" | 
|  | #include "cmemory.h" | 
|  | #include "unicode/ustring.h" | 
|  | #include "mutex.h" | 
|  | #include "unicode/unistr.h" | 
|  | #include "uhash.h" | 
|  |  | 
|  | #if U_IOSTREAM_SOURCE >= 199711 | 
|  | #include <iostream> | 
|  | using namespace std; | 
|  | #elif U_IOSTREAM_SOURCE >= 198506 | 
|  | #include <iostream.h> | 
|  | #endif | 
|  |  | 
|  | #if 0 | 
|  | //DEBUGGING | 
|  | void | 
|  | print(const UnicodeString& s, | 
|  | const char *name) | 
|  | { | 
|  | UChar c; | 
|  | cout << name << ":|"; | 
|  | for(int i = 0; i < s.length(); ++i) { | 
|  | c = s[i]; | 
|  | if(c>= 0x007E || c < 0x0020) | 
|  | cout << "[0x" << hex << s[i] << "]"; | 
|  | else | 
|  | cout << (char) s[i]; | 
|  | } | 
|  | cout << '|' << endl; | 
|  | } | 
|  |  | 
|  | void | 
|  | print(const UChar *s, | 
|  | int32_t len, | 
|  | const char *name) | 
|  | { | 
|  | UChar c; | 
|  | cout << name << ":|"; | 
|  | for(int i = 0; i < len; ++i) { | 
|  | c = s[i]; | 
|  | if(c>= 0x007E || c < 0x0020) | 
|  | cout << "[0x" << hex << s[i] << "]"; | 
|  | else | 
|  | cout << (char) s[i]; | 
|  | } | 
|  | cout << '|' << endl; | 
|  | } | 
|  | // END DEBUGGING | 
|  | #endif | 
|  |  | 
|  | // Local function definitions for now | 
|  |  | 
|  | // need to copy areas that may overlap | 
|  | inline void | 
|  | us_arrayCopy(const UChar *src, int32_t srcStart, | 
|  | UChar *dst, int32_t dstStart, int32_t count) | 
|  | { | 
|  | if(count>0) { | 
|  | uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); | 
|  | } | 
|  | } | 
|  |  | 
|  | UConverter* UnicodeString::fgDefaultConverter  = 0; | 
|  |  | 
|  | //======================================== | 
|  | // Constructors | 
|  | //======================================== | 
|  | UnicodeString::UnicodeString() | 
|  | : fArray(fStackBuffer), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | {} | 
|  |  | 
|  | UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) | 
|  | : fArray(0), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(0) | 
|  | { | 
|  | if(count <= 0) { | 
|  | // just allocate and do not do anything else | 
|  | allocate(capacity); | 
|  | } else { | 
|  | // count > 0, allocate and fill the new string with count c's | 
|  | int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount; | 
|  | if(capacity < length) { | 
|  | capacity = length; | 
|  | } | 
|  | if(allocate(capacity)) { | 
|  | int32_t i = 0; | 
|  |  | 
|  | // fill the new string with c | 
|  | if(unitCount == 1) { | 
|  | // fill with length UChars | 
|  | while(i < length) { | 
|  | fArray[i++] = (UChar)c; | 
|  | } | 
|  | } else { | 
|  | // get the code units for c | 
|  | UChar units[UTF_MAX_CHAR_LENGTH]; | 
|  | UTF_APPEND_CHAR_UNSAFE(units, i, c); | 
|  |  | 
|  | // now it must be i==unitCount | 
|  | i = 0; | 
|  |  | 
|  | // for Unicode, unitCount can only be 1, 2, 3, or 4 | 
|  | // 1 is handled above | 
|  | switch(unitCount) { | 
|  | case 2: | 
|  | while(i < length) { | 
|  | fArray[i++]=units[0]; | 
|  | fArray[i++]=units[1]; | 
|  | } | 
|  | break; | 
|  | case 3: | 
|  | while(i < length) { | 
|  | fArray[i++]=units[0]; | 
|  | fArray[i++]=units[1]; | 
|  | fArray[i++]=units[2]; | 
|  | } | 
|  | break; | 
|  | case 4: | 
|  | while(i < length) { | 
|  | fArray[i++]=units[0]; | 
|  | fArray[i++]=units[1]; | 
|  | fArray[i++]=units[2]; | 
|  | fArray[i++]=units[3]; | 
|  | } | 
|  | break; | 
|  | default: | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | fLength = length; | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(UChar ch) | 
|  | : fArray(fStackBuffer), | 
|  | fLength(1), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | fStackBuffer[0] = ch; | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(UChar32 ch) | 
|  | : fArray(fStackBuffer), | 
|  | fLength(1), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | UTextOffset i = 0; | 
|  | UTF_APPEND_CHAR(fStackBuffer, i, US_STACKBUF_SIZE, ch); | 
|  | fLength = i; | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(const UChar *text) | 
|  | : fArray(fStackBuffer), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | doReplace(0, 0, text, 0, u_strlen(text)); | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(const UChar *text, | 
|  | int32_t textLength) | 
|  | : fArray(fStackBuffer), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | doReplace(0, 0, text, 0, textLength); | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(UBool isTerminated, | 
|  | const UChar *text, | 
|  | int32_t textLength) | 
|  | : fArray((UChar *)text), | 
|  | fLength(textLength), | 
|  | fCapacity(isTerminated ? textLength + 1 : textLength), | 
|  | fFlags(kReadonlyAlias) | 
|  | { | 
|  | if(text == 0 || textLength < -1 || textLength == -1 && !isTerminated) { | 
|  | setToBogus(); | 
|  | } else if(textLength == -1) { | 
|  | // text is terminated, or else it would have failed the above test | 
|  | fLength = u_strlen(text); | 
|  | fCapacity = fLength + 1; | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(UChar *buff, | 
|  | int32_t bufLength, | 
|  | int32_t buffCapacity) | 
|  | : fArray(buff), | 
|  | fLength(bufLength), | 
|  | fCapacity(buffCapacity), | 
|  | fFlags(kWriteableAlias) | 
|  | { | 
|  | if(buff == 0 || bufLength < 0 || bufLength > buffCapacity) { | 
|  | setToBogus(); | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(const char *codepageData, | 
|  | const char *codepage) | 
|  | : fArray(fStackBuffer), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | if(codepageData != 0) { | 
|  | doCodepageCreate(codepageData, uprv_strlen(codepageData), codepage); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | UnicodeString::UnicodeString(const char *codepageData, | 
|  | int32_t dataLength, | 
|  | const char *codepage) | 
|  | : fArray(fStackBuffer), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | if(codepageData != 0) { | 
|  | doCodepageCreate(codepageData, dataLength, codepage); | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString::UnicodeString(const UnicodeString& that) | 
|  | : Replaceable(), | 
|  | fArray(fStackBuffer), | 
|  | fLength(0), | 
|  | fCapacity(US_STACKBUF_SIZE), | 
|  | fFlags(kShortString) | 
|  | { | 
|  | *this = that; | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // array allocation | 
|  | //======================================== | 
|  |  | 
|  | UBool | 
|  | UnicodeString::allocate(int32_t capacity) { | 
|  | if(capacity <= US_STACKBUF_SIZE) { | 
|  | fArray = fStackBuffer; | 
|  | fCapacity = US_STACKBUF_SIZE; | 
|  | fFlags = kShortString; | 
|  | } else { | 
|  | // count bytes for the refCounter and the string capacity, and | 
|  | // round up to a multiple of 16; then divide by 4 and allocate int32_t's | 
|  | // to be safely aligned for the refCount | 
|  | int32_t words = ((sizeof(int32_t) + capacity * U_SIZEOF_UCHAR + 15) & ~15) >> 2; | 
|  | int32_t *array = new int32_t[words]; | 
|  | if(array != 0) { | 
|  | // set initial refCount and point behind the refCount | 
|  | *array++ = 1; | 
|  |  | 
|  | // have fArray point to the first UChar | 
|  | fArray = (UChar *)array; | 
|  | fCapacity = (words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR); | 
|  | fFlags = kLongString; | 
|  | } else { | 
|  | fLength = 0; | 
|  | fCapacity = 0; | 
|  | fFlags = kIsBogus; | 
|  | return FALSE; | 
|  | } | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Destructor | 
|  | //======================================== | 
|  | UnicodeString::~UnicodeString() | 
|  | { | 
|  | releaseArray(); | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Assignment | 
|  | //======================================== | 
|  | UnicodeString& | 
|  | UnicodeString::operator= (const UnicodeString& src) | 
|  | { | 
|  | // if assigning to ourselves, do nothing | 
|  | if(this == 0 || this == &src) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | // is the right side bogus? | 
|  | if(&src == 0 || src.isBogus()) { | 
|  | setToBogus(); | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | // delete the current contents | 
|  | releaseArray(); | 
|  |  | 
|  | // we always copy the length and the hash code | 
|  | fLength = src.fLength; | 
|  |  | 
|  | switch(src.fFlags) { | 
|  | case kShortString: | 
|  | // short string using the stack buffer, do the same | 
|  | fArray = fStackBuffer; | 
|  | fCapacity = US_STACKBUF_SIZE; | 
|  | fFlags = kShortString; | 
|  | if(fLength > 0) { | 
|  | uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR); | 
|  | } | 
|  | break; | 
|  | case kLongString: | 
|  | // src uses a refCounted string buffer, use that buffer with refCount | 
|  | // src is const, use a cast - we don't really change it | 
|  | ((UnicodeString &)src).addRef(); | 
|  | // fall through to readonly alias copying: copy all fields | 
|  | case kReadonlyAlias: | 
|  | // src is a readonly alias, do the same | 
|  | fArray = src.fArray; | 
|  | fCapacity = src.fCapacity; | 
|  | fFlags = src.fFlags; | 
|  | break; | 
|  | case kWriteableAlias: | 
|  | // src is a writeable alias; we make a copy of that instead | 
|  | if(allocate(fLength)) { | 
|  | if(fLength > 0) { | 
|  | uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR); | 
|  | } | 
|  | break; | 
|  | } | 
|  | // if there is not enough memory, then fall through to setting to bogus | 
|  | default: | 
|  | // if src is bogus, set ourselves to bogus | 
|  | // do not call setToBogus() here because fArray and fFlags are not consistent here | 
|  | fArray = 0; | 
|  | fLength = 0; | 
|  | fCapacity = 0; | 
|  | fFlags = kIsBogus; | 
|  | break; | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Miscellaneous operations | 
|  | //======================================== | 
|  | int32_t | 
|  | UnicodeString::numDisplayCells( UTextOffset start, | 
|  | int32_t length, | 
|  | UBool asian) const | 
|  | { | 
|  | // pin indices to legal values | 
|  | pinIndices(start, length); | 
|  |  | 
|  | UChar32 c; | 
|  | int32_t result = 0; | 
|  | UTextOffset limit = start + length; | 
|  |  | 
|  | while(start < limit) { | 
|  | UTF_NEXT_CHAR(fArray, start, limit, c); | 
|  | switch(Unicode::getCellWidth(c)) { | 
|  | case Unicode::ZERO_WIDTH: | 
|  | break; | 
|  |  | 
|  | case Unicode::HALF_WIDTH: | 
|  | result += 1; | 
|  | break; | 
|  |  | 
|  | case Unicode::FULL_WIDTH: | 
|  | result += 2; | 
|  | break; | 
|  |  | 
|  | case Unicode::NEUTRAL: | 
|  | result += (asian ? 2 : 1); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | UCharReference | 
|  | UnicodeString::operator[] (UTextOffset pos) | 
|  | { | 
|  | return UCharReference(this, pos); | 
|  | } | 
|  |  | 
|  | UnicodeString UnicodeString::unescape() const { | 
|  | UnicodeString result; | 
|  | for (int32_t i=0; i<length(); ) { | 
|  | UChar32 c = charAt(i++); | 
|  | if (c == 0x005C /*'\\'*/) { | 
|  | c = unescapeAt(i); // advances i | 
|  | if (c == (UChar32)0xFFFFFFFF) { | 
|  | result.remove(); // return empty string | 
|  | break; // invalid escape sequence | 
|  | } | 
|  | } | 
|  | result.append(c); | 
|  | } | 
|  | return result; | 
|  | } | 
|  |  | 
|  | // u_unescapeAt() callback to get a UChar from a UnicodeString | 
|  | U_CFUNC UChar _charAt(int32_t offset, void *context) { | 
|  | return ((UnicodeString*) context)->charAt(offset); | 
|  | } | 
|  |  | 
|  | UChar32 UnicodeString::unescapeAt(int32_t &offset) const { | 
|  | return u_unescapeAt(_charAt, &offset, length(), (void*)this); | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Read-only implementation | 
|  | //======================================== | 
|  | int8_t | 
|  | UnicodeString::doCompare( UTextOffset start, | 
|  | int32_t length, | 
|  | const UChar *srcChars, | 
|  | UTextOffset srcStart, | 
|  | int32_t srcLength) const | 
|  | { | 
|  | // compare illegal string values | 
|  | if(isBogus()) { | 
|  | if(srcChars==0) { | 
|  | return 0; | 
|  | } else { | 
|  | return -1; | 
|  | } | 
|  | } else if(srcChars==0) { | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | // pin indices to legal values | 
|  | pinIndices(start, length); | 
|  |  | 
|  | // get the correct pointer | 
|  | const UChar *chars = getArrayStart(); | 
|  |  | 
|  | // are we comparing the same buffer contents? | 
|  | chars += start; | 
|  | srcChars += srcStart; | 
|  | if(chars == srcChars) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | UTextOffset minLength; | 
|  | int8_t lengthResult; | 
|  |  | 
|  | // are we comparing different lengths? | 
|  | if(length != srcLength) { | 
|  | if(length < srcLength) { | 
|  | minLength = length; | 
|  | lengthResult = -1; | 
|  | } else { | 
|  | minLength = srcLength; | 
|  | lengthResult = 1; | 
|  | } | 
|  | } else { | 
|  | minLength = length; | 
|  | lengthResult = 0; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * note that uprv_memcmp() returns an int but we return an int8_t; | 
|  | * we need to take care not to truncate the result - | 
|  | * one way to do this is to right-shift the value to | 
|  | * move the sign bit into the lower 8 bits and making sure that this | 
|  | * does not become 0 itself | 
|  | */ | 
|  |  | 
|  | if(minLength > 0) { | 
|  | int32_t result; | 
|  |  | 
|  | #   if U_IS_BIG_ENDIAN | 
|  | // big-endian: byte comparison works | 
|  | result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); | 
|  | if(result != 0) { | 
|  | return (int8_t)(result >> 15 | 1); | 
|  | } | 
|  | #   else | 
|  | // little-endian: compare UChar units | 
|  | do { | 
|  | result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); | 
|  | if(result != 0) { | 
|  | return (int8_t)(result >> 15 | 1); | 
|  | } | 
|  | } while(--minLength > 0); | 
|  | #   endif | 
|  | } | 
|  | return lengthResult; | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeString::doExtract(UTextOffset start, | 
|  | int32_t length, | 
|  | UChar *dst, | 
|  | UTextOffset dstStart) const | 
|  | { | 
|  | // do not copy anything if we alias dst itself | 
|  | if(fArray + start != dst + dstStart) { | 
|  | // pin indices to legal values | 
|  | pinIndices(start, length); | 
|  | us_arrayCopy(getArrayStart(), start, dst, dstStart, length); | 
|  | } | 
|  | } | 
|  |  | 
|  | UTextOffset | 
|  | UnicodeString::indexOf(const UChar *srcChars, | 
|  | UTextOffset srcStart, | 
|  | int32_t srcLength, | 
|  | UTextOffset start, | 
|  | int32_t length) const | 
|  | { | 
|  | if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // now we will only work with srcLength-1 | 
|  | --srcLength; | 
|  |  | 
|  | // get the indices within bounds | 
|  | pinIndices(start, length); | 
|  |  | 
|  | // set length for the last possible match start position | 
|  | // note the --srcLength above | 
|  | length -= srcLength; | 
|  |  | 
|  | if(length <= 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | const UChar *array = getArrayStart(); | 
|  | UTextOffset limit = start + length; | 
|  |  | 
|  | // search for the first char, then compare the rest of the string | 
|  | // increment srcStart here for that, matching the --srcLength above | 
|  | UChar ch = srcChars[srcStart++]; | 
|  |  | 
|  | do { | 
|  | if(array[start] == ch && (srcLength == 0 || compare(start + 1, srcLength, srcChars, srcStart, srcLength) == 0)) { | 
|  | return start; | 
|  | } | 
|  | } while(++start < limit); | 
|  |  | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | UTextOffset | 
|  | UnicodeString::doIndexOf(UChar c, | 
|  | UTextOffset start, | 
|  | int32_t length) const | 
|  | { | 
|  | // pin indices | 
|  | pinIndices(start, length); | 
|  | if(length == 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // find the first occurrence of c | 
|  | const UChar *begin = getArrayStart() + start; | 
|  | const UChar *limit = begin + length; | 
|  |  | 
|  | do { | 
|  | if(*begin == c) { | 
|  | return begin - getArrayStart(); | 
|  | } | 
|  | } while(++begin < limit); | 
|  |  | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | UTextOffset | 
|  | UnicodeString::lastIndexOf(const UChar *srcChars, | 
|  | UTextOffset srcStart, | 
|  | int32_t srcLength, | 
|  | UTextOffset start, | 
|  | int32_t length) const | 
|  | { | 
|  | if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength <= 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // now we will only work with srcLength-1 | 
|  | --srcLength; | 
|  |  | 
|  | // get the indices within bounds | 
|  | pinIndices(start, length); | 
|  |  | 
|  | // set length for the last possible match start position | 
|  | // note the --srcLength above | 
|  | length -= srcLength; | 
|  |  | 
|  | if(length <= 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | const UChar *array = getArrayStart(); | 
|  | UTextOffset pos; | 
|  |  | 
|  | // search for the first char, then compare the rest of the string | 
|  | // increment srcStart here for that, matching the --srcLength above | 
|  | UChar ch = srcChars[srcStart++]; | 
|  |  | 
|  | pos = start + length; | 
|  | do { | 
|  | if(array[--pos] == ch && (srcLength == 0 || compare(pos + 1, srcLength, srcChars, srcStart, srcLength) == 0)) { | 
|  | return pos; | 
|  | } | 
|  | } while(pos > start); | 
|  |  | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | UTextOffset | 
|  | UnicodeString::doLastIndexOf(UChar c, | 
|  | UTextOffset start, | 
|  | int32_t length) const | 
|  | { | 
|  | if(isBogus()) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // pin indices | 
|  | pinIndices(start, length); | 
|  | if(length == 0) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | const UChar *begin = getArrayStart() + start; | 
|  | const UChar *limit = begin + length; | 
|  |  | 
|  | do { | 
|  | if(*--limit == c) { | 
|  | return limit - getArrayStart(); | 
|  | } | 
|  | } while(limit > begin); | 
|  |  | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::findAndReplace(UTextOffset start, | 
|  | int32_t length, | 
|  | const UnicodeString& oldText, | 
|  | UTextOffset oldStart, | 
|  | int32_t oldLength, | 
|  | const UnicodeString& newText, | 
|  | UTextOffset newStart, | 
|  | int32_t newLength) | 
|  | { | 
|  | if(isBogus() || oldText.isBogus() || newText.isBogus()) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | pinIndices(start, length); | 
|  | oldText.pinIndices(oldStart, oldLength); | 
|  | newText.pinIndices(newStart, newLength); | 
|  |  | 
|  | if(oldLength == 0) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | while(length > 0 && length >= oldLength) { | 
|  | UTextOffset pos = indexOf(oldText, oldStart, oldLength, start, length); | 
|  | if(pos < 0) { | 
|  | // no more oldText's here: done | 
|  | break; | 
|  | } else { | 
|  | // we found oldText, replace it by newText and go beyond it | 
|  | replace(pos, oldLength, newText, newStart, newLength); | 
|  | length -= pos + oldLength - start; | 
|  | start = pos + newLength; | 
|  | } | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  |  | 
|  | //======================================== | 
|  | // Write implementation | 
|  | //======================================== | 
|  |  | 
|  | void | 
|  | UnicodeString::setToBogus() | 
|  | { | 
|  | releaseArray(); | 
|  |  | 
|  | fArray = 0; | 
|  | fCapacity = fLength = 0; | 
|  | fFlags = kIsBogus; | 
|  | } | 
|  |  | 
|  | // setTo() analogous to the readonly-aliasing constructor with the same signature | 
|  | UnicodeString & | 
|  | UnicodeString::setTo(UBool isTerminated, | 
|  | const UChar *text, | 
|  | int32_t textLength) | 
|  | { | 
|  | if(text == 0 || textLength < -1 || textLength == -1 && !isTerminated) { | 
|  | setToBogus(); | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | releaseArray(); | 
|  |  | 
|  | fArray = (UChar *)text; | 
|  | if(textLength != -1) { | 
|  | fLength = textLength; | 
|  | } else { | 
|  | // text is terminated, or else it would have failed the above test | 
|  | fLength = u_strlen(text); | 
|  | fCapacity = fLength + 1; | 
|  | } | 
|  |  | 
|  | fCapacity = isTerminated ? fLength + 1 : fLength; | 
|  | fFlags = kReadonlyAlias; | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | // setTo() analogous to the writeable-aliasing constructor with the same signature | 
|  | UnicodeString & | 
|  | UnicodeString::setTo(UChar *buffer, | 
|  | int32_t buffLength, | 
|  | int32_t buffCapacity) { | 
|  | if(buffer == 0 || buffLength < 0 || buffLength > buffCapacity) { | 
|  | setToBogus(); | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | releaseArray(); | 
|  |  | 
|  | fArray = buffer; | 
|  | fLength = buffLength; | 
|  | fCapacity = buffCapacity; | 
|  | fFlags = kWriteableAlias; | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::setCharAt(UTextOffset offset, | 
|  | UChar c) | 
|  | { | 
|  | if(cloneArrayIfNeeded()) { | 
|  | if(offset < 0) { | 
|  | offset = 0; | 
|  | } else if(offset >= fLength) { | 
|  | offset = fLength - 1; | 
|  | } | 
|  |  | 
|  | fArray[offset] = c; | 
|  | } | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::toUpper() | 
|  | { return toUpper(Locale::getDefault()); } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::toLower() | 
|  | { return toLower(Locale::getDefault()); } | 
|  |  | 
|  | /* | 
|  | * The following toUpper() and toLower() implementations are designed | 
|  | * for UTF-16 and UTF-32, not for UTF-8. | 
|  | * In UTF-16 and UTF-32, the number of code units per code point is fixed, | 
|  | * and a case mapping is assumed to always stay within the same plane | 
|  | * (64k code range) with the original code point. This allows to write | 
|  | * the mapping into the same space as the source character without | 
|  | * expansions or contractions except in the special cases. | 
|  | * | 
|  | * For UTF-8, where a source code point may take up a variable number | 
|  | * of code units, it is more efficient to get the mapping and write | 
|  | * the result only if it is a different code point from the original. | 
|  | * Also, a sharp s and the "SS" string typically both take up 2 bytes in UTF-8, | 
|  | * while the turkish i's typically result in expansions and contractions. | 
|  | * Therefore, for UTF-8, these functions should be reimplemented. | 
|  | * One single implementation for all UTF's would be either clumsy | 
|  | * or inefficient. | 
|  | */ | 
|  | #if UTF_SIZE==8 | 
|  | # error reimplement toUpper() and toLower() for UTF-8, see comment above | 
|  | #endif | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::toUpper(const Locale& locale) | 
|  | { | 
|  | if(!cloneArrayIfNeeded()) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | const char *langChars = locale.getLanguage(); | 
|  |  | 
|  | UTextOffset start = 0, next = 0; | 
|  | UTextOffset limit = fLength; | 
|  | UChar32 c; | 
|  |  | 
|  | // The German sharp S character (U+00DF)'s uppercase equivalent is | 
|  | // "SS", making it the only character that expands to two characters | 
|  | // when its case is changed (we don't automatically convert "SS" to | 
|  | // U+00DF going to lowercase because it can only be determined from | 
|  | // knowing the language whether a particular "SS" should map to | 
|  | // U+00DF or "ss").  So we make a preliminary pass through the | 
|  | // string looking for sharp S characters and then go back and make | 
|  | // room for the extra capital Ses if we find any.  [For performance, | 
|  | // we only do this extra work if the language is actually German] | 
|  | if(uprv_strcmp(langChars, "de") == 0) { | 
|  | static UChar SS [] = { 0x0053, 0x0053 }; | 
|  | while(start < limit) { | 
|  | // start == next here by design | 
|  | UTF_NEXT_CHAR(fArray, next, limit, c); | 
|  |  | 
|  | // A sharp s needs to be replaced with two capital S's. | 
|  | if(c == 0x00DF) { | 
|  | doReplace(start, 1, SS, 0, 2); | 
|  | start += 2; | 
|  | ++next; // the string expanded by one | 
|  | ++limit; | 
|  | } else { | 
|  | // Otherwise, the case conversion can be handled by the Unicode code point. | 
|  | c = Unicode::toUpperCase(c); | 
|  | UTF_APPEND_CHAR(fArray, start, limit, c); | 
|  | } | 
|  | } | 
|  | } else if(uprv_strcmp(langChars, "tr") == 0) { | 
|  | // If the specfied language is Turkish, then we have to special-case | 
|  | // for the Turkish dotted and dotless Is.  The regular lowercase i | 
|  | // maps to the capital I with a dot (U+0130), and the lowercase i | 
|  | // without the dot (U+0131) maps to the regular capital I | 
|  | while(start < limit) { | 
|  | // start == next here by design | 
|  | UTF_NEXT_CHAR(fArray, next, limit, c); | 
|  | if(c == 0x0069/*'i'*/) { | 
|  | fArray[start++] = 0x0130; | 
|  | } else if(c == 0x0131) { | 
|  | fArray[start++] = 0x0049/*'I'*/; | 
|  | } else { | 
|  | c = Unicode::toUpperCase(c); | 
|  | UTF_APPEND_CHAR(fArray, start, limit, c); | 
|  | } | 
|  | } | 
|  | } else { | 
|  | while(start < limit) { | 
|  | // start == next here by design | 
|  | UTF_NEXT_CHAR(fArray, next, limit, c); | 
|  | c = Unicode::toUpperCase(c); | 
|  | UTF_APPEND_CHAR(fArray, start, limit, c); | 
|  | } | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::toLower(const Locale& locale) | 
|  | { | 
|  | if(!cloneArrayIfNeeded()) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | const char *langChars = locale.getLanguage(); | 
|  |  | 
|  | UTextOffset start = 0, next = 0; | 
|  | UTextOffset limit = fLength; | 
|  | UChar32 c; | 
|  |  | 
|  | // if the specfied language is Turkish, then we have to special-case | 
|  | // for the Turkish dotted and dotless Is.  The capital I with a dot | 
|  | // (U+0130) maps to the regular lowercase i, and the regular capital | 
|  | // I maps to the lowercase i without the dot (U+0131) | 
|  | if(uprv_strcmp(langChars, "tr") == 0) { | 
|  | while(start < limit) { | 
|  | // start == next here by design | 
|  | UTF_NEXT_CHAR(fArray, next, limit, c); | 
|  | if(c == 0x0049) { // 'I' | 
|  | fArray[start++] = 0x0131; | 
|  | } else if(c == 0x0130) { | 
|  | fArray[start++] = 0x0069; // 'i' | 
|  | } else { | 
|  | c = Unicode::toLowerCase(c); | 
|  | UTF_APPEND_CHAR(fArray, start, limit, c); | 
|  | } | 
|  | } | 
|  | } else if(uprv_strcmp(langChars, "el") == 0) { | 
|  | // if the specfied language is Greek, then we have to special-case | 
|  | // for the capital letter sigma (U+3A3), which has two lower-case | 
|  | // forms.  If the character following the capital sigma is a letter, | 
|  | // we use the medial form (U+3C3); otherwise, we use the final form | 
|  | // (U+3C2). | 
|  | while(start < limit) { | 
|  | // start == next here by design | 
|  | UTF_NEXT_CHAR(fArray, next, limit, c); | 
|  | if(c == 0x3a3) { | 
|  | if(next < limit) { | 
|  | UTextOffset next2 = next; | 
|  | UChar32 c2; | 
|  | UTF_NEXT_CHAR(fArray, next2, limit, c2); | 
|  | if(Unicode::isLetter(c2)) { | 
|  | fArray[start++] = 0x3C3; | 
|  | } else { | 
|  | fArray[start++] = 0x3C2; | 
|  | } | 
|  | } else { | 
|  | fArray[start++] = 0x3C2; | 
|  | } | 
|  | } else { | 
|  | c = Unicode::toLowerCase(c); | 
|  | UTF_APPEND_CHAR(fArray, start, limit, c); | 
|  | } | 
|  | } | 
|  | } else { | 
|  | // if the specified language is anything other than Turkish or | 
|  | // Greek, we rely on the Unicode class to do all our case mapping-- | 
|  | // there are no other special cases | 
|  | while(start < limit) { | 
|  | // start == next here by design | 
|  | UTF_NEXT_CHAR(fArray, next, limit, c); | 
|  | c = Unicode::toLowerCase(c); | 
|  | UTF_APPEND_CHAR(fArray, start, limit, c); | 
|  | } | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::doReplace( UTextOffset start, | 
|  | int32_t length, | 
|  | const UnicodeString& src, | 
|  | UTextOffset srcStart, | 
|  | int32_t srcLength) | 
|  | { | 
|  | if(!src.isBogus()) { | 
|  | // pin the indices to legal values | 
|  | src.pinIndices(srcStart, srcLength); | 
|  |  | 
|  | // get the characters from src | 
|  | // and replace the range in ourselves with them | 
|  | return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); | 
|  | } else { | 
|  | // remove the range | 
|  | return doReplace(start, length, 0, 0, 0); | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::doReplace(UTextOffset start, | 
|  | int32_t length, | 
|  | const UChar *srcChars, | 
|  | UTextOffset srcStart, | 
|  | int32_t srcLength) | 
|  | { | 
|  | // if we're bogus, set us to empty first | 
|  | if(isBogus()) { | 
|  | fArray = fStackBuffer; | 
|  | fLength = 0; | 
|  | fCapacity = US_STACKBUF_SIZE; | 
|  | fFlags = kShortString; | 
|  | } | 
|  |  | 
|  | if(srcChars == 0) { | 
|  | srcStart = srcLength = 0; | 
|  | } | 
|  |  | 
|  | int32_t *bufferToDelete = 0; | 
|  |  | 
|  | // the following may change fArray but will not copy the current contents; | 
|  | // therefore we need to keep the current fArray | 
|  | UChar *oldArray = fArray; | 
|  | int32_t oldLength = fLength; | 
|  |  | 
|  | // pin the indices to legal values | 
|  | pinIndices(start, length); | 
|  |  | 
|  | // calculate the size of the string after the replace | 
|  | int32_t newSize = oldLength - length + srcLength; | 
|  |  | 
|  | // clone our array and allocate a bigger array if needed | 
|  | if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize, | 
|  | FALSE, &bufferToDelete) | 
|  | ) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | // now do the replace | 
|  |  | 
|  | if(fArray != oldArray) { | 
|  | // if fArray changed, then we need to copy everything except what will change | 
|  | us_arrayCopy(oldArray, 0, fArray, 0, start); | 
|  | us_arrayCopy(oldArray, start + length, | 
|  | fArray, start + srcLength, | 
|  | oldLength - (start + length)); | 
|  | } else if(length != srcLength) { | 
|  | // fArray did not change; copy only the portion that isn't changing, leaving a hole | 
|  | us_arrayCopy(oldArray, start + length, | 
|  | fArray, start + srcLength, | 
|  | oldLength - (start + length)); | 
|  | } | 
|  |  | 
|  | // now fill in the hole with the new string | 
|  | us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength); | 
|  |  | 
|  | fLength = newSize; | 
|  |  | 
|  | // delayed delete in case srcChars == fArray when we started, and | 
|  | // to keep oldArray alive for the above operations | 
|  | delete [] bufferToDelete; | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Replaceable API | 
|  | */ | 
|  | void | 
|  | UnicodeString::handleReplaceBetween(UTextOffset start, | 
|  | UTextOffset limit, | 
|  | const UnicodeString& text) { | 
|  | replaceBetween(start, limit, text); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Replaceable API | 
|  | */ | 
|  | void | 
|  | UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { | 
|  | UChar* text = new UChar[limit - start]; | 
|  | extractBetween(start, limit, text, 0); | 
|  | insert(dest, text, 0, limit - start); | 
|  | delete[] text; | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::doReverse(UTextOffset start, | 
|  | int32_t length) | 
|  | { | 
|  | // if we're bogus, do nothing | 
|  | if(isBogus() || !cloneArrayIfNeeded()) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | // pin the indices to legal values | 
|  | pinIndices(start, length); | 
|  |  | 
|  | UChar *left = getArrayStart() + start; | 
|  | UChar *right = getArrayStart() + start + length; | 
|  | UChar swap; | 
|  |  | 
|  | while(left < --right) { | 
|  | swap = *left; | 
|  | *left++ = *right; | 
|  | *right = swap; | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | UBool | 
|  | UnicodeString::padLeading(int32_t targetLength, | 
|  | UChar padChar) | 
|  | { | 
|  | if(isBogus() || fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { | 
|  | return FALSE; | 
|  | } else { | 
|  | // move contents up by padding width | 
|  | int32_t start = targetLength - fLength; | 
|  | us_arrayCopy(fArray, 0, fArray, start, fLength); | 
|  |  | 
|  | // fill in padding character | 
|  | while(--start >= 0) { | 
|  | fArray[start] = padChar; | 
|  | } | 
|  | fLength = targetLength; | 
|  | return TRUE; | 
|  | } | 
|  | } | 
|  |  | 
|  | UBool | 
|  | UnicodeString::padTrailing(int32_t targetLength, | 
|  | UChar padChar) | 
|  | { | 
|  | if(isBogus() || fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { | 
|  | return FALSE; | 
|  | } else { | 
|  | // fill in padding character | 
|  | int32_t length = targetLength; | 
|  | while(--length >= fLength) { | 
|  | fArray[length] = padChar; | 
|  | } | 
|  | fLength = targetLength; | 
|  | return TRUE; | 
|  | } | 
|  | } | 
|  |  | 
|  | UnicodeString& | 
|  | UnicodeString::trim() | 
|  | { | 
|  | if(isBogus()) { | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | UChar32 c; | 
|  | UTextOffset i = fLength, length; | 
|  |  | 
|  | // first cut off trailing white space | 
|  | for(;;) { | 
|  | length = i; | 
|  | if(i <= 0) { | 
|  | break; | 
|  | } | 
|  | UTF_PREV_CHAR(fArray, 0, i, c); | 
|  | if(!(c == 0x20 || Unicode::isWhitespace(c))) { | 
|  | break; | 
|  | } | 
|  | } | 
|  | if(length < fLength) { | 
|  | fLength = length; | 
|  | } | 
|  |  | 
|  | // find leading white space | 
|  | UTextOffset start; | 
|  | i = 0; | 
|  | for(;;) { | 
|  | start = i; | 
|  | if(i >= length) { | 
|  | break; | 
|  | } | 
|  | UTF_NEXT_CHAR(fArray, i, length, c); | 
|  | if(!(c == 0x20 || Unicode::isWhitespace(c))) { | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | // move string forward over leading white space | 
|  | if(start > 0) { | 
|  | doReplace(0, start, 0, 0, 0); | 
|  | } | 
|  |  | 
|  | return *this; | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Hashing | 
|  | //======================================== | 
|  | int32_t | 
|  | UnicodeString::doHashCode() const | 
|  | { | 
|  | /* Delegate hash computation to uhash.  This makes UnicodeString | 
|  | * hashing consistent with UChar* hashing.  */ | 
|  | int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength); | 
|  | if (hashCode == kInvalidHashCode) { | 
|  | hashCode = kEmptyHashCode; | 
|  | } | 
|  | return hashCode; | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Codeset conversion | 
|  | //======================================== | 
|  | int32_t | 
|  | UnicodeString::extract(UTextOffset start, | 
|  | int32_t length, | 
|  | char *target, | 
|  | uint32_t dstSize, | 
|  | const char *codepage) const | 
|  | { | 
|  | // if we're bogus or there's nothing to convert, do nothing | 
|  | if(isBogus() || length <= 0) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // pin the indices to legal values | 
|  | pinIndices(start, length); | 
|  |  | 
|  | // set up the conversion parameters | 
|  | const UChar *mySource      = getArrayStart() + start; | 
|  | const UChar *mySourceLimit = mySource + length; | 
|  | char *myTarget             = target; | 
|  | const char *myTargetLimit  = target + dstSize; | 
|  | UErrorCode status          = U_ZERO_ERROR; | 
|  |  | 
|  | // create the converter | 
|  | UConverter *converter; | 
|  |  | 
|  | // if the codepage is the default, use our cache | 
|  | if (codepage == 0) { | 
|  | converter = getDefaultConverter(status); | 
|  | } else if (*codepage == 0) { | 
|  | converter = 0; | 
|  | } else { | 
|  | converter = ucnv_open(codepage, &status); | 
|  | } | 
|  |  | 
|  | // if we failed, set the appropriate flags and return | 
|  | // if it is an empty string, then use the "invariant character" conversion | 
|  | if (U_FAILURE(status)) { | 
|  | // close the converter | 
|  | if (codepage == 0) { | 
|  | releaseDefaultConverter(converter); | 
|  | } else { | 
|  | ucnv_close(converter); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // perform the conversion | 
|  | if (converter == 0) { | 
|  | // use the "invariant characters" conversion | 
|  | if (length > fLength - start) { | 
|  | length = fLength - start; | 
|  | } | 
|  | u_UCharsToChars(mySource, myTarget, length); | 
|  | return length; | 
|  | } | 
|  |  | 
|  |  | 
|  | /* Pin the limit to U_MAX_PTR.  NULL check is for AS/400. */ | 
|  | if((myTargetLimit < myTarget) || (myTargetLimit == NULL)) { | 
|  | myTargetLimit = (char*)U_MAX_PTR; | 
|  | } | 
|  |  | 
|  | if (myTarget != NULL) { | 
|  | ucnv_fromUnicode(converter, &myTarget, myTargetLimit, | 
|  | &mySource, mySourceLimit, 0, TRUE, &status); | 
|  | } else { | 
|  | /* Find out the size of the target needed for the current codepage */ | 
|  | char targetCh = 0; | 
|  | int32_t size = 0; | 
|  |  | 
|  | myTargetLimit = &targetCh + sizeof(char); | 
|  | status = U_BUFFER_OVERFLOW_ERROR; | 
|  | while (mySource < mySourceLimit && status == U_BUFFER_OVERFLOW_ERROR) { | 
|  | myTarget = &targetCh; | 
|  | status = U_ZERO_ERROR; | 
|  | ucnv_fromUnicode(converter, &myTarget, myTargetLimit, | 
|  | &mySource, mySourceLimit, 0, TRUE, &status); | 
|  | size += sizeof(char); | 
|  | } | 
|  | /* Use the close at the end of the function */ | 
|  | myTarget = target + size; | 
|  | } | 
|  |  | 
|  | // close the converter | 
|  | if (codepage == 0) { | 
|  | releaseDefaultConverter(converter); | 
|  | } else { | 
|  | ucnv_close(converter); | 
|  | } | 
|  |  | 
|  | return (myTarget - target); | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeString::doCodepageCreate(const char *codepageData, | 
|  | int32_t dataLength, | 
|  | const char *codepage) | 
|  | { | 
|  | // if there's nothing to convert, do nothing | 
|  | if(codepageData == 0 || dataLength <= 0) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  |  | 
|  | // create the converter | 
|  | // if the codepage is the default, use our cache | 
|  | // if it is an empty string, then use the "invariant character" conversion | 
|  | UConverter *converter = (codepage == 0 ? | 
|  | getDefaultConverter(status) : | 
|  | *codepage == 0 ? | 
|  | 0 : | 
|  | ucnv_open(codepage, &status)); | 
|  |  | 
|  | // if we failed, set the appropriate flags and return | 
|  | if(U_FAILURE(status)) { | 
|  | // close the converter | 
|  | if(codepage == 0) { | 
|  | releaseDefaultConverter(converter); | 
|  | } else { | 
|  | ucnv_close(converter); | 
|  | } | 
|  | setToBogus(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // perform the conversion | 
|  | if(converter == 0) { | 
|  | // use the "invariant characters" conversion | 
|  | if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { | 
|  | u_charsToUChars(codepageData, getArrayStart(), dataLength); | 
|  | fLength = dataLength; | 
|  | } else { | 
|  | setToBogus(); | 
|  | } | 
|  | return; | 
|  | } | 
|  |  | 
|  | // set up the conversion parameters | 
|  | const char *mySource     = codepageData; | 
|  | const char *mySourceEnd  = mySource + dataLength; | 
|  | UChar *myTarget; | 
|  |  | 
|  | // estimate the size needed: | 
|  | // 1.25 UChar's per source byte should cover most cases | 
|  | int32_t arraySize = dataLength + (dataLength >> 2); | 
|  |  | 
|  | // we do not care about the current contents | 
|  | UBool doCopyArray = FALSE; | 
|  | for(;;) { | 
|  | if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) { | 
|  | setToBogus(); | 
|  | break; | 
|  | } | 
|  |  | 
|  | // perform the conversion | 
|  | myTarget = fArray + fLength; | 
|  | ucnv_toUnicode(converter, &myTarget,  fArray + fCapacity, | 
|  | &mySource, mySourceEnd, 0, FALSE, &status); | 
|  |  | 
|  | // update the conversion parameters | 
|  | fLength = myTarget - fArray; | 
|  |  | 
|  | // allocate more space and copy data, if needed | 
|  | if(status == U_BUFFER_OVERFLOW_ERROR) { | 
|  | // reset the error code | 
|  | status = U_ZERO_ERROR; | 
|  |  | 
|  | // keep the previous conversion results | 
|  | doCopyArray = TRUE; | 
|  |  | 
|  | // estimate the new size needed, larger than before | 
|  | // try 2 UChar's per remaining source byte | 
|  | arraySize = fLength + 2 * (mySourceEnd - mySource); | 
|  | } else { | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | // close the converter | 
|  | if(codepage == 0) { | 
|  | releaseDefaultConverter(converter); | 
|  | } else { | 
|  | ucnv_close(converter); | 
|  | } | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // External Buffer | 
|  | //======================================== | 
|  | // ### TODO: | 
|  | // this is very, very dirty: we should not ever expose our array to the outside, | 
|  | // and this also violates the const-ness of this object | 
|  | // this must be removed when the resource bundle implementation does not need it any more! | 
|  | const UChar* | 
|  | UnicodeString::getUChars() const { | 
|  | // if we're bogus, do nothing | 
|  | if(isBogus()) { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if(fCapacity <= fLength || fArray[fLength] != 0) { | 
|  | if(((UnicodeString &)*this).cloneArrayIfNeeded(fLength + 1)) { | 
|  | fArray[fLength] = 0; | 
|  | } | 
|  | } | 
|  | return fArray; | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Miscellaneous | 
|  | //======================================== | 
|  | UBool | 
|  | UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, | 
|  | int32_t growCapacity, | 
|  | UBool doCopyArray, | 
|  | int32_t **pBufferToDelete) { | 
|  | // default parameters need to be static, therefore | 
|  | // the defaults are -1 to have convenience defaults | 
|  | if(newCapacity == -1) { | 
|  | newCapacity = fCapacity; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * We need to make a copy of the array if | 
|  | * the buffer is read-only, or | 
|  | * the buffer is refCounted (shared), and refCount>1, or | 
|  | * the buffer is too small. | 
|  | * Return FALSE if memory could not be allocated. | 
|  | */ | 
|  | if(fFlags & kBufferIsReadonly || | 
|  | fFlags & kRefCounted && refCount() > 1 || | 
|  | newCapacity > fCapacity | 
|  | ) { | 
|  | // save old values | 
|  | UChar *array = fArray; | 
|  | uint16_t flags = fFlags; | 
|  |  | 
|  | // check growCapacity for default value and use of the stack buffer | 
|  | if(growCapacity == -1) { | 
|  | growCapacity = newCapacity; | 
|  | } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { | 
|  | growCapacity = US_STACKBUF_SIZE; | 
|  | } | 
|  |  | 
|  | // allocate a new array | 
|  | if(allocate(growCapacity) || | 
|  | newCapacity < growCapacity && allocate(newCapacity) | 
|  | ) { | 
|  | if(doCopyArray) { | 
|  | // copy the contents | 
|  | // do not copy more than what fits - it may be smaller than before | 
|  | if(fCapacity < fLength) { | 
|  | fLength = fCapacity; | 
|  | } | 
|  | us_arrayCopy(array, 0, fArray, 0, fLength); | 
|  | } else { | 
|  | fLength = 0; | 
|  | } | 
|  |  | 
|  | // release the old array | 
|  | if(flags & kRefCounted) { | 
|  | // the array is refCounted; decrement and release if 0 | 
|  | int32_t *pRefCount = ((int32_t *)array - 1); | 
|  | if(--*pRefCount == 0) { | 
|  | if(pBufferToDelete == 0) { | 
|  | delete [] pRefCount; | 
|  | } else { | 
|  | // the caller requested to delete it himself | 
|  | *pBufferToDelete = pRefCount; | 
|  | } | 
|  | } | 
|  | } | 
|  | } else { | 
|  | // not enough memory for growCapacity and not even for the smaller newCapacity | 
|  | // reset the old values for setToBogus() to release the array | 
|  | fArray = array; | 
|  | fFlags = flags; | 
|  | setToBogus(); | 
|  | return FALSE; | 
|  | } | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | // private function for C API | 
|  | U_CFUNC int32_t | 
|  | T_UnicodeString_length(const UnicodeString *s) | 
|  | { | 
|  | return s->length(); | 
|  | } | 
|  |  | 
|  | // private function for C API | 
|  | U_CFUNC int32_t | 
|  | T_UnicodeString_extract(const UnicodeString *s, char *dst) | 
|  | { | 
|  | return s->extract(0, s->length(), dst, ""); | 
|  | } | 
|  |  | 
|  |  | 
|  | //======================================== | 
|  | // Default converter caching | 
|  | //======================================== | 
|  |  | 
|  | UConverter* | 
|  | UnicodeString::getDefaultConverter(UErrorCode &status) | 
|  | { | 
|  | UConverter *converter = 0; | 
|  |  | 
|  | if(fgDefaultConverter != 0) { | 
|  | Mutex lock; | 
|  |  | 
|  | // need to check to make sure it wasn't taken out from under us | 
|  | if(fgDefaultConverter != 0) { | 
|  | converter = fgDefaultConverter; | 
|  | fgDefaultConverter = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | // if the cache was empty, create a converter | 
|  | if(converter == 0) { | 
|  | converter = ucnv_open(0, &status); | 
|  | if(U_FAILURE(status)) { | 
|  | return 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | return converter; | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeString::releaseDefaultConverter(UConverter *converter) | 
|  | { | 
|  | if(fgDefaultConverter == 0) { | 
|  | if (converter != 0) { | 
|  | ucnv_reset(converter); | 
|  | } | 
|  |  | 
|  | Mutex lock; | 
|  |  | 
|  | if(fgDefaultConverter == 0) { | 
|  | fgDefaultConverter = converter; | 
|  | converter = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | // it's safe to close a 0 converter | 
|  | ucnv_close(converter); | 
|  | } | 
|  |  | 
|  | //======================================== | 
|  | // Streaming (to be removed) | 
|  | //======================================== | 
|  |  | 
|  | #include "unistrm.h" | 
|  | #include "filestrm.h" | 
|  |  | 
|  |  | 
// Returns the upper 8 bits of a 16-bit value.
inline uint8_t
uprv_hibyte(uint16_t x)
{
  const uint16_t upper = (uint16_t)(x >> 8);
  return (uint8_t)upper;
}
|  |  | 
// Returns the lower 8 bits of a 16-bit value.
inline uint8_t
uprv_lobyte(uint16_t x)
{
  const uint16_t lower = (uint16_t)(x & 0x00ff);
  return (uint8_t)lower;
}
|  |  | 
// Returns the upper 16 bits of a 32-bit value.
inline uint16_t
uprv_hiword(uint32_t x)
{
  const uint32_t upper = x >> 16;
  return (uint16_t)upper;
}
|  |  | 
// Returns the lower 16 bits of a 32-bit value.
inline uint16_t
uprv_loword(uint32_t x)
{
  const uint32_t lower = x & 0x0000ffff;
  return (uint16_t)lower;
}
|  |  | 
|  | inline void | 
|  | writeLong(FileStream *os, | 
|  | int32_t x) | 
|  | { | 
|  | uint16_t word = uprv_hiword((uint32_t)x); | 
|  | T_FileStream_putc(os, uprv_hibyte(word)); | 
|  | T_FileStream_putc(os, uprv_lobyte(word)); | 
|  | word = uprv_loword((uint32_t)x); | 
|  | T_FileStream_putc(os, uprv_hibyte(word)); | 
|  | T_FileStream_putc(os, uprv_lobyte(word)); | 
|  | } | 
|  |  | 
|  | inline int32_t | 
|  | readLong(FileStream *is) | 
|  | { | 
|  | int32_t x = 0; | 
|  | uint16_t byte; | 
|  |  | 
|  | byte = T_FileStream_getc(is); | 
|  | x |= byte; | 
|  | byte = T_FileStream_getc(is); | 
|  | x = (x << 8) | byte; | 
|  | byte = T_FileStream_getc(is); | 
|  | x = (x << 8) | byte; | 
|  | byte = T_FileStream_getc(is); | 
|  | x = (x << 8) | byte; | 
|  |  | 
|  | return x; | 
|  | } | 
|  |  | 
|  | inline void | 
|  | writeUChar(FileStream *os, | 
|  | UChar c) | 
|  | { | 
|  | T_FileStream_putc(os, uprv_hibyte(c)); | 
|  | T_FileStream_putc(os, uprv_lobyte(c)); | 
|  | } | 
|  |  | 
|  | inline UChar | 
|  | readUChar(FileStream *is) | 
|  | { | 
|  | UChar c = 0; | 
|  | uint16_t byte; | 
|  |  | 
|  | byte = T_FileStream_getc(is); | 
|  | c |= byte; | 
|  | byte = T_FileStream_getc(is); | 
|  | c = (c << 8) | byte; | 
|  |  | 
|  | return c; | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeStringStreamer::streamOut(const UnicodeString *s, | 
|  | FileStream *os) | 
|  | { | 
|  | if(!T_FileStream_error(os)) { | 
|  | writeLong(os, s->fLength); | 
|  | } | 
|  |  | 
|  | const UChar *c   = s->getArrayStart(); | 
|  | const UChar *end = c + s->fLength; | 
|  |  | 
|  | while(c != end && ! T_FileStream_error(os)) { | 
|  | writeUChar(os, *c++); | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeStringStreamer::streamIn(UnicodeString *s, | 
|  | FileStream *is) | 
|  | { | 
|  | int32_t newSize; | 
|  |  | 
|  | // handle error conditions | 
|  | if(T_FileStream_error(is) || T_FileStream_eof(is)) { | 
|  | s->setToBogus(); | 
|  | return; | 
|  | } | 
|  | newSize = readLong(is); | 
|  | if((newSize < 0) || T_FileStream_error(is) | 
|  | || ((newSize > 0) && T_FileStream_eof(is))) { | 
|  | s->setToBogus(); //error condition | 
|  | return; | 
|  | } | 
|  |  | 
|  | // clone s's array, if needed | 
|  | if(!s->cloneArrayIfNeeded(newSize, newSize, FALSE)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | UChar *c = s->getArrayStart(); | 
|  | UChar *end = c + newSize; | 
|  |  | 
|  | while(c < end && ! (T_FileStream_error(is) || T_FileStream_eof(is))) { | 
|  | *c++ = readUChar(is); | 
|  | } | 
|  |  | 
|  | // couldn't read all chars | 
|  | if(c < end) { | 
|  | s->setToBogus(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | s->fLength = newSize; | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeStringStreamer::streamOut(const UnicodeString *s, | 
|  | UMemoryStream *os) | 
|  | { | 
|  | if(!uprv_mstrm_error(os)) { | 
|  | uprv_mstrm_write(os, (uint8_t*)&s->fLength, sizeof(s->fLength)); | 
|  | } | 
|  |  | 
|  | const UChar *c   = s->getArrayStart(); | 
|  | const UChar *end = c + s->fLength; | 
|  |  | 
|  | while(c != end && ! uprv_mstrm_error(os)) { | 
|  | uprv_mstrm_write(os, (uint8_t*)c, sizeof(*c)); | 
|  | c++; | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | UnicodeStringStreamer::streamIn(UnicodeString *s, | 
|  | UMemoryStream *is) | 
|  | { | 
|  | int32_t newSize; | 
|  |  | 
|  | // handle error conditions | 
|  | if(uprv_mstrm_error(is) || uprv_mstrm_eof(is)) { | 
|  | s->setToBogus(); | 
|  | return; | 
|  | } | 
|  | uprv_mstrm_read(is, (uint8_t *)&newSize, sizeof(int32_t)); | 
|  | if((newSize < 0) || uprv_mstrm_error(is) | 
|  | || ((newSize > 0) && uprv_mstrm_eof(is))) { | 
|  | s->setToBogus(); //error condition | 
|  | return; | 
|  | } | 
|  |  | 
|  | // clone s's array, if needed | 
|  | if(!s->cloneArrayIfNeeded(newSize, newSize, FALSE)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | UChar *c = s->getArrayStart(); | 
|  | UChar *end = c + newSize; | 
|  |  | 
|  | while(c < end && ! (uprv_mstrm_error(is) || uprv_mstrm_eof(is))) { | 
|  | uprv_mstrm_read(is, (uint8_t *)c, sizeof(*c)); | 
|  | c++; | 
|  | } | 
|  |  | 
|  | // couldn't read all chars | 
|  | if(c < end) { | 
|  | s->setToBogus(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | s->fLength = newSize; | 
|  | } | 
|  |  | 
|  | // console IO | 
|  |  | 
#if U_IOSTREAM_SOURCE >= 198506

// Writes a UnicodeString to an output stream.
// The text is converted with the default converter, one fixed-size chunk
// at a time, and each chunk is written to the stream. The stream is
// flushed before returning, whether or not anything was written.

#if U_IOSTREAM_SOURCE >= 199711

U_COMMON_API std::ostream &
operator<<(std::ostream& stream, const UnicodeString& s)

#else

U_COMMON_API ostream &
operator<<(ostream& stream, const UnicodeString& s)

#endif

{
  if(s.length() > 0) {
    UErrorCode errorCode = U_ZERO_ERROR;

    // use the default converter to convert chunks of text
    UConverter *converter = UnicodeString::getDefaultConverter(errorCode);
    if(U_SUCCESS(errorCode)) {
      char chunk[200];
      char *const chunkLimit = chunk + sizeof(chunk);
      const UChar *src = s.getArrayStart();
      const UChar *const srcLimit = src + s.length();

      for(;;) {
        // every pass starts with an empty chunk and a clean error code
        char *out = chunk;
        errorCode = U_ZERO_ERROR;
        ucnv_fromUnicode(converter, &out, chunkLimit, &src, srcLimit, 0, FALSE, &errorCode);

        // write this chunk
        if(out > chunk) {
          stream.write(chunk, out - chunk);
        }

        // an overflow means the chunk filled up and there is more to convert
        if(errorCode != U_BUFFER_OVERFLOW_ERROR) {
          break;
        }
      }

      UnicodeString::releaseDefaultConverter(converter);
    }
  }

  stream.flush();
  return stream;
}

#endif