source/common/unistr.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 *                                                                             *
 * COPYRIGHT:                                                                  *
 *   (C) Copyright International Business Machines Corporation, 1998-1999      *
 *   Licensed Material - Program-Property of IBM - All Rights Reserved.        *
 *   US Government Users Restricted Rights - Use, duplication, or disclosure   *
 *   restricted by GSA ADP Schedule Contract with IBM Corp.                    *
 *                                                                             *
 *******************************************************************************
 *
 * File unistr.cpp
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   09/25/98    stephen     Creation.
 *   04/20/99    stephen     Overhauled per 4/16 code review.
 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
 *******************************************************************************
 */

 #include "unistr.h"

 #include "locid.h"
 #include "cstring.h"
 #include "cmemory.h"
 #include "ustring.h"
 #include "mutex.h"

 #if 0
 //DEBUGGING
 #include <iostream.h>

 void
 print(const UnicodeString& s,
       const char *name)
 {
   UChar c;
   cout << name << ":|";
   for(int i = 0; i < s.length(); ++i) {
     c = s[i];
     if(c>= 0x007E || c < 0x0020)
       cout << "[0x" << hex << s[i] << "]";
     else
       cout << (char) s[i];
   }
   cout << '|' << endl;
 }

 void
 print(const UChar *s,
       int32_t len,
       const char *name)
 {
   UChar c;
   cout << name << ":|";
   for(int i = 0; i < len; ++i) {
     c = s[i];
     if(c>= 0x007E || c < 0x0020)
       cout << "[0x" << hex << s[i] << "]";
     else
       cout << (char) s[i];
   }
   cout << '|' << endl;
 }
 // END DEBUGGING
 #endif

 // Local function definitions for now

 // move u_arrayCompare to utypes.h ??
 inline int8_t
 u_arrayCompare(const UChar *src, int32_t srcStart,
          const UChar *dst, int32_t dstStart, int32_t count)
 {return icu_memcmp(src+srcStart, dst+dstStart, (size_t)(count*sizeof(*src)));}

 // need to copy areas that may overlap
 inline void
 us_arrayCopy(const UChar *src, int32_t srcStart,
          UChar *dst, int32_t dstStart, int32_t count)
 {icu_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));}

 // static initialization
 const UChar UnicodeString::fgInvalidUChar      = 0xFFFF;
 const int32_t UnicodeString::kGrowSize         = 0x80;
 const int32_t UnicodeString::kInvalidHashCode  = 0;
 const int32_t UnicodeString::kEmptyHashCode    = 1;
 UConverter* UnicodeString::fgDefaultConverter  = 0;

 //========================================
 // Constructors
 //========================================
 UnicodeString::UnicodeString()
   : fArray(fStackBuffer),
     fLength(0),
     fCapacity(US_STACKBUF_SIZE),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {}

 UnicodeString::UnicodeString(int32_t capacity)
   : fArray(0),
     fLength(0),
     fCapacity(0),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {
   fArray = allocate(capacity, fCapacity);
   if(! fArray) {
     setToBogus();
     return;
   }

   setRefCount(1);
 }

 UnicodeString::UnicodeString(UChar ch)
   : fArray(fStackBuffer),
     fLength(0),
     fCapacity(US_STACKBUF_SIZE),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {
   doReplace(0, 0, &ch, 0, 1);
 }

 UnicodeString::UnicodeString(const UChar *text)
   : fArray(fStackBuffer),
     fLength(0),
     fCapacity(US_STACKBUF_SIZE),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {
   doReplace(0, 0, text, 0, u_strlen(text));
 }

 UnicodeString::UnicodeString( const UChar *text,
                   int32_t textLength)
   : fArray(fStackBuffer),
     fLength(0),
     fCapacity(US_STACKBUF_SIZE),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {
   doReplace(0, 0, text, 0, textLength);
 }

 UnicodeString::UnicodeString(const char *codepageData,
                  const char *codepage)
   : fArray(fStackBuffer),
     fLength(0),
     fCapacity(US_STACKBUF_SIZE),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {
   if(codepageData != 0)
     doCodepageCreate(codepageData, icu_strlen(codepageData), codepage);
 }


 UnicodeString::UnicodeString(const char *codepageData,
                  int32_t dataLength,
                  const char *codepage)
   : fArray(fStackBuffer),
     fLength(0),
     fCapacity(US_STACKBUF_SIZE),
     fRefCounted(FALSE),
     fHashCode(kEmptyHashCode),
     fBogus(FALSE)
 {
   doCodepageCreate(codepageData, dataLength, codepage);
 }

 //========================================
 // Destructor
 //========================================
 UnicodeString::~UnicodeString()
 {
   // decrement ref count and reclaim storage, if owned
   if(fRefCounted && removeRef() == 0)
     delete [] fArray;
 }

 //========================================
 // Assignment
 //========================================
 UnicodeString&
 UnicodeString::operator= (const UnicodeString& src)
 {
   // if src is bogus, or we're bogus, or assigning to ourselves, do nothing
   if(fBogus || src.isBogus() || this == &src)
   return *this;

   // if src is ref counted, point ourselves at its array
   if(src.fRefCounted) {

     // if we're ref counted, decrement our current ref count
     if(fRefCounted && removeRef() == 0)
     delete [] fArray;

     fArray      = src.fArray;
     fLength     = src.fLength;
     fCapacity   = src.fCapacity;
     fHashCode   = src.fHashCode;
     fRefCounted = TRUE;
     addRef();
   }
   // if src isn't ref counted, just do a replace
   else {
     doReplace(0, fLength, src.fArray, 0, src.fLength);
     fHashCode = src.fHashCode;
   }

   return *this;
 }

 //========================================
 // Miscellaneous operations
 //========================================
 int32_t
 UnicodeString::numDisplayCells( UTextOffset start,
                 int32_t length,
                 bool_t asian) const
 {
   // pin indices to legal values
   pinIndices(start, length);

   UChar c;
   int32_t result = 0;
   UTextOffset limit = start + length;

   while(start < limit) {
     c = getArrayStart()[start];
     switch(Unicode::getCellWidth(c)) {
     case Unicode::ZERO_WIDTH:
       break;;

     case Unicode::HALF_WIDTH:
       result += 1;
       break;

     case Unicode::FULL_WIDTH:
       result += 2;
       break;

     case Unicode::NEUTRAL:
       result += (asian ? 2 : 1);
       break;
     }
     ++start;
   }

   return result;
 }

 UCharReference
 UnicodeString::operator[] (UTextOffset pos)
 {
   return UCharReference(this, pos);
 }

 //========================================
 // Read-only implementation
 //========================================
 int8_t
 UnicodeString::doCompare( UTextOffset start,
               int32_t length,
               const UnicodeString& src,
               UTextOffset srcStart,
               int32_t srcLength) const
 {
   // pin indices to legal values
   pinIndices(start, length);

   // get the correct pointer
   const UChar *chars = getArrayStart();

   // compare the characters
   return (src.compare(srcStart, srcLength, chars, start, length) * -1);
 }

 int8_t
 UnicodeString::doCompare( UTextOffset start,
               int32_t length,
               const UChar *srcChars,
               UTextOffset srcStart,
               int32_t srcLength) const
 {
   // pin indices to legal values
   pinIndices(start, length);

   // get the correct pointer
   const UChar *chars = getArrayStart();

   // we're comparing different lengths
   if(length != srcLength) {

     // compare the minimum # of characters
     int32_t minLength     = (length < srcLength ? length : srcLength);
     const UChar *minLimit = chars + minLength;
     const UChar *limit    = chars + length;
     int8_t result;

     // adjust for starting offsets
     chars += start;
     srcChars += srcStart;

     while(chars < minLimit) {
       result = (*chars - *srcChars);

       if(result != 0)
     return result;

       ++chars;
       ++srcChars;
     }

     // if we got here, the leading portions are identical
     return (chars < limit ? 1 : -1);
   }
   // compare two identical lengths, use u_arrayCompare
   else
     return u_arrayCompare(chars, start, srcChars, srcStart, length);
 }

 void
 UnicodeString::doExtract(UTextOffset start,
              int32_t length,
              UChar *dst,
              UTextOffset dstStart) const
 {
   // pin indices to legal values
   pinIndices(start, length);
   us_arrayCopy(getArrayStart(), start, dst, dstStart, length);
 }


 UTextOffset
 UnicodeString::doIndexOf(UChar c,
              UTextOffset start,
              int32_t length) const
 {
   // pin indices
   pinIndices(start, length);

   // find the first occurrence of c
   const UChar *begin = getArrayStart() + start;
   const UChar *limit = begin + length;

   while(begin < limit && *begin != c)
     ++begin;

   return (begin == limit ? -1 : begin - getArrayStart());
 }

 UTextOffset
 UnicodeString::doLastIndexOf(UChar c,
                  UTextOffset start,
                  int32_t length) const
 {
   // pin indices
   pinIndices(start, length);

   const UChar *begin = getArrayStart() + start + length;
   const UChar *limit = begin - length;

   while(begin > limit && *begin != c)
     --begin;

   return (begin == limit ? -1 : begin - getArrayStart());
 }


 //========================================
 // Write implementation
 //========================================

 UnicodeString&
 UnicodeString::setCharAt(UTextOffset offset,
              UChar c)
 {
   if(offset < 0)
     offset = 0;
   else if(offset >= fLength)
     offset = fLength - 1;

   doSetCharAt(offset, c);
   fHashCode = kInvalidHashCode;
   return *this;
 }

 UnicodeString&
 UnicodeString::toUpper()
 { return toUpper(Locale::getDefault()); }

 UnicodeString&
 UnicodeString::toLower()
 { return toLower(Locale::getDefault()); }

 UnicodeString&
 UnicodeString::toUpper(const Locale& locale)
 {
   UTextOffset start = 0;
   UTextOffset limit = fLength;
   UChar c;
   UnicodeString lang;

   locale.getLanguage(lang);

   // The German sharp S character (U+00DF)'s uppercase equivalent is
   // "SS", making it the only character that expands to two characters
   // when its case is changed (we don't automatically convert "SS" to
   // U+00DF going to lowercase because it can only be determined from
   // knowing the language whether a particular "SS" should map to
   // U+00DF or "ss").  So we make a preliminary pass through the
   // string looking for sharp S characters and then go back and make
   // room for the extra capital Ses if we find any.  [For performance,
   // we only do this extra work if the language is actually German]
   if(lang == "de") {
     UChar SS [] = { 0x0053, 0x0053 };
     while(start < limit) {

       c = getArrayStart()[start];

       // A sharp s needs to be replaced with two capital S's.
       if(c == 0x00DF) {
     doReplace(start, 1, SS, 0, 2);
     start++;
     limit++;
       }

       // Otherwise, the case conversion can be handled by the Unicode unit.
       else if(Unicode::isLowerCase(c))
     doSetCharAt(start, Unicode::toUpperCase(c));

       // If no conversion is necessary, do nothing
       ++start;
     }
   }

   // If the specfied language is Turkish, then we have to special-case
   // for the Turkish dotted and dotless Is.  The regular lowercase i
   // maps to the capital I with a dot (U+0130), and the lowercase i
   // without the dot (U+0131) maps to the regular capital I
   else if(lang == "tr") {
     while(start < limit) {
       c = getArrayStart()[start];

       if(c == 0x0069/*'i'*/)
     doSetCharAt(start, 0x0130);
       else if(c == 0x0131)
     doSetCharAt(start, 0x0049/*'I'*/);
       else if(Unicode::isLowerCase(c))
     doSetCharAt(start, Unicode::toUpperCase(c));
       ++start;
     }
   }

   else {
     while(start < limit) {
       c = getArrayStart()[start];
       if(Unicode::isLowerCase(c))
     doSetCharAt(start, Unicode::toUpperCase(c));
       ++start;
     }
   }

   fHashCode = kInvalidHashCode;

   return *this;
 }

 UnicodeString&
 UnicodeString::toLower(const Locale& locale)
 {
   UTextOffset start = 0;
   UTextOffset limit = fLength;
   UChar c;
   UnicodeString lang;

   locale.getLanguage(lang);

   // if the specfied language is Turkish, then we have to special-case
   // for the Turkish dotted and dotless Is.  The capital I with a dot
   // (U+0130) maps to the regular lowercase i, and the regular capital
   // I maps to the lowercase i without the dot (U+0131)
   if(lang == "tr") {
     while(start < limit) {
       c = getArrayStart()[start];
       if(c == 0x0049) // 'I'
     doSetCharAt(start, 0x0131);
       else if(c == 0x0130)
     doSetCharAt(start, 0x0069); // 'i'
       else if(Unicode::isUpperCase(c) || Unicode::isTitleCase(c))
     doSetCharAt(start, Unicode::toLowerCase(c));
       ++start;
     }
   }

   // if the specfied language is Greek, then we have to special-case
   // for the capital letter sigma (U+3A3), which has two lower-case
   // forms.  If the character following the capital sigma is a letter,
   // we use the medial form (U+3C3); otherwise, we use the final form
   // (U+3C2).
   else if(lang == "el") {
     while(start < limit) {
       c = getArrayStart()[start];
       if(c == 0x3a3) {
     if(start + 1 < limit && Unicode::isLetter(getArrayStart()[start + 1]))
       doSetCharAt(start, 0x3C3);
     else
       doSetCharAt(start, 0x3C2);
       }
       else if(Unicode::isUpperCase(c) || Unicode::isTitleCase(c))
     doSetCharAt(start, Unicode::toLowerCase(c));
       ++start;
     }
   }

   // if the specified language is anything other than Turkish or
   // Greek, we rely on the Unicode class to do all our case mapping--
   // there are no other special cases
   else {
     while(start < limit) {
       c = getArrayStart()[start];
       if(Unicode::isUpperCase(c) || Unicode::isTitleCase(c))
     doSetCharAt(start, Unicode::toLowerCase(c));
       ++start;
     }
   }

   fHashCode = kInvalidHashCode;

   return *this;
 }

 // for speed, no bounds checking is performed and the hash code isn't changed
 UnicodeString&
 UnicodeString::doSetCharAt(UTextOffset offset,
                UChar c)
 {
   // clone our array, if necessary
   cloneArrayIfNeeded();

   // set the character
   fArray[ (fRefCounted ? offset + 1 : offset) ] = c;
   return *this;
 }

 UnicodeString&
 UnicodeString::doReplace( UTextOffset start,
               int32_t length,
               const UnicodeString& src,
               UTextOffset srcStart,
               int32_t srcLength)
 {
   // pin the indices to legal values
   src.pinIndices(srcStart, srcLength);

   // get the characters from src
   const UChar *chars = src.getArrayStart();

   // and replace the range in ourselves with them
   doReplace(start, length, chars, srcStart, srcLength);

   return *this;
 }

 UnicodeString&
 UnicodeString::doReplace(UTextOffset start,
              int32_t length,
              const UChar *srcChars,
              UTextOffset srcStart,
              int32_t srcLength)
 {
   // if we're bogus, do nothing
   if(fBogus)
     return *this;

   bool_t deleteWhenDone = FALSE;
   UChar *bufferToDelete = 0;

   // clone our array, if necessary
   cloneArrayIfNeeded();

   // pin the indices to legal values
   pinIndices(start, length);

   // calculate the size of the string after the replace
   int32_t newSize = fLength - length + srcLength;

   // allocate a bigger array if needed
   if( newSize > getCapacity() ) {

     // allocate at minimum the current capacity + needed space
     int32_t tempLength;
     UChar *temp = allocate(fCapacity + srcLength, tempLength);
     if(! temp) {
       setToBogus();
       return *this;
     }

     // if we're not currently ref counted, shift the array right by one
     if(fRefCounted == FALSE)
       us_arrayCopy(fArray, 0, temp, 1, fLength);
     // otherwise, copy the old array into temp, including the ref count
     else
       us_arrayCopy(fArray, 0, temp, 0, fLength + 1);

     // delete the old array if we were ref counted
     if(fRefCounted && removeRef() == 0) {
       // if the srcChars array is the same as this object's array,
       // don't delete it until the end of the method.  this can happen
       // in code like UnicodeString s = "foo"; s += s;
       if(srcChars != getArrayStart())
     delete [] fArray;
       else {
     deleteWhenDone = TRUE;
     bufferToDelete = fArray;
       }
     }

     // use the new array
     fCapacity = tempLength;
     fArray = temp;
     setRefCount(1);
   }

   // now do the replace

   // first copy the portion that isn't changing, leaving a hole
   us_arrayCopy(getArrayStart(), start + length,
           getArrayStart(), start + srcLength,
           fLength - (start + length));

   // now fill in the hole with the new string
   us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength);

   fLength = newSize;
   fHashCode = kInvalidHashCode;

   if(deleteWhenDone)
     delete [] bufferToDelete;

   return *this;
 }

 UnicodeString&
 UnicodeString::doReverse(UTextOffset start,
              int32_t length)
 {
   // if we're bogus, do nothing
   if(fBogus)
     return *this;

   // clone our array, if necessary
   cloneArrayIfNeeded();

   // pin the indices to legal values
   pinIndices(start, length);

   UChar *left = getArrayStart() + start;
   UChar *right = getArrayStart() + start + length;
   UChar swap;

   while(left < --right) {
     swap = *left;
     *left++ = *right;
     *right = swap;
   }

   fHashCode = kInvalidHashCode;

   return *this;
 }

 //========================================
 // Hashing
 //========================================
 int32_t
 UnicodeString::doHashCode()
 {
   const UChar *key     = getArrayStart();
   int32_t len         = fLength;
   int32_t hash         = kInvalidHashCode;
   const UChar *limit     = key + len;
   int32_t inc         = (len >= 128 ? len/64 : 1);

   /*
     We compute the hash by iterating sparsely over 64 (at most)
     characters spaced evenly through the string.  For each character,
     we multiply the previous hash value by a prime number and add the
     new character in, in the manner of an additive linear congruential
     random number generator, thus producing a pseudorandom
     deterministic value which should be well distributed over the
     output range. [LIU] */

   while(key < limit) {
     hash = (hash * 37) + *key;
     key += inc;
   }

   if(hash == kInvalidHashCode)
     hash = kEmptyHashCode;

   fHashCode = hash;
   return fHashCode;
 }

 //========================================
 // Bogusify?
 //========================================
 void
 UnicodeString::setToBogus()
 {
   fBogus = TRUE;
   if(fRefCounted) {
     if(removeRef() == 0)
       delete [] fArray;

     fArray = 0;
     fCapacity = fLength = 0;
   }

   fHashCode = kInvalidHashCode;
 }

 //========================================
 // Codeset conversion
 //========================================
 int32_t
 UnicodeString::extract(UTextOffset start,
                int32_t length,
                char *dst,
                const char *codepage) const
 {
   // if we're bogus or there's nothing to convert, do nothing
   if(fBogus || length == 0)
     return 0;

   // pin the indices to legal values
   pinIndices(start, length);

   int32_t convertedLen = 0;

   // set up the conversion parameters
   int32_t sourceLen        = length;
   const UChar *mySource    = getArrayStart() + start;
   const UChar *mySourceEnd = mySource + length;
   char *myTarget           = dst;
   UErrorCode status        = ZERO_ERROR;
   int32_t arraySize        = 0x0FFFFFFF;

   // create the converter
   UConverter *converter = 0;

   // if the codepage is the default, use our cache
   if(codepage == 0)
     converter = getDefaultConverter(status);
   else
     converter = ucnv_open(codepage, &status);

   // if we failed, set the appropriate flags and return
   if(FAILURE(status)) {
     // close the converter
     if(codepage == 0)
       releaseDefaultConverter(converter);
     else
       ucnv_close(converter);
     return 0;
   }

   // perform the conversion
   // there is no loop here since we assume the buffer is large enough

   ucnv_fromUnicode(converter, &myTarget,  myTarget + arraySize,
            &mySource, mySourceEnd, NULL, TRUE, &status);

   // close the converter
   if(codepage == 0)
     releaseDefaultConverter(converter);
   else
     ucnv_close(converter);

   return (myTarget - dst);
 }

 void
 UnicodeString::doCodepageCreate(const char *codepageData,
                 int32_t dataLength,
                 const char *codepage)
 {
   // if there's nothing to convert, do nothing
   if(codepageData == 0 || dataLength == 0)
     return;

   // set up the conversion parameters
   int32_t sourceLen        = dataLength;
   const char *mySource     = codepageData;
   const char *mySourceEnd  = mySource + sourceLen;
   UChar *myTarget          = getArrayStart();
   UErrorCode status        = ZERO_ERROR;
   int32_t arraySize        = getCapacity();

   // create the converter
   UConverter *converter = 0;

   // if the codepage is the default, use our cache
   converter = (codepage == 0
            ? getDefaultConverter(status)
            : ucnv_open(codepage, &status));

   // if we failed, set the appropriate flags and return
   if(FAILURE(status)) {
     // close the converter
     if(codepage == 0)
       releaseDefaultConverter(converter);
     else
       ucnv_close(converter);
     setToBogus();
     return;
   }

   // perform the conversion
   do {
     // reset the error code
     status = ZERO_ERROR;

     // perform the conversion
     ucnv_toUnicode(converter, &myTarget,  myTarget + arraySize,
            &mySource, mySourceEnd, NULL, TRUE, &status);

     // update the conversion parameters
     fLength      = myTarget - getArrayStart();
     arraySize    = getCapacity() - fLength;

     // allocate more space and copy data, if needed
     if(fLength < dataLength) {
       int32_t tempCapacity;
       UChar *temp = allocate(fCapacity, tempCapacity);

       if(! temp) {
     // close the converter
     if(codepage == 0)
       releaseDefaultConverter(converter);
     else
       ucnv_close(converter);
     // set flags and return
     setToBogus();
     return;
       }

       // if we're not currently ref counted, shift the array right by one
       if(fRefCounted == FALSE)
     us_arrayCopy(fArray, 0, temp, 1, fLength);
       // otherwise, copy the old array into temp, including the ref count
       else
           us_arrayCopy(fArray, 0, temp, 0, fLength + 1);

       if(fRefCounted && removeRef() == 0)
     delete [] fArray;

       fArray      = temp;
       fCapacity   = tempCapacity;

       setRefCount(1);

       myTarget    = getArrayStart() + fLength;
       arraySize   = getCapacity() - fLength;
     }
   }
   while(status == INDEX_OUTOFBOUNDS_ERROR);

   fHashCode = kInvalidHashCode;

   // close the converter
   if(codepage == 0)
     releaseDefaultConverter(converter);
   else
     ucnv_close(converter);
 }

 //========================================
 // External Buffer
 //========================================
 UnicodeString::UnicodeString(UChar *buff,
                  int32_t bufLength,
                  int32_t buffCapacity)
   : fArray(buff),
     fLength(bufLength),
     fCapacity(buffCapacity),
     fRefCounted(FALSE),
     fHashCode(kInvalidHashCode),
     fBogus(FALSE)
 {}

 const UChar*
 UnicodeString::getUChars() const
 {
   // if we're bogus, do nothing
   if(fBogus)
     return 0;

   // clone our array, if necessary
   ((UnicodeString*)this)->cloneArrayIfNeeded();

   // no room for null, resize
   if(getCapacity() <= fLength) {
     // allocate at minimum the current capacity + needed space
     int32_t tempLength;
     UChar *temp = allocate(fCapacity + 1, tempLength);
     if(! temp) {
       ((UnicodeString*)this)->setToBogus();
       return 0;
     }

     // if we're not currently ref counted, shift the array right by one
     if(fRefCounted == FALSE)
       us_arrayCopy(fArray, 0, temp, 1, fLength);
     // otherwise, copy the old array into temp, including the ref count
     else
       us_arrayCopy(fArray, 0, temp, 0, fLength + 1);

     // delete the old array
     if(fRefCounted && ((UnicodeString*)this)->removeRef() == 0)
       delete [] ((UnicodeString*)this)->fArray;

     // use the new array
     ((UnicodeString*)this)->fCapacity = tempLength;
     ((UnicodeString*)this)->fArray    = temp;
     ((UnicodeString*)this)->setRefCount(1);
   }

   // tack on a trailing null
   fArray[(fRefCounted ? 1 : 0) + fLength] = 0;

   return getArrayStart();
 }

 UChar*
 UnicodeString::orphanStorage()
 {
   // if we're bogus, do nothing
   if(fBogus)
     return 0;

   // clone our array, if necessary
   ((UnicodeString*)this)->cloneArrayIfNeeded();

   // if we're ref counted, get rid of the leading ref count
   if(fRefCounted) {
     us_arrayCopy(getArrayStart(), 0, fArray, 0, fLength);
   }

   UChar *retVal = fArray;

   fArray = fStackBuffer;
   fLength = 0;
   fCapacity = US_STACKBUF_SIZE;
   fHashCode = kEmptyHashCode;

   return retVal;
 }

 //========================================
 // Miscellaneous
 //========================================
 void
 UnicodeString::pinIndices(UTextOffset& start,
               int32_t& length) const
 {
   // pin indices
   if(length < 0 || start < 0)
     start = length = 0;
   else {
     if(start < 0)
       start = 0;
     if(length > (fLength - start))
       length = (fLength - start);
   }
 }

 void
 UnicodeString::cloneArrayIfNeeded()
 {
   // if we're ref counted, make a copy of the buffer if necessary
   if(fRefCounted && refCount() > 1) {
     UChar *copy = new UChar [ fCapacity ];
     if( ! copy ) {
       setToBogus();
       return;
     }

     // copy the current shared array into our new array
     us_arrayCopy(fArray, 0, copy, 0, fLength + 1);

     // remove a reference from the current shared array
     // if there are no more references to the current shared array,
     // after we remove the reference, delete the array
     if(removeRef() == 0)
       delete [] fArray;

     // make our array point to the new copy and set the ref count to one
     fArray = copy;
     setRefCount(1);
   }
 }

 // private function for C API
 C_FUNC const UChar*
 T_UnicodeString_getUChars(const UnicodeString *s)
 {
   return s->getUChars();
 }


 //========================================
 // Default converter caching
 //========================================

 UConverter*
 UnicodeString::getDefaultConverter(UErrorCode &status)
 {
   UConverter *converter = 0;

   if(fgDefaultConverter != 0) {
     Mutex lock;

     // need to check to make sure it wasn't taken out from under us
     if(fgDefaultConverter != 0) {
       converter = fgDefaultConverter;
       fgDefaultConverter = 0;
     }
   }

   // if the cache was empty, create a converter
   if(converter == 0) {
     converter = ucnv_open(0, &status);
     if(FAILURE(status))
       return 0;
   }

   return converter;
 }

 void
 UnicodeString::releaseDefaultConverter(UConverter *converter)
 {
   if(fgDefaultConverter == 0) {
     Mutex lock;

     if(fgDefaultConverter == 0) {
       fgDefaultConverter = converter;
       converter = 0;
     }
   }

   // it's safe to close a NULL converter
   ucnv_close(converter);
 }

 //========================================
 // Streaming (to be removed)
 //========================================

 #include <iostream.h>
 #include "unistrm.h"
 #include "filestrm.h"


 inline uint8_t
 icu_hibyte(uint16_t x)
 { return (uint8_t)(x >> 8); }

 inline uint8_t
 icu_lobyte(uint16_t x)
 { return (uint8_t)(x & 0xff); }

 inline uint16_t
 icu_hiword(uint32_t x)
 { return (uint16_t)(x >> 16); }

 inline uint16_t
 icu_loword(uint32_t x)
 { return (uint16_t)(x & 0xffff); }

 inline void
 writeLong(FileStream *os,
       int32_t x)
 {
   uint16_t word = icu_hiword((uint32_t)x);
   T_FileStream_putc(os, icu_hibyte(word));
   T_FileStream_putc(os, icu_lobyte(word));
   word = icu_loword((uint32_t)x);
   T_FileStream_putc(os, icu_hibyte(word));
   T_FileStream_putc(os, icu_lobyte(word));
 }

 inline int32_t
 readLong(FileStream *is)
 {
   int32_t x = 0;
   uint16_t byte;

   byte = T_FileStream_getc(is);
   x |= byte;
   byte = T_FileStream_getc(is);
   x = (x << 8) | byte;
   byte = T_FileStream_getc(is);
   x = (x << 8) | byte;
   byte = T_FileStream_getc(is);
   x = (x << 8) | byte;

   return x;
 }

 inline void
 writeUChar(FileStream *os,
        UChar c)
 {
   T_FileStream_putc(os, icu_hibyte(c));
   T_FileStream_putc(os, icu_lobyte(c));
 }

 inline UChar
 readUChar(FileStream *is)
 {
   UChar c = 0;
   uint16_t byte;

   byte = T_FileStream_getc(is);
   c |= byte;
   byte = T_FileStream_getc(is);
   c = (c << 8) | byte;

   return c;
 }

 void
 UnicodeStringStreamer::streamOut(const UnicodeString *s,
                  FileStream *os)
 {
   if(!T_FileStream_error(os))
     writeLong(os, s->fLength);

   const UChar *c   = s->getArrayStart();
   const UChar *end = c + s->fLength;

   while(c != end && ! T_FileStream_error(os))
     writeUChar(os, *c++);
 }

 void
 UnicodeStringStreamer::streamIn(UnicodeString *s,
                 FileStream *is)
 {
   int32_t newSize;

   // handle error conditions
   if(T_FileStream_error(is) || T_FileStream_eof(is)) {
     s->setToBogus();
     return;
   }
   newSize = readLong(is);
   if((newSize < 0) || T_FileStream_error(is)
      || ((newSize > 0) && T_FileStream_eof(is))) {
     s->setToBogus(); //error condition
     return;
   }

   // clone s's array, if needed
   s->cloneArrayIfNeeded();

   // if the string isn't big enough to hold the data, enlarge it
   if(s->getCapacity() < newSize) {

     int32_t tempLength;
     UChar *temp = s->allocate(newSize, tempLength);
     if(! temp) {
       s->setToBogus();
       return;
     }

     // if s is not currently ref counted, shift the array right by one
     if(s->fRefCounted == FALSE)
       us_arrayCopy(s->fArray, 0, temp, 1, s->fLength);
     // otherwise, copy the old array into temp, including the ref count
     else
       us_arrayCopy(s->fArray, 0, temp, 0, s->fLength + 1);

     // delete the old array if s is ref counted
     if(s->fRefCounted && s->removeRef() == 0)
       delete [] s->fArray;

     // use the new array
     s->fCapacity = tempLength;
     s->fArray    = temp;
     s->setRefCount(1);
   }

   UChar *c = s->getArrayStart();
   UChar *end = c + newSize;

   while(c < end && ! (T_FileStream_error(is) || T_FileStream_eof(is)))
     *c++ = readUChar(is);

   // couldn't read all chars
   if(c < end) {
     s->setToBogus();
     return;
   }

   s->fLength = newSize;
 }

 // console IO

 ostream&
 operator<<(ostream& stream,
        const UnicodeString& s)
 {
   UTextOffset i;
   UChar c;
   int32_t saveFlags = stream.flags();

   stream << hex;

   for(i = 0; i < s.length(); i++) {
     c = s.charAt(i);
     if((c >= ' ' && c <= '~') || c == '\n')
       stream << (char)c;
     else
       stream << "[0x" << c << "]";
   }
   stream.flush();
   stream.setf(saveFlags & ios::basefield, ios::basefield);
   return stream;
 }