source/common/ustring.c - external/github.com/unicode-org/icu - Git at Google

 /*
 ******************************************************************************
 *
 *   Copyright (C) 1998-2001, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 *
 * File ustring.c
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   12/07/98    bertrand    Creation.
 ******************************************************************************
 */

 #include "unicode/utypes.h"
 #include "unicode/ustring.h"
 #include "unicode/putil.h"
 #include "unicode/ucnv.h"
 #include "cstring.h"
 #include "cwchar.h"
 #include "cmemory.h"
 #include "umutex.h"
 #include "ustr_imp.h"
 #include "ucln_cmn.h"

 /* forward declarations of definitions for the shared default converter */

 /*
  * One-slot cache for a default converter. u_getDefaultConverter() takes the
  * converter out of this slot (leaving NULL) and u_releaseDefaultConverter()
  * puts one back; ustring_cleanup() closes whatever is left.
  */
 static UConverter *gDefaultConverter = NULL;

 /* ANSI string.h - style functions ------------------------------------------ */

 /*
  * Destination capacity handed to the converter by u_uastrcpy() and
  * u_austrcpy(), which have no capacity argument of their own.
  */
 #define MAX_STRLEN 0x0FFFFFFF

 /* ---- String searching functions ---- */

 /*
  * Find the first occurrence of the code unit c in the NUL-terminated string s.
  * Searching for c==0 yields a pointer to the terminator itself.
  * Returns NULL when c does not occur.
  */
 U_CAPI UChar* U_EXPORT2
 u_strchr(const UChar *s, UChar c)
 {
   const UChar *p;

   for (p = s; *p != 0; ++p) {
     if (*p == c) {
       return (UChar *)p;
     }
   }

   /* p rests on the terminator: that is a hit only when NUL was searched for */
   return (c == 0) ? (UChar *)p : NULL;
 }

 /*
  * Find the first occurrence of substring in s (both NUL-terminated).
  * An empty substring matches at s. Returns NULL when there is no match.
  *
  * A Boyer-Moore algorithm would be better, but that would require a hashtable
  * because UChar is so big. This brute-force scan needs no extra memory.
  */
 U_CAPI UChar * U_EXPORT2
 u_strstr(const UChar *s, const UChar *substring) {
   const UChar *text, *pattern;

   if (*substring == 0) {
     return (UChar *)s;
   }

   for (;;) {
     text = s;
     pattern = substring;

     /* Only the text iterator needs checking for the terminator */
     while (*text != 0 && *text == *pattern) {
       ++text;
       ++pattern;
     }

     if (*pattern == 0) {
       return (UChar *)s;            /* whole substring matched at s */
     }
     if (*text == 0) {
       return NULL;                  /* ran off the end of the text: no match */
     }
     ++s;
   }
 }

 /**
  * Decide whether the surrogate code unit c, located at s inside the string
  * [start..limit[, is unpaired.
  * start<=s<limit, or limit==NULL for a NUL-terminated string.
  * @return TRUE if *s has no matching partner next to it
  */
 static U_INLINE UBool
 uprv_isSingleSurrogate(const UChar *start, const UChar *s, UChar c, const UChar *limit) {
     const UChar *next;

     if(!UTF_IS_SURROGATE_FIRST(c)) {
         /* trail surrogate: paired only if a lead unit sits directly before it */
         return (UBool)(s==start || !UTF_IS_LEAD(*(s-1)));
     }

     /* lead surrogate: paired only if a trail unit follows inside the string */
     next=s+1;
     return (UBool)(next==limit || !UTF_IS_TRAIL(*next));
 }

 /*
  * Find the first unpaired occurrence of the surrogate code unit in s.
  * length>=0 gives the number of code units to examine; a negative length
  * means that s is NUL-terminated.
  * Returns NULL if there is no unpaired occurrence.
  */
 U_CFUNC const UChar *
 uprv_strFindSurrogate(const UChar *s, int32_t length, UChar surrogate) {
     const UChar *limit = (length>=0) ? s+length : NULL;
     const UChar *t = s;
     UChar c;

     while(t!=limit) {
         c=*t;
         if(c==0 && limit==NULL) {
             break; /* terminator of a NUL-terminated string */
         }
         if(c==surrogate && uprv_isSingleSurrogate(s, t, c, limit)) {
             return t;
         }
         ++t;
     }

     return NULL;
 }

 /*
  * Find the last unpaired occurrence of the surrogate code unit in s by
  * scanning backwards from the end.
  * A negative length means that s is NUL-terminated and u_strlen() is used.
  * Returns NULL if there is no unpaired occurrence.
  */
 U_CFUNC const UChar *
 uprv_strFindLastSurrogate(const UChar *s, int32_t length, UChar surrogate) {
     const UChar *limit = s + ((length>=0) ? length : u_strlen(s));
     const UChar *t = limit;

     while(t!=s) {
         --t;
         if(*t==surrogate && uprv_isSingleSurrogate(s, t, surrogate, limit)) {
             return t;
         }
     }

     return NULL;
 }

 /*
  * Find the first occurrence of the code point c in the NUL-terminated string s.
  *
  * - BMP non-surrogates are searched as a single code unit.
  * - A surrogate code point only matches an unpaired surrogate code unit.
  * - A supplementary code point is searched as its lead/trail pair.
  *
  * Returns NULL if c is not found, and also if c is not a code point at all
  * (negative or above 0x10ffff), the same way u_memchr32() treats such input.
  */
 U_CAPI UChar * U_EXPORT2
 u_strchr32(const UChar *s, UChar32 c) {
   if((uint32_t)c > 0x10ffff) {
     /* not a code point: never truncate it to a UChar or build surrogates from it */
     return NULL;
   } else if(c < 0xd800) {
     /* non-surrogate BMP code point */
     return u_strchr(s, (UChar)c);
   } else if(c <= 0xdfff) {
     /* surrogate code point */
     return (UChar *)uprv_strFindSurrogate(s, -1, (UChar)c);
   } else if(c <= 0xffff) {
     /* non-surrogate BMP code point */
     return u_strchr(s, (UChar)c);
   } else {
     /* supplementary code point, search for string */
     UChar buffer[3];

     buffer[0] = UTF16_LEAD(c);
     buffer[1] = UTF16_TRAIL(c);
     buffer[2] = 0;
     return u_strstr(s, buffer);
   }
 }

 /*
  * Search string for the first position whose code point matches one of the
  * code points in matchSet (both NUL-terminated).
  * Returns a pointer to that position, or NULL if nothing matches.
  */
 U_CAPI UChar * U_EXPORT2
 u_strpbrk(const UChar *string, const UChar *matchSet)
 {
     int32_t matchLen;
     UBool single = TRUE;

     /* Measure matchSet and note whether every unit passes UTF_IS_SINGLE. */
     for (matchLen = 0; matchSet[matchLen]; matchLen++)
     {
         if (!UTF_IS_SINGLE(matchSet[matchLen]))
         {
             single = FALSE;
         }
     }

     if (single)
     {
         /* Fast path: plain code unit comparison, no decoding. */
         const UChar *matchItr;
         const UChar *strItr;

         for (strItr = string; *strItr; strItr++)
         {
             for (matchItr = matchSet; *matchItr; matchItr++)
             {
                 if (*matchItr == *strItr)
                 {
                     return (UChar *)strItr;
                 }
             }
         }
     }
     else
     {
         /*
          * Slow path: decode a code point at every code unit index of both
          * strings and compare the decoded values.
          */
         int32_t matchItr;
         int32_t strItr;
         UChar32 stringCh, matchSetCh;
         int32_t stringLen = u_strlen(string);

         for (strItr = 0; strItr < stringLen; strItr++)
         {
             UTF_GET_CHAR_SAFE(string, 0, strItr, stringLen, stringCh, TRUE);
             for (matchItr = 0; matchItr < matchLen; matchItr++)
             {
                 UTF_GET_CHAR_SAFE(matchSet, 0, matchItr, matchLen, matchSetCh, TRUE);
                 /*
                  * Equal decoded values count as a match unless they are both
                  * UTF_ERROR_VALUE; in that case accept only if the string unit
                  * literally is UTF_ERROR_VALUE or the matchSet unit is not a
                  * single unit.
                  * NOTE(review): presumably UTF_ERROR_VALUE is what the macro
                  * yields for ill-formed input in strict mode - confirm.
                  */
                 if (stringCh == matchSetCh && (stringCh != UTF_ERROR_VALUE
                     || string[strItr] == UTF_ERROR_VALUE
                     || (matchSetCh == UTF_ERROR_VALUE && !UTF_IS_SINGLE(matchSet[matchItr]))))
                 {
                     return (UChar *)string + strItr;
                 }
             }
         }
     }

     /* Didn't find it. */
     return NULL;
 }

 /*
  * Return the number of code units at the start of string that precede the
  * first code point found by u_strpbrk(string, matchSet).
  * If nothing from matchSet occurs, this is the whole length of string.
  */
 U_CAPI int32_t U_EXPORT2
 u_strcspn(const UChar *string, const UChar *matchSet)
 {
     const UChar *hit = u_strpbrk(string, matchSet);

     return (hit != NULL) ? (int32_t)(hit - string) : u_strlen(string);
 }

 /*
  * Search string for the first code point that does not match any of the
  * matchSet code points (both NUL-terminated).
  * Returns the index of that position, or the length of string if every
  * position matches.
  */
 U_CAPI int32_t U_EXPORT2
 u_strspn(const UChar *string, const UChar *matchSet)
 {
     UBool single = TRUE;
     UBool match = TRUE;
     int32_t matchLen;
     int32_t retValue;

     /* Measure matchSet and note whether every unit passes UTF_IS_SINGLE. */
     for (matchLen = 0; matchSet[matchLen]; matchLen++)
     {
         if (!UTF_IS_SINGLE(matchSet[matchLen]))
         {
             single = FALSE;
         }
     }

     if (single)
     {
         /* Fast path: plain code unit comparison, no decoding. */
         const UChar *matchItr;
         const UChar *strItr;

         /* The loop stops one step after the first unit with no match. */
         for (strItr = string; *strItr && match; strItr++)
         {
             match = FALSE;
             for (matchItr = matchSet; *matchItr; matchItr++)
             {
                 if (*matchItr == *strItr)
                 {
                     match = TRUE;
                     break;
                 }
             }
         }
         /* On a mismatch strItr has already moved past it, so step back by one. */
         retValue = strItr - string - (match == FALSE);
     }
     else
     {
         /*
          * Slow path: decode a code point at every code unit index of both
          * strings and compare the decoded values.
          */
         int32_t matchItr;
         int32_t strItr;
         UChar32 stringCh, matchSetCh;
         int32_t stringLen = u_strlen(string);

         for (strItr = 0; strItr < stringLen && match; strItr++)
         {
             match = FALSE;
             UTF_GET_CHAR_SAFE(string, 0, strItr, stringLen, stringCh, TRUE);
             for (matchItr = 0; matchItr < matchLen; matchItr++)
             {
                 UTF_GET_CHAR_SAFE(matchSet, 0, matchItr, matchLen, matchSetCh, TRUE);
                 /*
                  * Equal decoded values count as a match unless they are both
                  * UTF_ERROR_VALUE; in that case accept only if the string unit
                  * literally is UTF_ERROR_VALUE or the matchSet unit is not a
                  * single unit.
                  */
                 if (stringCh == matchSetCh && (stringCh != UTF_ERROR_VALUE
                     || string[strItr] == UTF_ERROR_VALUE
                     || (matchSetCh == UTF_ERROR_VALUE && !UTF_IS_SINGLE(matchSet[matchItr]))))
                 {
                     match = TRUE;
                     break;
                 }
             }
         }
         /* Same adjustment as above: undo the increment past the mismatch. */
         retValue = strItr - (match == FALSE);
     }

     /* Found a mismatch or didn't find it. */
     return retValue;
 }

 /* ----- Text manipulation functions --- */

 /*
  * Reentrant tokenizer in the style of strtok_r().
  *
  * src        string to tokenize on the first call, NULL to continue
  * delim      NUL-terminated set of delimiter code points
  * saveState  caller-owned iteration state; must not be NULL
  *
  * Returns the next token (NUL-terminated in place inside the source string),
  * or NULL when there are no more tokens.
  */
 U_CAPI UChar* U_EXPORT2
 u_strtok_r(UChar    *src,
      const UChar    *delim,
            UChar   **saveState)
 {
     UChar *token;
     UChar *tokenEnd;

     if (src != NULL) {
         /* First call: remember src in case there are no delimiters */
         *saveState = src;
         token = src;
     }
     else {
         token = *saveState;
         if (token == NULL) {
             /* Tokenizing already finished on an earlier call */
             return NULL;
         }
     }

     /* Skip initial delimiters */
     token += u_strspn(token, delim);

     if (*token == 0) {
         /* No tokens were found. Only delimiters were left. */
         *saveState = NULL;
         return NULL;
     }

     tokenEnd = u_strpbrk(token, delim);
     if (tokenEnd != NULL) {
         /* Cut the token off at the delimiter and resume right after it */
         *tokenEnd = 0;
         *saveState = tokenEnd + 1;
         return token;
     }
     if (*saveState != NULL) {
         /* Return the last token */
         *saveState = NULL;
         return token;
     }
     return NULL;
 }

 /*
  * Append the NUL-terminated string src, including its terminator, to the
  * end of the NUL-terminated string dst. Returns dst.
  */
 U_CAPI UChar* U_EXPORT2
 u_strcat(UChar     *dst,
     const UChar     *src)
 {
     UChar *tail = dst;
     UChar unit;

     /* walk to the terminator of dst */
     while(*tail != 0) {
         ++tail;
     }

     /* copy src over, terminator included */
     do {
         unit = *src++;
         *tail++ = unit;
     } while(unit != 0);

     return dst;
 }

 /*
  * Append at most n code units of src to the NUL-terminated string dst and
  * NUL-terminate the result. With n<=0 nothing is modified. Returns dst.
  */
 U_CAPI UChar*  U_EXPORT2
 u_strncat(UChar     *dst,
      const UChar     *src,
      int32_t     n )
 {
     UChar *tail;

     if(n <= 0) {
         return dst;
     }

     /* walk to the terminator of dst */
     tail = dst;
     while(*tail != 0) {
         ++tail;
     }

     /* copy until src ends or the budget is used up; src[n] is never read */
     while(n > 0 && *src != 0) {
         *tail++ = *src++;
         --n;
     }
     *tail = 0;

     return dst;
 }

 /* ----- Text property functions --- */

 /*
  * Compare two NUL-terminated strings in code unit order.
  * Returns the difference of the first pair of differing code units,
  * or 0 if the strings are equal.
  */
 U_CAPI int32_t   U_EXPORT2
 u_strcmp(const UChar *s1,
     const UChar *s2)
 {
     /* advance over the common prefix, stopping at the terminator */
     while(*s1 == *s2 && *s1 != 0) {
         ++s1;
         ++s2;
     }
     return (int32_t)*s1 - (int32_t)*s2;
 }

 /*
  * Rotate surrogates to the top to get code point order; assume c>=0xd800.
  * 0xd800..0xdfff move up to 0xf800..0xffff and 0xe000..0xffff move down to
  * 0xd800..0xf7ff. c must be a modifiable 16-bit lvalue; it is evaluated
  * more than once.
  * Wrapped in do { } while(0) so that "UTF16FIXUP(c);" is a single statement
  * and stays valid in an unbraced if/else.
  */
 #define UTF16FIXUP(c) do {               \
     if ((c) >= 0xe000) {                 \
         (c) -= 0x800;                    \
     } else {                             \
         (c) += 0x2000;                   \
     }                                    \
 } while(0)


 /*
  * Compare two NUL-terminated strings in code point order
  * (u_strcmp() compares in code unit order).
  * Returns 0 for equal strings, otherwise the difference of the first pair
  * of differing code units after surrogate fix-up.
  */
 U_CAPI int32_t U_EXPORT2
 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
     UChar c1, c2;

     /* skip the identical prefix - it does not need to be fixed up */
     do {
         c1=*s1++;
         c2=*s2++;
     } while(c1==c2 && c1!=0);

     if(c1==c2) {
         return 0; /* both strings ended together */
     }

     /* if both values are in or above the surrogate range, fix them up */
     if(c1>=0xD800 && c2>=0xD800) {
         UTF16FIXUP(c1);
         UTF16FIXUP(c2);
     }

     /* now c1 and c2 are in UTF-32-compatible order */
     return (int32_t)c1-(int32_t)c2;
 }

 /*
  * Compare at most n code units of two NUL-terminated strings in code unit
  * order. Returns the difference of the first differing pair, or 0 if the
  * first n units (or both whole strings) are equal. n<=0 yields 0.
  */
 U_CAPI int32_t   U_EXPORT2
 u_strncmp(const UChar     *s1,
      const UChar     *s2,
      int32_t     n)
 {
     for(; n > 0; --n, ++s1, ++s2) {
         int32_t diff = (int32_t)*s1 - (int32_t)*s2;
         if(diff != 0 || *s1 == 0) {
             return diff;
         }
     }
     return 0;
 }

 /*
  * Compare at most n code units of two NUL-terminated strings in code point
  * order. Returns 0 if they are equal within that bound (or n<=0), otherwise
  * the difference of the first differing pair after surrogate fix-up.
  */
 U_CAPI int32_t U_EXPORT2
 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
     UChar c1, c2;

     for(; n>0; --n, ++s1, ++s2) {
         c1=*s1;
         c2=*s2;
         if(c1!=c2) {
             /* fix up each one if they're both in or above the surrogate range */
             if(c1>=0xD800 && c2>=0xD800) {
                 UTF16FIXUP(c1);
                 UTF16FIXUP(c2);
             }
             /* now c1 and c2 are in UTF-32-compatible order */
             return (int32_t)c1-(int32_t)c2;
         }
         if(c1==0) {
             return 0; /* both strings ended together */
         }
     }

     /* identical over the first n units */
     return 0;
 }

 /*
  * Copy the NUL-terminated string src, including its terminator, to dst.
  * Returns dst.
  */
 U_CAPI UChar* U_EXPORT2
 u_strcpy(UChar     *dst,
     const UChar     *src)
 {
     UChar *out = dst;
     UChar unit;

     do {
         unit = *src++;
         *out++ = unit;
     } while(unit != 0);

     return dst;
 }

 /*
  * Copy at most n code units from src to dst, stopping after a NUL has been
  * copied. Unlike C strncpy() the rest of dst is not zero-padded, and dst is
  * not terminated when src holds n or more units. Returns dst.
  */
 U_CAPI UChar*  U_EXPORT2
 u_strncpy(UChar     *dst,
      const UChar     *src,
      int32_t     n)
 {
     UChar *out = dst;

     for(; n > 0; --n) {
         if((*out++ = *src++) == 0) {
             break; /* terminator copied */
         }
     }

     return dst;
 }

 /*
  * Return the number of code units in the NUL-terminated string s, not
  * counting the terminator. When wchar_t and UChar have the same size the
  * work is delegated to uprv_wcslen().
  */
 U_CAPI int32_t   U_EXPORT2
 u_strlen(const UChar *s)
 {
 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
     return uprv_wcslen(s);
 #else
     const UChar *end;

     for(end = s; *end != 0; ++end) {
     }
     return end - s;
 #endif
 }

 /*
  * Count the code points in a UTF-16 string; a lead unit directly followed
  * by a trail unit counts once, every other unit counts by itself.
  * length is the number of code units, or -1 for a NUL-terminated string.
  * Returns 0 for s==NULL or length<-1.
  */
 U_CAPI int32_t U_EXPORT2
 u_countChar32(const UChar *s, int32_t length) {
     int32_t count=0;

     if(s==NULL || length<-1) {
         return 0;
     }

     if(length<0) {
         /* length==-1: NUL-terminated */
         UChar c;

         while((c=*s++)!=0) {
             ++count;

             /*
              * sufficient to look ahead one because of UTF-16;
              * safe to look ahead one because at worst that would be the terminating NUL
              */
             if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
                 ++s;
             }
         }
     } else {
         for(; length>0; ++count) {
             /* a pair is consumed only when both of its units are inside the length */
             int32_t step=(UTF_IS_LEAD(*s) && length>=2 && UTF_IS_TRAIL(*(s+1))) ? 2 : 1;

             s+=step;
             length-=step;
         }
     }
     return count;
 }

 /*
  * Copy count UChars from src to dest through uprv_memcpy(); the byte count
  * passed on is count*U_SIZEOF_UCHAR. Returns the result of uprv_memcpy()
  * cast to UChar * (presumably dest, as with C memcpy() - confirm).
  */
 U_CAPI UChar * U_EXPORT2
 u_memcpy(UChar *dest, const UChar *src, int32_t count) {
     return (UChar *)uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
 }

 /*
  * Move count UChars from src to dest through uprv_memmove(); the byte count
  * passed on is count*U_SIZEOF_UCHAR. Returns the result of uprv_memmove()
  * cast to UChar * (presumably dest, as with C memmove() - confirm).
  */
 U_CAPI UChar * U_EXPORT2
 u_memmove(UChar *dest, const UChar *src, int32_t count) {
     return (UChar *)uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
 }

 /*
  * Store the code unit c into the first count elements of dest.
  * Nothing is written when count<=0. Returns dest.
  */
 U_CAPI UChar * U_EXPORT2
 u_memset(UChar *dest, UChar c, int32_t count) {
     int32_t i;

     for(i = 0; i < count; ++i) {
         dest[i] = c;
     }
     return dest;
 }

 /*
  * Compare the first count code units of two buffers in code unit order.
  * Returns the difference of the first differing pair, or 0 if all count
  * units are equal or count<=0. NUL units are compared like any other.
  */
 U_CAPI int32_t U_EXPORT2
 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
     int32_t i;

     for(i = 0; i < count; ++i) {
         int32_t diff = (int32_t)(uint16_t)buf1[i] - (int32_t)(uint16_t)buf2[i];
         if(diff != 0) {
             return diff;
         }
     }
     return 0;
 }

 /*
  * Compare the first count code units of two buffers in code point order.
  * Returns 0 if they are all equal or count<=0, otherwise the difference of
  * the first differing pair after surrogate fix-up.
  */
 U_CAPI int32_t U_EXPORT2
 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
     int32_t i;

     for(i=0; i<count; ++i) {
         UChar c1=s1[i];
         UChar c2=s2[i];

         if(c1!=c2) {
             /* fix up each one if they're both in or above the surrogate range */
             if(c1>=0xD800 && c2>=0xD800) {
                 UTF16FIXUP(c1);
                 UTF16FIXUP(c2);
             }
             /* now c1 and c2 are in UTF-32-compatible order */
             return (int32_t)c1-(int32_t)c2;
         }
     }

     /* identical over all count units */
     return 0;
 }

 /*
  * Find the first occurrence of the code unit ch among the first count
  * units of src. Returns NULL if it does not occur or count<=0.
  */
 U_CAPI UChar * U_EXPORT2
 u_memchr(const UChar *src, UChar ch, int32_t count) {
     int32_t i;

     for(i = 0; i < count; ++i) {
         if(src[i] == ch) {
             return (UChar *)(src + i);
         }
     }
     return NULL;
 }

 /*
  * Find the first occurrence of the code point ch among the first count
  * code units of src.
  * Returns NULL if it does not occur, if count<=0, or if ch is outside
  * 0..0x10ffff. A surrogate code point only matches an unpaired surrogate.
  */
 U_CAPI UChar * U_EXPORT2
 u_memchr32(const UChar *src, UChar32 ch, int32_t count) {
     if(count<=0 || (uint32_t)ch>0x10ffff) {
         return NULL; /* no string, or illegal arguments */
     }

     if(ch>0xffff) {
         /* supplementary code point: look for its lead/trail pair */
         UChar lead=UTF16_LEAD(ch), trail=UTF16_TRAIL(ch);
         int32_t i;

         /* i+1<count keeps the trail unit inside the buffer; too short for a pair means no loop */
         for(i=0; i+1<count; ++i) {
             if(src[i]==lead && src[i+1]==trail) {
                 return (UChar *)(src+i);
             }
         }
         return NULL;
     }

     if(ch>=0xd800 && ch<=0xdfff) {
         /* surrogate code point */
         return (UChar *)uprv_strFindSurrogate(src, count, (UChar)ch);
     }

     /* non-surrogate BMP code point, single UChar */
     return u_memchr(src, (UChar)ch, count);
 }

 /* conversions between char* and UChar* ------------------------------------- */

 /*
  * Returns the minimum of (the length of the NUL-terminated string s1) and n.
  * A NULL s1 has length 0. At most n chars of s1 are examined, so s1 need not
  * be NUL-terminated if it holds at least n chars.
  * A negative n imposes no bound, as before.
  */
 static int32_t u_astrnlen(const char *s1, int32_t n)
 {
     int32_t len = 0;

     if (s1)
     {
         /* test the remaining budget first so that s1[n] is never read */
         while (n != 0 && s1[len] != 0)
         {
             ++len;
             --n;
         }
     }
     return len;
 }

 /*
  * Convert at most n chars of the NUL-terminated string s2 with the default
  * converter and store at most n UChars in ucs1.
  * The result is NUL-terminated only if fewer than n UChars were produced.
  * If no converter is available or the conversion fails, ucs1 is set to the
  * empty string. With n<=0 nothing is written at all. Returns ucs1.
  */
 U_CAPI UChar*  U_EXPORT2
 u_uastrncpy(UChar *ucs1,
            const char *s2,
            int32_t n)
 {
   UChar *target = ucs1;
   UErrorCode err = U_ZERO_ERROR;
   UConverter *cnv;

   if(n <= 0) {
     /* zero capacity: even the "failure" terminator below would overrun ucs1 */
     return ucs1;
   }

   cnv = u_getDefaultConverter(&err);
   if(U_SUCCESS(err) && cnv != NULL) {
     ucnv_reset(cnv);
     ucnv_toUnicode(cnv,
                    &target,
                    ucs1+n,
                    &s2,
                    s2+u_astrnlen(s2, n),
                    NULL,
                    TRUE,
                    &err);
     ucnv_reset(cnv); /* be good citizens */
     u_releaseDefaultConverter(cnv);
     if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
       *ucs1 = 0; /* failure */
     }
     if(target < (ucs1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
       *target = 0;  /* terminate */
     }
   } else {
     *ucs1 = 0;
   }
   return ucs1;
 }

 /*
  * Convert the NUL-terminated char string s2 with the default converter and
  * store the result in ucs1. The destination capacity is not known here, so
  * MAX_STRLEN is passed to the converter.
  * If no converter is available or the conversion fails, ucs1 is set to the
  * empty string. Returns ucs1.
  */
 U_CAPI UChar*  U_EXPORT2
 u_uastrcpy(UChar *ucs1,
           const char *s2 )
 {
   UErrorCode err = U_ZERO_ERROR;
   UConverter *cnv = u_getDefaultConverter(&err);

   if(!U_SUCCESS(err) || cnv == NULL) {
     *ucs1 = 0;
     return ucs1;
   }

   ucnv_toUChars(cnv,
                   ucs1,
                   MAX_STRLEN,
                   s2,
                   uprv_strlen(s2),
                   &err);
   u_releaseDefaultConverter(cnv);

   if(U_FAILURE(err)) {
     *ucs1 = 0;
   }
   return ucs1;
 }

 /*
  * Returns the minimum of (the length of the NUL-terminated string ucs1) and n.
  * A NULL ucs1 has length 0. At most n UChars of ucs1 are examined, so ucs1
  * need not be NUL-terminated if it holds at least n UChars.
  * A negative n imposes no bound, as before.
  */
 static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
 {
     int32_t len = 0;

     if (ucs1)
     {
         /* test the remaining budget first so that ucs1[n] is never read */
         while (n != 0 && ucs1[len] != 0)
         {
             ++len;
             --n;
         }
     }
     return len;
 }

 /*
  * Convert at most n UChars of the NUL-terminated string ucs2 with the
  * default converter and store at most n chars in s1.
  * The result is NUL-terminated only if fewer than n chars were produced.
  * If no converter is available or the conversion fails, s1 is set to the
  * empty string. With n<=0 nothing is written at all. Returns s1.
  */
 U_CAPI char*  U_EXPORT2
 u_austrncpy(char *s1,
         const UChar *ucs2,
         int32_t n)
 {
   char *target = s1;
   UErrorCode err = U_ZERO_ERROR;
   UConverter *cnv;

   if(n <= 0) {
     /* zero capacity: even the "failure" terminator below would overrun s1 */
     return s1;
   }

   cnv = u_getDefaultConverter(&err);
   if(U_SUCCESS(err) && cnv != NULL) {
     ucnv_reset(cnv);
     ucnv_fromUnicode(cnv,
                   &target,
                   s1+n,
                   &ucs2,
                   ucs2+u_ustrnlen(ucs2, n),
                   NULL,
                   TRUE,
                   &err);
     ucnv_reset(cnv); /* be good citizens */
     u_releaseDefaultConverter(cnv);
     if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
       *s1 = 0; /* failure */
     }
     if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
       *target = 0;  /* terminate */
     }
   } else {
     *s1 = 0;
   }
   return s1;
 }

 /*
  * Convert the NUL-terminated UChar string ucs2 with the default converter
  * and store the result in s1, terminated at the length reported by
  * ucnv_fromUChars(). The destination capacity is not known here, so
  * MAX_STRLEN is passed to the converter.
  * If no converter is available, s1 is set to the empty string. Returns s1.
  */
 U_CAPI char*  U_EXPORT2
 u_austrcpy(char *s1,
          const UChar *ucs2 )
 {
   UErrorCode err = U_ZERO_ERROR;
   UConverter *cnv = u_getDefaultConverter(&err);
   int32_t len;

   if(!U_SUCCESS(err) || cnv == NULL) {
     *s1 = 0;
     return s1;
   }

   len = ucnv_fromUChars(cnv,
                 s1,
                 MAX_STRLEN,
                 ucs2,
                 -1,
                 &err);
   u_releaseDefaultConverter(cnv);
   s1[len] = 0;
   return s1;
 }

 /* mutexed access to a shared default converter ----------------------------- */

 UBool ustring_cleanup(void) {
     if (gDefaultConverter) {
         ucnv_close(gDefaultConverter);
         gDefaultConverter = NULL;
     }

     /* it's safe to close a 0 converter  */
     return TRUE;
 }

 /*
  * Hand out a default converter for exclusive use by the caller.
  * The cached converter in gDefaultConverter is taken if present (the slot
  * is set to NULL); otherwise a new one is opened with ucnv_open(NULL, status).
  * Returns NULL if opening fails; *status then holds the failure code.
  * The caller gives the converter back with u_releaseDefaultConverter().
  */
 U_CAPI UConverter* U_EXPORT2
 u_getDefaultConverter(UErrorCode *status)
 {
     UConverter *converter = NULL;

     /* unlocked peek first, so the mutex is skipped while the cache is empty */
     if (gDefaultConverter != NULL) {
         umtx_lock(NULL);

         /* need to check to make sure it wasn't taken out from under us */
         if (gDefaultConverter != NULL) {
             converter = gDefaultConverter;
             gDefaultConverter = NULL;
         }
         umtx_unlock(NULL);
     }

     /* if the cache was empty, create a converter */
     if(converter == NULL) {
         converter = ucnv_open(NULL, status);
         if(U_FAILURE(*status)) {
             return NULL;
         }
     }

     return converter;
 }

 /*
  * Give back a converter obtained from u_getDefaultConverter().
  * If the cache slot gDefaultConverter is empty, the converter is reset and
  * stored there for reuse; otherwise it is closed. converter may be NULL.
  */
 U_CAPI void U_EXPORT2
 u_releaseDefaultConverter(UConverter *converter)
 {
   /* unlocked peek first; the slot is re-checked under the lock below */
   if(gDefaultConverter == NULL) {
     if (converter != NULL) {
       ucnv_reset(converter);
     }
     umtx_lock(NULL);

     if(gDefaultConverter == NULL) {
       gDefaultConverter = converter;
       converter = NULL; /* ownership moved to the cache; skip the close below */
     }
     umtx_unlock(NULL);
   }

   /* still ours: the slot was occupied, so this converter is surplus */
   if(converter != NULL) {
     ucnv_close(converter);
   }
 }

 /* u_unescape & support fns ------------------------------------------------- */

 /*
  * Table of single-letter C escapes, stored as pairs:
  * (escape letter, code unit it stands for).
  * This map must be in ASCENDING ORDER OF THE ESCAPE CODE, because the lookup
  * loop in u_unescapeAt() stops as soon as it meets a larger escape letter.
  * The commented-out entries map to themselves and are not in the table.
  */
 static const UChar UNESCAPE_MAP[] = {
     /*"   0x22, 0x22 */
     /*'   0x27, 0x27 */
     /*?   0x3F, 0x3F */
     /*\   0x5C, 0x5C */
     /*a*/ 0x61, 0x07,
     /*b*/ 0x62, 0x08,
     /*f*/ 0x66, 0x0c,
     /*n*/ 0x6E, 0x0a,
     /*r*/ 0x72, 0x0d,
     /*t*/ 0x74, 0x09,
     /*v*/ 0x76, 0x0b
 };
 /* Number of UChars in the table, i.e. twice the number of pairs */
 enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };

 /* Map an octal digit U+0030..U+0037 to its value 0..7; any other unit gives -1 */
 static int8_t _digit8(UChar c) {
     int32_t value = -1;

     if (c >= 0x0030 && c <= 0x0037) {
         value = c - 0x0030;
     }
     return (int8_t)value;
 }

 /*
  * Map a hex digit (0-9, A-F or a-f) to its value 0..15;
  * any other unit gives -1
  */
 static int8_t _digit16(UChar c) {
     int32_t value = -1;

     if (c >= 0x0030 && c <= 0x0039) {
         value = c - 0x0030;           /* '0'..'9' */
     } else if (c >= 0x0041 && c <= 0x0046) {
         value = c - 0x0041 + 10;      /* 'A'..'F' */
     } else if (c >= 0x0061 && c <= 0x0066) {
         value = c - 0x0061 + 10;      /* 'a'..'f' */
     }
     return (int8_t)value;
 }

 /* Parse a single escape sequence.  Although this method deals in
  * UChars, it does not use C++ or UnicodeString.  This allows it to
  * be used from C contexts.
  *
  * charAt   callback that returns the UChar at a given offset of the text
  * offset   in: index of the first UChar after the backslash;
  *          out: index just after the escape, or unchanged on error
  * length   number of UChars available through charAt
  * context  opaque pointer passed through to charAt
  *
  * Recognized forms: \uhhhh, \Uhhhhhhhh, \xh or \xhh, one to three octal
  * digits, the letters in UNESCAPE_MAP, and otherwise the next character
  * itself (a surrogate pair is combined into one code point).
  *
  * Returns the code point, or (UChar32)0xFFFFFFFF for an ill-formed escape:
  * offset out of range, too few digits, or a numeric value that is not a
  * code point in 0..0x10FFFF.
  */
 U_CAPI UChar32 U_EXPORT2
 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
              int32_t *offset,
              int32_t length,
              void *context) {

     int32_t start = *offset;
     UChar c;
     UChar32 result = 0;
     int8_t n = 0;
     int8_t minDig = 0;
     int8_t maxDig = 0;
     int8_t bitsPerDigit = 4;
     int8_t dig;
     int32_t i;

     /* Check that offset is in range */
     if (*offset < 0 || *offset >= length) {
         goto err;
     }

     /* Fetch first UChar after '\\' */
     c = charAt((*offset)++, context);

     /* Convert hexadecimal and octal escapes */
     switch (c) {
     case 0x0075 /*'u'*/:
         minDig = maxDig = 4;
         break;
     case 0x0055 /*'U'*/:
         minDig = maxDig = 8;
         break;
     case 0x0078 /*'x'*/:
         minDig = 1;
         maxDig = 2;
         break;
     default:
         dig = _digit8(c);
         if (dig >= 0) {
             minDig = 1;
             maxDig = 3;
             n = 1; /* Already have first octal digit */
             bitsPerDigit = 3;
             result = dig;
         }
         break;
     }
     if (minDig != 0) {
         while (*offset < length && n < maxDig) {
             c = charAt(*offset, context);
             dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
             if (dig < 0) {
                 break;
             }
             /* Shift as unsigned: the eight digits of \U can reach the sign
              * bit, and left-shifting a signed value that far is undefined. */
             result = (UChar32)(((uint32_t)result << bitsPerDigit) | (uint32_t)dig);
             ++(*offset);
             ++n;
         }
         if (n < minDig) {
             goto err;
         }
         /* Only \U can leave the code point range; reject such values
          * instead of handing callers something they cannot encode. */
         if (result < 0 || result > 0x10FFFF) {
             goto err;
         }
         return result;
     }

     /* Convert C-style escapes in table */
     for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
         if (c == UNESCAPE_MAP[i]) {
             return UNESCAPE_MAP[i+1];
         } else if (c < UNESCAPE_MAP[i]) {
             /* table is sorted: no later entry can match */
             break;
         }
     }

     /* If no special forms are recognized, then consider
      * the backslash to generically escape the next character.
      * Deal with surrogate pairs. */
     if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
         UChar c2 = charAt(*offset, context);
         if (UTF_IS_SECOND_SURROGATE(c2)) {
             ++(*offset);
             return UTF16_GET_PAIR_VALUE(c, c2);
         }
     }
     return c;

  err:
     /* Invalid escape sequence */
     *offset = start; /* Reset to initial value */
     return (UChar32)0xFFFFFFFF;
 }

 /*
  * u_unescapeAt() callback for char* text: context is the char string,
  * and the char at offset is returned converted to a UChar.
  */
 static UChar _charPtr_charAt(int32_t offset, void *context) {
     char *text = (char *)context;
     UChar unit;

     /* It would be more efficient to access the invariant tables
      * directly but there is no API for that. */
     u_charsToUChars(text + offset, &unit, 1);
     return unit;
 }

 /*
  * Append an escape-free segment of the text; used by u_unescape().
  * Converts min(srcLen, destCapacity) chars from src into dest; a negative
  * destCapacity counts as 0.
  */
 static void _appendUChars(UChar *dest, int32_t destCapacity,
                           const char *src, int32_t srcLen) {
     int32_t room = (destCapacity < 0) ? 0 : destCapacity;
     int32_t count = (srcLen > room) ? room : srcLen;

     u_charsToUChars(src, dest, count);
 }

 /*
  * Do an invariant conversion of char* -> UChar*, with escape parsing.
  *
  * src           NUL-terminated char string that may contain backslash escapes
  * dest          output buffer, or NULL to only compute the required size
  * destCapacity  number of UChars available in dest
  *
  * Returns the number of UChars needed for the result plus 1 for the
  * terminator, or 0 if an escape sequence could not be parsed (dest is then
  * set to the empty string if it has room).
  * Output that does not fit into dest is counted but not written.
  */
 U_CAPI int32_t U_EXPORT2
 u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
     const char *segment = src; /* start of the pending escape-free run */
     int32_t i = 0;             /* output index / running length */
     char c;

     while ((c=*src) != 0) {
         /* '\\' intentionally written as compiler-specific
          * character constant to correspond to compiler-specific
          * char* constants. */
         if (c == '\\') {
             int32_t lenParsed = 0;
             UChar32 c32;
             /* flush the plain text collected before this backslash */
             if (src != segment) {
                 if (dest != NULL) {
                     _appendUChars(dest + i, destCapacity - i,
                                   segment, src - segment);
                 }
                 i += src - segment;
             }
             ++src; /* advance past '\\' */
             c32 = u_unescapeAt(_charPtr_charAt, &lenParsed, uprv_strlen(src), (void*)src);
             /* u_unescapeAt() leaves the offset untouched on error */
             if (lenParsed == 0) {
                 goto err;
             }
             src += lenParsed; /* advance past escape seq. */
             /* the append macro advances i itself; otherwise only count */
             if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
                 UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
             } else {
                 i += UTF_CHAR_LENGTH(c32);
             }
             segment = src;
         } else {
             ++src;
         }
     }
     /* flush the trailing escape-free run */
     if (src != segment) {
         if (dest != NULL) {
             _appendUChars(dest + i, destCapacity - i,
                           segment, src - segment);
         }
         i += src - segment;
     }
     if (dest != NULL && i < destCapacity) {
         dest[i] = 0;
     }
     return i + 1; /* add 1 for zero term */

  err:
     if (dest != NULL && destCapacity > 0) {
         *dest = 0;
     }
     return 0;
 }

 /* C UGrowBuffer implementation --------------------------------------------- */

 /*
  * UGrowBuffer implementation: replace *pBuffer with a newly allocated buffer
  * of reqCapacity UChars and copy the first length UChars over.
  *
  * context  the caller's original static buffer; it is never freed
  *
  * On success *pBuffer/*pCapacity describe the new buffer and TRUE is
  * returned. On allocation failure *pBuffer is NULL, *pCapacity is 0 and
  * FALSE is returned. In both cases the old buffer is freed unless it is the
  * static one.
  */
 U_CAPI UBool /* U_CALLCONV U_EXPORT2 */
 u_growBufferFromStatic(void *context,
                        UChar **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
                        int32_t length) {
     UChar *oldBuffer=*pBuffer;
     UChar *newBuffer=(UChar *)uprv_malloc(reqCapacity*U_SIZEOF_UCHAR);

     if(newBuffer==NULL) {
         *pCapacity=0;
     } else {
         if(length>0) {
             uprv_memcpy(newBuffer, oldBuffer, length*U_SIZEOF_UCHAR);
         }
         *pCapacity=reqCapacity;
     }

     /* release the old buffer if it was not statically allocated */
     if(oldBuffer!=(UChar *)context) {
         uprv_free(oldBuffer);
     }

     *pBuffer=newBuffer;
     return (UBool)(newBuffer!=NULL);
 }

 /* NUL-termination of strings ----------------------------------------------- */

 /**
  * NUL-terminate a string no matter what its type.
  * Set warning and error codes accordingly:
  * - length<destCapacity:  dest[length]=0, and a pending
  *                         U_STRING_NOT_TERMINATED_WARNING is cleared
  * - length==destCapacity: U_STRING_NOT_TERMINATED_WARNING
  * - length>destCapacity:  U_BUFFER_OVERFLOW_ERROR
  * Nothing happens if pErrorCode is NULL, already holds a failure, or length<0.
  *
  * Wrapped in do { } while(0) with parenthesized arguments so that an
  * invocation is one statement and is safe inside an unbraced if/else.
  */
 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
     do {                                                                \
         if((pErrorCode)!=NULL && U_SUCCESS(*(pErrorCode))) {            \
             /* not a public function, so no complete argument checking */ \
                                                                         \
             if((length)<0) {                                            \
                 /* assume that the caller handles this */               \
             } else if((length)<(destCapacity)) {                        \
                 /* NUL-terminate the string, the NUL fits */            \
                 (dest)[(length)]=0;                                     \
                 /* unset the not-terminated warning but leave all others */ \
                 if(*(pErrorCode)==U_STRING_NOT_TERMINATED_WARNING) {    \
                     *(pErrorCode)=U_ZERO_ERROR;                         \
                 }                                                       \
             } else if((length)==(destCapacity)) {                       \
                 /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
                 *(pErrorCode)=U_STRING_NOT_TERMINATED_WARNING;          \
             } else /* length>destCapacity */ {                          \
                 /* even the string itself did not fit - set an error code */ \
                 *(pErrorCode)=U_BUFFER_OVERFLOW_ERROR;                  \
             }                                                           \
         }                                                               \
     } while(0)

 /*
  * Apply __TERMINATE_STRING to a UChar buffer: NUL-terminate dest at length
  * if it fits, otherwise record a warning or error in *pErrorCode.
  * Returns length unchanged.
  */
 U_CAPI int32_t U_EXPORT2
 u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
     return length;
 }

 /*
  * Apply __TERMINATE_STRING to a char buffer: NUL-terminate dest at length
  * if it fits, otherwise record a warning or error in *pErrorCode.
  * Returns length unchanged.
  */
 U_CAPI int32_t U_EXPORT2
 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
     return length;
 }

 /*
  * Apply __TERMINATE_STRING to a UChar32 buffer: NUL-terminate dest at length
  * if it fits, otherwise record a warning or error in *pErrorCode.
  * Returns length unchanged.
  */
 U_CAPI int32_t U_EXPORT2
 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
     return length;
 }

 /*
  * Apply __TERMINATE_STRING to a wchar_t buffer: NUL-terminate dest at length
  * if it fits, otherwise record a warning or error in *pErrorCode.
  * Returns length unchanged.
  */
 U_CAPI int32_t U_EXPORT2
 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
     return length;
 }