| /* |
| ****************************************************************************** |
| * |
| * Copyright (C) 2001-2004, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ****************************************************************************** |
| * |
| * File ustrtrns.c |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 9/10/2001 Ram Creation. |
| ****************************************************************************** |
| */ |
| |
| /******************************************************************************* |
| * |
| * u_strTo* and u_strFrom* APIs |
| * WCS functions moved to ustr_wcs.c for better modularization |
| * |
| ******************************************************************************* |
| */ |
| |
| |
| #include "unicode/putil.h" |
| #include "unicode/ustring.h" |
| #include "cstring.h" |
| #include "cmemory.h" |
| #include "ustr_imp.h" |
| |
| U_CAPI UChar* U_EXPORT2 |
| u_strFromUTF32(UChar *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const UChar32 *src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode) |
| { |
| int32_t reqLength = 0; |
| uint32_t ch =0; |
| UChar *pDestLimit =dest+destCapacity; |
| UChar *pDest = dest; |
| const uint32_t *pSrc = (const uint32_t *)src; |
| |
| /* args check */ |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| |
| /* Check if the source is null terminated */ |
| if(srcLength == -1 ){ |
| while(((ch=*pSrc)!=0) && (pDest < pDestLimit)){ |
| ++pSrc; |
| if(ch<=0xFFFF){ |
| *(pDest++)=(UChar)ch; |
| }else if(ch<=0x10ffff){ |
| *(pDest++)=UTF16_LEAD(ch); |
| if(pDest<pDestLimit){ |
| *(pDest++)=UTF16_TRAIL(ch); |
| }else{ |
| reqLength++; |
| break; |
| } |
| }else{ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| } |
| while((ch=*pSrc++) != 0){ |
| reqLength+=UTF_CHAR_LENGTH(ch); |
| } |
| }else{ |
| const uint32_t* pSrcLimit = ((const uint32_t*)pSrc) + srcLength; |
| while((pSrc < pSrcLimit) && (pDest < pDestLimit)){ |
| ch = *pSrc++; |
| if(ch<=0xFFFF){ |
| *(pDest++)=(UChar)ch; |
| }else if(ch<=0x10FFFF){ |
| *(pDest++)=UTF16_LEAD(ch); |
| if(pDest<pDestLimit){ |
| *(pDest++)=UTF16_TRAIL(ch); |
| }else{ |
| reqLength++; |
| break; |
| } |
| }else{ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| } |
| while(pSrc <pSrcLimit){ |
| ch = *pSrc++; |
| reqLength+=UTF_CHAR_LENGTH(ch); |
| } |
| } |
| |
| reqLength += (int32_t)(pDest - dest); |
| if(pDestLength){ |
| *pDestLength = reqLength; |
| } |
| |
| /* Terminate the buffer */ |
| u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); |
| |
| return dest; |
| } |
| |
| |
| U_CAPI UChar32* U_EXPORT2 |
| u_strToUTF32(UChar32 *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const UChar *src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode) |
| { |
| const UChar* pSrc = src; |
| const UChar* pSrcLimit; |
| int32_t reqLength=0; |
| uint32_t ch=0; |
| uint32_t *pDest = (uint32_t *)dest; |
| uint32_t *pDestLimit = pDest + destCapacity; |
| UChar ch2=0; |
| |
| /* args check */ |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| |
| if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| |
| if(srcLength==-1) { |
| while((ch=*pSrc)!=0 && pDest!=pDestLimit) { |
| ++pSrc; |
| /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/ |
| if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| ch=UTF16_GET_PAIR_VALUE(ch, ch2); |
| } |
| *(pDest++)= ch; |
| } |
| while((ch=*pSrc++)!=0) { |
| if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| } |
| ++reqLength; |
| } |
| } else { |
| pSrcLimit = pSrc+srcLength; |
| while(pSrc<pSrcLimit && pDest<pDestLimit) { |
| ch=*pSrc++; |
| if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| ch=UTF16_GET_PAIR_VALUE(ch, ch2); |
| } |
| *(pDest++)= ch; |
| } |
| while(pSrc!=pSrcLimit) { |
| ch=*pSrc++; |
| if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| } |
| ++reqLength; |
| } |
| } |
| |
| reqLength+=(int32_t)(pDest - (uint32_t *)dest); |
| if(pDestLength){ |
| *pDestLength = reqLength; |
| } |
| |
| /* Terminate the buffer */ |
| u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode); |
| |
| return dest; |
| } |
| |
| U_CAPI UChar* U_EXPORT2 |
| u_strFromUTF8(UChar *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const char* src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode){ |
| |
| UChar *pDest = dest; |
| UChar *pDestLimit = dest+destCapacity; |
| UChar32 ch=0; |
| int32_t index = 0; |
| int32_t reqLength = 0; |
| uint8_t* pSrc = (uint8_t*) src; |
| |
| /* args check */ |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| |
| if(srcLength == -1){ |
| srcLength = (int32_t)uprv_strlen((char*)pSrc); |
| } |
| |
| while((index < srcLength)&&(pDest<pDestLimit)){ |
| ch = pSrc[index++]; |
| if(ch <=0x7f){ |
| *pDest++=(UChar)ch; |
| }else{ |
| ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1); |
| if(ch<0){ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| }else if(ch<=0xFFFF){ |
| *(pDest++)=(UChar)ch; |
| }else{ |
| *(pDest++)=UTF16_LEAD(ch); |
| if(pDest<pDestLimit){ |
| *(pDest++)=UTF16_TRAIL(ch); |
| }else{ |
| reqLength++; |
| break; |
| } |
| } |
| } |
| } |
| /* donot fill the dest buffer just count the UChars needed */ |
| while(index < srcLength){ |
| ch = pSrc[index++]; |
| if(ch <= 0x7f){ |
| reqLength++; |
| }else{ |
| ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1); |
| if(ch<0){ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| reqLength+=UTF_CHAR_LENGTH(ch); |
| } |
| } |
| |
| reqLength+=(int32_t)(pDest - dest); |
| |
| if(pDestLength){ |
| *pDestLength = reqLength; |
| } |
| |
| /* Terminate the buffer */ |
| u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); |
| |
| return dest; |
| } |
| |
| static U_INLINE uint8_t * |
| _appendUTF8(uint8_t *pDest, UChar32 c) { |
| /* c<=0x7f is handled by the caller, here it is 0x80<=c<=0x10ffff */ |
| if((c)<=0x7ff) { |
| *pDest++=(uint8_t)((c>>6)|0xc0); |
| *pDest++=(uint8_t)((c&0x3f)|0x80); |
| } else if((uint32_t)(c)<=0xffff) { |
| *pDest++=(uint8_t)((c>>12)|0xe0); |
| *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80); |
| *pDest++=(uint8_t)(((c)&0x3f)|0x80); |
| } else /* if((uint32_t)(c)<=0x10ffff) */ { |
| *pDest++=(uint8_t)(((c)>>18)|0xf0); |
| *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80); |
| *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80); |
| *pDest++=(uint8_t)(((c)&0x3f)|0x80); |
| } |
| return pDest; |
| } |
| |
| |
| U_CAPI char* U_EXPORT2 |
| u_strToUTF8(char *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const UChar *pSrc, |
| int32_t srcLength, |
| UErrorCode *pErrorCode){ |
| |
| int32_t reqLength=0; |
| const UChar *pSrcLimit; |
| uint32_t ch=0,ch2=0; |
| uint8_t *pDest = (uint8_t *)dest; |
| uint8_t *pDestLimit = pDest + destCapacity; |
| |
| |
| /* args check */ |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| if((pSrc==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| |
| if(srcLength==-1) { |
| while((ch=*pSrc)!=0 && pDest!=pDestLimit) { |
| ++pSrc; |
| if(ch <= 0x7f) { |
| *pDest++ = (char)ch; |
| ++reqLength; |
| continue; |
| } |
| |
| /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/ |
| if(UTF_IS_SURROGATE(ch)) { |
| if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| ch=UTF16_GET_PAIR_VALUE(ch, ch2); |
| } else { |
| /* Unicode 3.2 forbids surrogate code points in UTF-8 */ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| } |
| reqLength += UTF8_CHAR_LENGTH(ch); |
| /* do we have enough room in destination? */ |
| if(destCapacity< reqLength){ |
| break; |
| } |
| /* convert and append*/ |
| pDest=_appendUTF8(pDest, ch); |
| } |
| while((ch=*pSrc++)!=0) { |
| if(ch<=0x7f) { |
| ++reqLength; |
| } else if(ch<=0x7ff) { |
| reqLength+=2; |
| } else if(!UTF_IS_SURROGATE(ch)) { |
| reqLength+=3; |
| } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| reqLength+=4; |
| } else { |
| /* Unicode 3.2 forbids surrogate code points in UTF-8 */ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| } |
| } else { |
| pSrcLimit = pSrc+srcLength; |
| while(pSrc<pSrcLimit && pDest<pDestLimit) { |
| ch=*pSrc++; |
| if(ch <= 0x7f) { |
| *pDest++ = (char)ch; |
| ++reqLength; |
| continue; |
| } |
| |
| if(UTF_IS_SURROGATE(ch)) { |
| if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| ch=UTF16_GET_PAIR_VALUE(ch, ch2); |
| } else { |
| /* Unicode 3.2 forbids surrogate code points in UTF-8 */ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| } |
| reqLength += UTF8_CHAR_LENGTH(ch); |
| /* do we have enough room in destination? */ |
| if(destCapacity< reqLength){ |
| break; |
| } |
| /* convert and append*/ |
| pDest=_appendUTF8(pDest, ch); |
| } |
| while(pSrc<pSrcLimit) { |
| ch=*pSrc++; |
| if(ch<=0x7f) { |
| ++reqLength; |
| } else if(ch<=0x7ff) { |
| reqLength+=2; |
| } else if(!UTF_IS_SURROGATE(ch)) { |
| reqLength+=3; |
| } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { |
| ++pSrc; |
| reqLength+=4; |
| } else { |
| /* Unicode 3.2 forbids surrogate code points in UTF-8 */ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| return NULL; |
| } |
| } |
| } |
| |
| if(pDestLength){ |
| *pDestLength = reqLength; |
| } |
| |
| /* Terminate the buffer */ |
| u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode); |
| |
| return (char*)dest; |
| } |