| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 2001-2012, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * file name: ustr_wcs.cpp |
| * encoding: UTF-8 |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2004sep07 |
| * created by: Markus W. Scherer |
| * |
| * u_strToWCS() and u_strFromWCS() functions |
| * moved here from ustrtrns.c for better modularization. |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "unicode/ustring.h" |
| #include "cstring.h" |
| #include "cwchar.h" |
| #include "cmemory.h" |
| #include "ustr_imp.h" |
| #include "ustr_cnv.h" |
| |
| #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION |
| |
| #define _STACK_BUFFER_CAPACITY 1000 |
| #define _BUFFER_CAPACITY_MULTIPLIER 2 |
| |
| #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
| // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. |
| // Then we could change this to work only with wchar_t buffers. |
| static inline UBool |
| u_growAnyBufferFromStatic(void *context, |
| void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, |
| int32_t length, int32_t size) { |
| // Use char* not void* to avoid the compiler's strict-aliasing assumptions |
| // and related warnings. |
| char *newBuffer=(char *)uprv_malloc(reqCapacity*size); |
| if(newBuffer!=NULL) { |
| if(length>0) { |
| uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); |
| } |
| *pCapacity=reqCapacity; |
| } else { |
| *pCapacity=0; |
| } |
| |
| /* release the old pBuffer if it was not statically allocated */ |
| if(*pBuffer!=(char *)context) { |
| uprv_free(*pBuffer); |
| } |
| |
| *pBuffer=newBuffer; |
| return (UBool)(newBuffer!=NULL); |
| } |
| |
| /* helper function */ |
| static wchar_t* |
| _strToWCS(wchar_t *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const UChar *src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode){ |
| |
| char stackBuffer [_STACK_BUFFER_CAPACITY]; |
| char* tempBuf = stackBuffer; |
| int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; |
| char* tempBufLimit = stackBuffer + tempBufCapacity; |
| UConverter* conv = NULL; |
| char* saveBuf = tempBuf; |
| wchar_t* intTarget=NULL; |
| int32_t intTargetCapacity=0; |
| int count=0,retVal=0; |
| |
| const UChar *pSrcLimit =NULL; |
| const UChar *pSrc = src; |
| |
| conv = u_getDefaultConverter(pErrorCode); |
| |
| if(U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| if(srcLength == -1){ |
| srcLength = u_strlen(pSrc); |
| } |
| |
| pSrcLimit = pSrc + srcLength; |
| |
| for(;;) { |
| /* reset the error state */ |
| *pErrorCode = U_ZERO_ERROR; |
| |
| /* convert to chars using default converter */ |
| ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); |
| count =(tempBuf - saveBuf); |
| |
| /* This should rarely occur */ |
| if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
| tempBuf = saveBuf; |
| |
| /* we dont have enough room on the stack grow the buffer */ |
| int32_t newCapacity = 2 * srcLength; |
| if(newCapacity <= tempBufCapacity) { |
| newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; |
| } |
| if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
| newCapacity, count, 1)) { |
| goto cleanup; |
| } |
| |
| saveBuf = tempBuf; |
| tempBufLimit = tempBuf + tempBufCapacity; |
| tempBuf = tempBuf + count; |
| |
| } else { |
| break; |
| } |
| } |
| |
| if(U_FAILURE(*pErrorCode)){ |
| goto cleanup; |
| } |
| |
| /* done with conversion null terminate the char buffer */ |
| if(count>=tempBufCapacity){ |
| tempBuf = saveBuf; |
| /* we dont have enough room on the stack grow the buffer */ |
| if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
| count+1, count, 1)) { |
| goto cleanup; |
| } |
| saveBuf = tempBuf; |
| } |
| |
| saveBuf[count]=0; |
| |
| |
| /* allocate more space than required |
| * here we assume that every char requires |
| * no more than 2 wchar_ts |
| */ |
| intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; |
| intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); |
| |
| if(intTarget){ |
| |
| int32_t nulLen = 0; |
| int32_t remaining = intTargetCapacity; |
| wchar_t* pIntTarget=intTarget; |
| tempBuf = saveBuf; |
| |
| /* now convert the mbs to wcs */ |
| for(;;){ |
| |
| /* we can call the system API since we are sure that |
| * there is atleast 1 null in the input |
| */ |
| retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); |
| |
| if(retVal==-1){ |
| *pErrorCode = U_INVALID_CHAR_FOUND; |
| break; |
| }else if(retVal== remaining){/* should never occur */ |
| int numWritten = (pIntTarget-intTarget); |
| u_growAnyBufferFromStatic(NULL,(void**) &intTarget, |
| &intTargetCapacity, |
| intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, |
| numWritten, |
| sizeof(wchar_t)); |
| pIntTarget = intTarget; |
| remaining=intTargetCapacity; |
| |
| if(nulLen!=count){ /*there are embedded nulls*/ |
| pIntTarget+=numWritten; |
| remaining-=numWritten; |
| } |
| |
| }else{ |
| int32_t nulVal; |
| /*scan for nulls */ |
| /* we donot check for limit since tempBuf is null terminated */ |
| while(tempBuf[nulLen++] != 0){ |
| } |
| nulVal = (nulLen < srcLength) ? 1 : 0; |
| pIntTarget = pIntTarget + retVal+nulVal; |
| remaining -=(retVal+nulVal); |
| |
| /* check if we have reached the source limit*/ |
| if(nulLen>=(count)){ |
| break; |
| } |
| } |
| } |
| count = (int32_t)(pIntTarget-intTarget); |
| |
| if(0 < count && count <= destCapacity){ |
| uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); |
| } |
| |
| if(pDestLength){ |
| *pDestLength = count; |
| } |
| |
| /* free the allocated memory */ |
| uprv_free(intTarget); |
| |
| }else{ |
| *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
| } |
| cleanup: |
| /* are we still using stack buffer */ |
| if(stackBuffer != saveBuf){ |
| uprv_free(saveBuf); |
| } |
| u_terminateWChars(dest,destCapacity,count,pErrorCode); |
| |
| u_releaseDefaultConverter(conv); |
| |
| return dest; |
| } |
| #endif |
| |
| U_CAPI wchar_t* U_EXPORT2 |
| u_strToWCS(wchar_t *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const UChar *src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode){ |
| |
| /* args check */ |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| if( (src==NULL && srcLength!=0) || srcLength < -1 || |
| (destCapacity<0) || (dest == NULL && destCapacity > 0) |
| ) { |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| |
| #ifdef U_WCHAR_IS_UTF16 |
| /* wchar_t is UTF-16 just do a memcpy */ |
| if(srcLength == -1){ |
| srcLength = u_strlen(src); |
| } |
| if(0 < srcLength && srcLength <= destCapacity){ |
| u_memcpy((UChar *)dest, src, srcLength); |
| } |
| if(pDestLength){ |
| *pDestLength = srcLength; |
| } |
| |
| u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); |
| |
| return dest; |
| |
| #elif defined U_WCHAR_IS_UTF32 |
| |
| return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, |
| src, srcLength, pErrorCode); |
| |
| #else |
| |
| return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); |
| |
| #endif |
| |
| } |
| |
| #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
| /* helper function */ |
| static UChar* |
| _strFromWCS( UChar *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const wchar_t *src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode) |
| { |
| int32_t retVal =0, count =0 ; |
| UConverter* conv = NULL; |
| UChar* pTarget = NULL; |
| UChar* pTargetLimit = NULL; |
| UChar* target = NULL; |
| |
| UChar uStack [_STACK_BUFFER_CAPACITY]; |
| |
| wchar_t wStack[_STACK_BUFFER_CAPACITY]; |
| wchar_t* pWStack = wStack; |
| |
| |
| char cStack[_STACK_BUFFER_CAPACITY]; |
| int32_t cStackCap = _STACK_BUFFER_CAPACITY; |
| char* pCSrc=cStack; |
| char* pCSave=pCSrc; |
| char* pCSrcLimit=NULL; |
| |
| const wchar_t* pSrc = src; |
| const wchar_t* pSrcLimit = NULL; |
| |
| if(srcLength ==-1){ |
| /* if the wchar_t source is null terminated we can safely |
| * assume that there are no embedded nulls, this is a fast |
| * path for null terminated strings. |
| */ |
| for(;;){ |
| /* convert wchars to chars */ |
| retVal = uprv_wcstombs(pCSrc,src, cStackCap); |
| |
| if(retVal == -1){ |
| *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
| goto cleanup; |
| }else if(retVal >= (cStackCap-1)){ |
| /* Should rarely occur */ |
| u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
| cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); |
| pCSave = pCSrc; |
| }else{ |
| /* converted every thing */ |
| pCSrc = pCSrc+retVal; |
| break; |
| } |
| } |
| |
| }else{ |
| /* here the source is not null terminated |
| * so it may have nulls embeded and we need to |
| * do some extra processing |
| */ |
| int32_t remaining =cStackCap; |
| |
| pSrcLimit = src + srcLength; |
| |
| for(;;){ |
| int32_t nulLen = 0; |
| |
| /* find nulls in the string */ |
| while(nulLen<srcLength && pSrc[nulLen++]!=0){ |
| } |
| |
| if((pSrc+nulLen) < pSrcLimit){ |
| /* check if we have enough room in pCSrc */ |
| if(remaining < (nulLen * MB_CUR_MAX)){ |
| /* should rarely occur */ |
| int32_t len = (pCSrc-pCSave); |
| pCSrc = pCSave; |
| /* we do not have enough room so grow the buffer*/ |
| u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
| _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
| |
| pCSave = pCSrc; |
| pCSrc = pCSave+len; |
| remaining = cStackCap-(pCSrc - pCSave); |
| } |
| |
| /* we have found a null so convert the |
| * chunk from beginning of non-null char to null |
| */ |
| retVal = uprv_wcstombs(pCSrc,pSrc,remaining); |
| |
| if(retVal==-1){ |
| /* an error occurred bail out */ |
| *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
| goto cleanup; |
| } |
| |
| pCSrc += retVal+1 /* already null terminated */; |
| |
| pSrc += nulLen; /* skip past the null */ |
| srcLength-=nulLen; /* decrement the srcLength */ |
| remaining -= (pCSrc-pCSave); |
| |
| |
| }else{ |
| /* the source is not null terminated and we are |
| * end of source so we copy the source to a temp buffer |
| * null terminate it and convert wchar_ts to chars |
| */ |
| if(nulLen >= _STACK_BUFFER_CAPACITY){ |
| /* Should rarely occur */ |
| /* allocate new buffer buffer */ |
| pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); |
| if(pWStack==NULL){ |
| *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
| goto cleanup; |
| } |
| } |
| if(nulLen>0){ |
| /* copy the contents to tempStack */ |
| uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); |
| } |
| |
| /* null terminate the tempBuffer */ |
| pWStack[nulLen] =0 ; |
| |
| if(remaining < (nulLen * MB_CUR_MAX)){ |
| /* Should rarely occur */ |
| int32_t len = (pCSrc-pCSave); |
| pCSrc = pCSave; |
| /* we do not have enough room so grow the buffer*/ |
| u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
| cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
| |
| pCSave = pCSrc; |
| pCSrc = pCSave+len; |
| remaining = cStackCap-(pCSrc - pCSave); |
| } |
| /* convert to chars */ |
| retVal = uprv_wcstombs(pCSrc,pWStack,remaining); |
| |
| pCSrc += retVal; |
| pSrc += nulLen; |
| srcLength-=nulLen; /* decrement the srcLength */ |
| break; |
| } |
| } |
| } |
| |
| /* OK..now we have converted from wchar_ts to chars now |
| * convert chars to UChars |
| */ |
| pCSrcLimit = pCSrc; |
| pCSrc = pCSave; |
| pTarget = target= dest; |
| pTargetLimit = dest + destCapacity; |
| |
| conv= u_getDefaultConverter(pErrorCode); |
| |
| if(U_FAILURE(*pErrorCode)|| conv==NULL){ |
| goto cleanup; |
| } |
| |
| for(;;) { |
| |
| *pErrorCode = U_ZERO_ERROR; |
| |
| /* convert to stack buffer*/ |
| ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); |
| |
| /* increment count to number written to stack */ |
| count+= pTarget - target; |
| |
| if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
| target = uStack; |
| pTarget = uStack; |
| pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; |
| } else { |
| break; |
| } |
| |
| } |
| |
| if(pDestLength){ |
| *pDestLength =count; |
| } |
| |
| u_terminateUChars(dest,destCapacity,count,pErrorCode); |
| |
| cleanup: |
| |
| if(cStack != pCSave){ |
| uprv_free(pCSave); |
| } |
| |
| if(wStack != pWStack){ |
| uprv_free(pWStack); |
| } |
| |
| u_releaseDefaultConverter(conv); |
| |
| return dest; |
| } |
| #endif |
| |
| U_CAPI UChar* U_EXPORT2 |
| u_strFromWCS(UChar *dest, |
| int32_t destCapacity, |
| int32_t *pDestLength, |
| const wchar_t *src, |
| int32_t srcLength, |
| UErrorCode *pErrorCode) |
| { |
| |
| /* args check */ |
| if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
| return NULL; |
| } |
| |
| if( (src==NULL && srcLength!=0) || srcLength < -1 || |
| (destCapacity<0) || (dest == NULL && destCapacity > 0) |
| ) { |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| |
| #ifdef U_WCHAR_IS_UTF16 |
| /* wchar_t is UTF-16 just do a memcpy */ |
| if(srcLength == -1){ |
| srcLength = u_strlen((const UChar *)src); |
| } |
| if(0 < srcLength && srcLength <= destCapacity){ |
| u_memcpy(dest, (const UChar *)src, srcLength); |
| } |
| if(pDestLength){ |
| *pDestLength = srcLength; |
| } |
| |
| u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); |
| |
| return dest; |
| |
| #elif defined U_WCHAR_IS_UTF32 |
| |
| return u_strFromUTF32(dest, destCapacity, pDestLength, |
| (UChar32*)src, srcLength, pErrorCode); |
| |
| #else |
| |
| return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); |
| |
| #endif |
| |
| } |
| |
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */