/*
******************************************************************************
*
*   Copyright (C) 2001-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File ustrtrns.c
*
* Modification History:
*
*   Date        Name        Description
*   9/10/2001   Ram         Creation.
******************************************************************************
*/

/*******************************************************************************
 *
 * u_strTo* and u_strFrom* APIs
 *
 * NOTE(review): this copy of the file has lost text in several places (spans
 * that started with a less-than sign appear to have been stripped).  The
 * affected spots are marked below with NOTE(review) comments; the code at
 * those spots is kept exactly as found and is not expected to compile.
 *
 *******************************************************************************
 */

#include "unicode/putil.h"
#include "unicode/ucnv.h"
#include "unicode/ustring.h"
#include "cstring.h"
#include "cwchar.h"
#include "cmemory.h"
#include "ustr_imp.h"

/* Prototype for the buffer-growing helper defined immediately below. */
U_CAPI UBool /* U_CALLCONV U_EXPORT2 */
u_growAnyBufferFromStatic(void *context,
                          void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
                          int32_t length, int32_t size);

/*
 * Replace *pBuffer with a newly allocated buffer of reqCapacity elements.
 *
 * context      address that *pBuffer is compared against; when they are equal
 *              the old buffer is not freed.  The callers in this file pass
 *              their stack array here.
 * pBuffer      in/out: the current buffer; receives the new buffer, or NULL
 *              when the allocation failed.
 * pCapacity    out: set to reqCapacity on success, 0 on failure.
 * reqCapacity  requested capacity, in elements.
 * length       number of elements copied from the old buffer into the new one
 *              (nothing is copied when length is not greater than 0).
 * size         size of one element in bytes.
 *
 * Returns TRUE when the allocation succeeded, FALSE otherwise.
 *
 * Note that the old buffer is released even when the allocation fails, so on
 * failure the caller is left with *pBuffer==NULL and *pCapacity==0.
 *
 * NOTE(review): reqCapacity*size and length*size are computed in int32_t with
 * no overflow check -- confirm callers keep these small.
 */
U_CAPI UBool /* U_CALLCONV U_EXPORT2 */
u_growAnyBufferFromStatic(void *context,
                          void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
                          int32_t length, int32_t size) {
    void *newBuffer=uprv_malloc(reqCapacity*size);
    if(newBuffer!=NULL) {
        if(length>0) {
            uprv_memcpy(newBuffer, *pBuffer, length*size);
        }
        *pCapacity=reqCapacity;
    } else {
        *pCapacity=0;
    }

    /* release the old pBuffer if it was not statically allocated */
    if(*pBuffer!=(void *)context) {
        uprv_free(*pBuffer);
    }

    *pBuffer=newBuffer;
    return (UBool)(newBuffer!=NULL);
}

/* Number of elements in the on-stack scratch arrays used by the wchar_t helpers below. */
#define _STACK_BUFFER_CAPACITY 1000

/*
 * Convert a string of 32-bit code points (src) to UChars in dest.
 *
 * srcLength may be -1, in which case src is read up to a 0 code point.
 * Illegal arguments (srcLength below -1, negative destCapacity, or a NULL dest
 * with a positive destCapacity) set U_ILLEGAL_ARGUMENT_ERROR and return NULL.
 * A failure code already present in *pErrorCode returns NULL immediately.
 *
 * NOTE(review): the first check tolerates pErrorCode==NULL, but the argument
 * check that follows writes through pErrorCode unconditionally.
 *
 * NOTE(review): only the beginning of this function survives in this copy;
 * the remainder of the body (including the return statement) is missing.
 */
U_CAPI UChar* U_EXPORT2
u_strFromUTF32(UChar *dest,
               int32_t destCapacity,
               int32_t *pDestLength,
               const UChar32 *src,
               int32_t srcLength,
               UErrorCode *pErrorCode) {
    int32_t reqLength = 0;
    uint32_t ch =0;
    UChar *pDestLimit =dest+destCapacity;
    UChar *pDest = dest;
    const uint32_t *pSrc = (const uint32_t *)src;

    /* args check */
    if(pErrorCode && U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* Check if the source is null terminated */
    if(srcLength == -1 ){
        while((ch=*pSrc)!=0){
            if(pDest < pDestLimit){
                ++pSrc;
                if(ch<=0xFFFF){
                    /* fits in a single UChar */
                    *(pDest++)=(UChar)ch;
                }else{
                    /* above 0xFFFF: write the first unit of a surrogate pair */
                    *(pDest++)=(uint16_t)((ch>>10)+0xd7c0); /*UTF_FIRST_SURROGATE(ch);*/
                    /*
                     * NOTE(review): source text is missing from this point on.
                     * The tokens up to the next intact function are fragments
                     * of several bodies fused together; they are preserved
                     * verbatim and must be restored from a clean copy.
                     */
                    if(pDest>10)+0xd7c0); /*UTF_FIRST_SURROGATE(ch);*/
                    /*
                     * NOTE(review): orphaned fragment -- tail of an argument
                     * check followed by a length computation via u_strlen();
                     * the owning function header is not visible here.
                     */
                    if(pDest 0)){
                        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
                        return NULL;
                    }
                    if(srcLength == -1){
                        srcLength = u_strlen(pSrc);
                    }
                    while(index < srcLength){
                        /*
                         * NOTE(review): orphaned fragment -- tail of an
                         * argument check followed by a length computation via
                         * uprv_strlen(); the owning function header is not
                         * visible here.
                         */
                        if(pDest 0)){
                            *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
                            return NULL;
                        }
                        if(srcLength == -1){
                            srcLength = uprv_strlen((char*)pSrc);
                        }
                        while(index < srcLength){
                            /* NOTE(review): orphaned fragment, text missing on both sides. */
                            if(pDest >10)+0xd7c0); /*UTF_FIRST_SURROGATE(ch);*/
                            /*
                             * NOTE(review): from here to the closing brace is
                             * the tail of a function that writes UTF-8 bytes
                             * into dest and returns (char*)dest.  Its name,
                             * signature and local declarations are missing.
                             *
                             * Visible behaviour: values up to 0x7f are stored
                             * as one byte; anything else goes through
                             * utf8_appendCharSafeBody().  When dest is full,
                             * or the append returns the unchanged index,
                             * U_BUFFER_OVERFLOW_ERROR is set and the remaining
                             * input is only measured with UTF8_CHAR_LENGTH().
                             * The total is stored in *pDestLength (if not
                             * NULL) and u_terminateChars() is called.
                             */
                            if(pDest 0)){
                                *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
                                return NULL;
                            }

    if(srcLength == -1){
        /* source is 0-terminated */
        while(*pSrc!=0){
            if(reqLength < destCapacity){
                ch = *pSrc++;
                if(ch<=0x7f) {
                    dest[reqLength++]=(uint8_t)ch;
                } else {
                    int num = utf8_appendCharSafeBody((uint8_t*)dest, reqLength, destCapacity, ch);
                    /* an unchanged index is treated as "did not fit" */
                    if(num==reqLength){
                        /*
                         * NOTE(review): pSrc has already moved past ch here,
                         * so the counting loop below does not include this
                         * code point in reqLength -- looks like the reported
                         * length comes out short; confirm.
                         */
                        *pErrorCode =U_BUFFER_OVERFLOW_ERROR;
                        break;
                    }
                    reqLength = num;
                }
            }else{
                *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
        /* count the output needed for whatever input is left */
        while(*pSrc!=0){
            ch = *pSrc++;
            reqLength+=UTF8_CHAR_LENGTH(ch);
        }
    }else{
        /* source has an explicit length */
        pSrcLimit = pSrc + srcLength;
        while(pSrc < pSrcLimit){
            if(reqLength < destCapacity){
                ch = *pSrc++;
                if(ch<=0x7f) {
                    dest[reqLength++]=(uint8_t)ch;
                } else {
                    int num = utf8_appendCharSafeBody((uint8_t*)dest, reqLength, destCapacity, ch);
                    /* an unchanged index is treated as "did not fit" (same caveat as above) */
                    if(num==reqLength){
                        *pErrorCode =U_BUFFER_OVERFLOW_ERROR;
                        break;
                    }
                    reqLength = num;
                }
            }else{
                *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
        /* count the output needed for whatever input is left */
        while(pSrc < pSrcLimit){
            ch = *pSrc++;
            reqLength+=UTF8_CHAR_LENGTH(ch);
        }
    }

    /* NOTE(review): this adds 0 to reqLength, i.e. it has no effect. */
    if(destCapacity==0){
        reqLength+=destCapacity;
    }

    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateChars((char*)dest,destCapacity,reqLength,pErrorCode);

    return (char*)dest;
}

/*
 * helper function
 *
 * Converts UChars to wchar_t in two steps: UChars are first converted to
 * chars with the default converter (ucnv_fromUnicode), collecting the bytes
 * in a stack buffer that is replaced by a larger heap buffer on overflow.
 * The second step, which fills intTarget, is not visible in this copy (see
 * the NOTE below).  Finally count wchar_t units are copied to dest when
 * count is smaller than destCapacity, otherwise U_BUFFER_OVERFLOW_ERROR is
 * set; count is stored in *pDestLength (if not NULL), temporary buffers are
 * freed, u_terminateWChars() is called and the converter is released.
 *
 * Returns dest.
 *
 * NOTE(review): the result of u_growAnyBufferFromStatic() is not checked; if
 * it fails tempBuf is NULL and is still used for pointer arithmetic.
 */
static wchar_t*
_strToWCS(wchar_t *dest,
          int32_t destCapacity,
          int32_t *pDestLength,
          const UChar *src,
          int32_t srcLength,
          UErrorCode *pErrorCode){

    char stackBuffer [_STACK_BUFFER_CAPACITY];
    char* tempBuf = stackBuffer;               /* current write position */
    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
    char* tempBufLimit = stackBuffer + tempBufCapacity;
    UConverter* conv = NULL;
    char* saveBuf = tempBuf;                   /* start of the current char buffer */
    wchar_t* intTarget=NULL;
    int count=0,retVal=0;

    const UChar *pSrcLimit =NULL;
    const UChar *pSrc = src;

    pSrcLimit = pSrc + srcLength;
    conv = u_getDefaultConverter(pErrorCode);

    do{
        /* convert to chars using default converter */
        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,FALSE,pErrorCode);

        /* This should rarely occur */
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
            /* bytes produced so far; they are carried over into the new buffer */
            int32_t bufCount = tempBuf - saveBuf;
            *pErrorCode=U_ZERO_ERROR;
            tempBuf = saveBuf;

            /* we don't have enough room on the stack, grow the buffer */
            u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
                (tempBufCapacity+_STACK_BUFFER_CAPACITY), bufCount,sizeof(char));

            saveBuf = tempBuf;
            tempBufLimit = tempBuf + tempBufCapacity;
            /* resume writing after the bytes that were carried over */
            tempBuf = tempBuf + bufCount;
            /*
             * NOTE(review): source text is missing inside the next comment.
             * Because its original terminator was lost, everything up to the
             * next comment terminator is read as comment text.  That hides
             * the end of the do-loop, the allocation of intTarget and the
             * loop that fills it.  The damaged text is kept verbatim.
             */
            /* to flush the converters internal state when pSrc=(count)){ break; } pIntTarget = pIntTarget + retVal+1/* for terminating null*/; }

        if(count < destCapacity){
            uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
        }else{
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        }

        if(pDestLength){
            *pDestLength = count;
        }

        /* free the allocated memory */
        uprv_free(intTarget);

    }else{
        /* NOTE(review): the matching condition is in the lost text above. */
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    }

    /* are we still using stack buffer */
    if(stackBuffer != saveBuf){
        uprv_free(saveBuf);
    }
    u_terminateWChars(dest,destCapacity,count,pErrorCode);

    u_releaseDefaultConverter(conv);

    return dest;
}

/*
 * Convert a UChar string to wchar_t.
 *
 * srcLength may be -1, in which case the length is taken from u_strlen(src).
 * Illegal arguments (srcLength below -1, negative destCapacity, or a NULL dest
 * with a positive destCapacity) set U_ILLEGAL_ARGUMENT_ERROR and return NULL.
 * A failure code already present in *pErrorCode returns NULL immediately.
 *
 * Depending on the build configuration this either copies the units directly
 * (U_WCHAR_IS_UTF16), delegates to u_strToUTF32() (U_WCHAR_IS_UTF32), or uses
 * _strToWCS().
 *
 * NOTE(review): the first check tolerates pErrorCode==NULL, but the argument
 * check that follows writes through pErrorCode unconditionally.
 */
U_CAPI wchar_t* U_EXPORT2
u_strToWCS(wchar_t *dest,
           int32_t destCapacity,
           int32_t *pDestLength,
           const UChar *src,
           int32_t srcLength,
           UErrorCode *pErrorCode){

    const UChar *pSrc = src;

    /* args check */
    if(pErrorCode && U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(srcLength == -1){
        srcLength = u_strlen(pSrc);
    }

#ifdef U_WCHAR_IS_UTF16
    /* wchar_t is UTF-16 just do a memcpy */
    /*
     * NOTE(review): srcLength was already replaced by u_strlen() above, so
     * this test presumably never succeeds; if it did, the loop would also
     * count the terminating 0.
     */
    if(srcLength==-1){
        srcLength =0;
        while(pSrc[srcLength++]!=0){ }
    }
    if(srcLength <= destCapacity){
        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
    }else{
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    }
    if(pDestLength){
        *pDestLength = srcLength;
    }

    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);

    /* NOTE(review): stray second semicolon (an empty statement). */
    return dest;;

#elif defined U_WCHAR_IS_UTF32

    return u_strToUTF32(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);

#else

    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);

#endif
}

/*
 * helper function
 *
 * Converts wchar_t to UChars in two steps: the wchar_t input is first turned
 * into chars with uprv_wcstombs() (collected in cStack, or in a heap buffer
 * once that is too small), then the chars are converted to UChars with the
 * default converter (ucnv_toUnicode) straight into dest.  If dest overflows,
 * the rest is converted into a scratch stack buffer only to keep counting.
 * The count is stored in *pDestLength (if not NULL) and u_terminateUChars()
 * is called.
 *
 * A uprv_wcstombs() result of -1 in the 0-terminated path sets
 * U_ILLEGAL_CHAR_FOUND and jumps to the cleanup code.
 *
 * Returns dest.
 *
 * NOTE(review): the results of u_growAnyBufferFromStatic() are not checked.
 * NOTE(review): on the "goto cleanup" path conv is still NULL when it is
 * passed to u_releaseDefaultConverter() -- confirm that this is acceptable.
 */
static UChar*
_strFromWCS( UChar *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const wchar_t *src,
             int32_t srcLength,
             UErrorCode *pErrorCode){

    int32_t retVal =0, count =0 ;
    UConverter* conv = NULL;
    UChar* pTarget = NULL;
    UChar* pTargetLimit = NULL;
    UChar uStack [_STACK_BUFFER_CAPACITY];     /* scratch target used only for counting */

    wchar_t wStack[_STACK_BUFFER_CAPACITY];    /* holds a 0-terminated copy of an input run */
    wchar_t* pWStack = wStack;

    char cStack[_STACK_BUFFER_CAPACITY];       /* intermediate char output */
    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
    char* pCSrc=cStack;                        /* current position in the char buffer */
    char* pCSave=pCSrc;                        /* start of the char buffer */
    char* pCSrcLimit=NULL;

    const wchar_t* pSrc = src;
    const wchar_t* pSrcLimit = NULL;

    if(srcLength ==-1){
        /* if the wchar_t source is null terminated we can safely
         * assume that there are no embedded nulls, this is a fast
         * path for null terminated strings.
         */
        for(;;){
            /* convert wchars to chars */
            retVal = uprv_wcstombs(pCSrc,src, cStackCap);

            if(retVal == -1){
                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
                goto cleanup;
            }else if(retVal == cStackCap){
                /* Should rarely occur */
                /* buffer was filled completely: double it (nothing is copied) and convert again from the start */
                u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
                    cStackCap*2,0,sizeof(char));
                pCSave = pCSrc;
            }else{
                /* converted everything */
                pCSrc = pCSrc+retVal;
                break;
            }
        }

    }else{
        /* here the source is not null terminated
         * so it may have nulls embedded and we need to
         * do some extra processing
         */
        int32_t remaining =cStackCap;

        pSrcLimit = src + srcLength;

        for(;;){
            int32_t nulLen = 0;

            /* find nulls in the string */
            /*
             * NOTE(review): source text is missing in the middle of the next
             * line (between the loop condition and the capacity test); the
             * damaged line is kept verbatim.
             */
            while((pSrc+nulLen) _STACK_BUFFER_CAPACITY){
                    /* Should rarely occur */
                    /* allocate new buffer */
                    /*
                     * NOTE(review): nulLen elements are allocated here while
                     * index nulLen is written below -- looks like one element
                     * too few; verify against a clean copy.  The allocation
                     * result is not checked, and it is unclear from the
                     * visible code whether an earlier heap pWStack is freed
                     * before being replaced.
                     */
                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * nulLen);
                }
                /* copy the contents to tempStack */
                uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));

                /* null terminate the tempBuffer */
                pWStack[nulLen] =0 ;

                if(remaining < (nulLen * MB_CUR_MAX)){
                    /* Should rarely occur */
                    /* bytes already produced; they are carried over into the new buffer */
                    int32_t len = (pCSrc-pCSave);
                    pCSrc = pCSave;
                    /* we do not have enough room so grow the buffer*/
                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
                           cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));

                    pCSave = pCSrc;
                    pCSrc = pCSave+len;
                    remaining = cStackCap-(pCSrc - pCSave);
                }

                /* convert to chars */
                /* NOTE(review): retVal is not checked for -1 before it is added to pCSrc. */
                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);

                /* step past the converted bytes plus one more position */
                pCSrc = pCSrc+retVal +1;
                pSrc = pSrc + nulLen;
            }

            /* ran out of input, break */
            if(pSrc >= pSrcLimit){
                break;
            }
        }
    }

    /* OK..now we have converted from wchar_ts to chars now
     * convert chars to UChars
     */
    pCSrcLimit = pCSrc;
    pCSrc = pCSave;
    pTarget = dest;
    pTargetLimit = dest + destCapacity;

    conv= u_getDefaultConverter(pErrorCode);

    /* convert and write to the target */
    ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,FALSE,pErrorCode);

    /* count the number converted */
    count=pTarget - dest;

    /* dest is full: keep converting into uStack, repeatedly, just to add up the total */
    while(*pErrorCode ==U_BUFFER_OVERFLOW_ERROR){

        *pErrorCode = U_ZERO_ERROR;
        pTarget = uStack;
        pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;

        /* convert to stack buffer*/
        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,FALSE,pErrorCode);

        /* increment count to number written to stack */
        count+= pTarget - uStack;
    }

    if(pDestLength){
        *pDestLength =count;
    }

    u_terminateUChars(dest,destCapacity,count,pErrorCode);

cleanup:

    /* free the intermediate buffers if they were moved off the stack */
    if(cStack != pCSave){
        uprv_free(pCSave);
    }

    if(wStack != pWStack){
        uprv_free(pWStack);
    }

    u_releaseDefaultConverter(conv);

    return dest;
}

/*
 * Convert a wchar_t string to UChars.
 *
 * Illegal arguments (srcLength below -1, negative destCapacity, or a NULL dest
 * with a positive destCapacity) set U_ILLEGAL_ARGUMENT_ERROR and return NULL.
 * A failure code already present in *pErrorCode returns NULL immediately.
 *
 * Depending on the build configuration this either copies the units directly
 * (U_WCHAR_IS_UTF16), delegates to u_strFromUTF32() (U_WCHAR_IS_UTF32), or
 * uses _strFromWCS().
 *
 * NOTE(review): the first check tolerates pErrorCode==NULL, but the argument
 * check that follows writes through pErrorCode unconditionally.
 */
U_CAPI UChar* U_EXPORT2
u_strFromWCS(UChar *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const wchar_t *src,
             int32_t srcLength,
             UErrorCode *pErrorCode)
{

    /* args check */
    if(pErrorCode && U_FAILURE(*pErrorCode)){
        return NULL;
    }

    if((srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

#ifdef U_WCHAR_IS_UTF16
    {
        const wchar_t* pSrc = src;
        /* wchar_t is UTF-16 just do a memcpy */
        /*
         * NOTE(review): the post-increment also runs for the comparison that
         * finds the terminating 0, so srcLength ends up one larger than the
         * number of units before the terminator -- confirm whether intended.
         */
        if(srcLength==-1){
            srcLength =0;
            while(pSrc[srcLength++]!=0){ }
        }
        if(srcLength <= destCapacity){
            uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
        }else{
            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        }
        if(pDestLength){
            *pDestLength = srcLength;
        }
    }

    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);

    return dest;

#elif defined U_WCHAR_IS_UTF32

    return u_strFromUTF32(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);

#else

    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);

#endif

}