| /* |
| ****************************************************************************** |
| * |
| * Copyright (C) 1998-2003, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ****************************************************************************** |
| * |
| * File ustdio.c |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 11/18/98 stephen Creation. |
| * 03/12/99 stephen Modified for new C API. |
| * 07/19/99 stephen Fixed read() and gets() |
| ****************************************************************************** |
| */ |
| |
| #include "unicode/ustdio.h" |
| #include "unicode/putil.h" |
| #include "cmemory.h" |
| #include "ufile.h" |
| #include "ufmt_cmn.h" |
| #include "unicode/ucnv.h" |
| #include "unicode/ustring.h" |
| |
| #include <string.h> |
| |
| #define DELIM_LF 0x000A |
| #define DELIM_VT 0x000B |
| #define DELIM_FF 0x000C |
| #define DELIM_CR 0x000D |
| #define DELIM_NEL 0x0085 |
| #define DELIM_LS 0x2028 |
| #define DELIM_PS 0x2029 |
| |
| /* Leave this copyright notice here! */ |
| static const char copyright[] = U_COPYRIGHT_STRING; |
| |
| /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ |
| #ifdef WIN32 |
| static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; |
| static const uint32_t DELIMITERS_LEN = 2; |
| #elif (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) |
| static const UChar DELIMITERS [] = { DELIM_NEL, 0x0000 }; |
| static const uint32_t DELIMITERS_LEN = 1; |
| #else |
| static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; |
| static const uint32_t DELIMITERS_LEN = 1; |
| #endif |
| |
| #define IS_FIRST_STRING_DELIMITER(c1) \ |
| (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ |
| || (c1) == DELIM_NEL \ |
| || (c1) == DELIM_LS \ |
| || (c1) == DELIM_PS) |
| #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) |
| #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ |
| (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) |
| |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| |
| U_CAPI UTransliterator* U_EXPORT2 |
| u_fsettransliterator(UFILE *file, UFileDirection direction, |
| UTransliterator *adopt, UErrorCode *status) |
| { |
| UTransliterator *old = NULL; |
| |
| if(file==NULL || U_FAILURE(*status)) |
| { |
| return adopt; |
| } |
| |
| if(!file) |
| { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return adopt; |
| } |
| |
| if(direction & U_READ) |
| { |
| /** TODO: implement */ |
| *status = U_UNSUPPORTED_ERROR; |
| return adopt; |
| } |
| |
| if(adopt == NULL) /* they are clearing it */ |
| { |
| if(file->fTranslit != NULL) |
| { |
| /* TODO: Check side */ |
| old = file->fTranslit->translit; |
| uprv_free(file->fTranslit->buffer); |
| file->fTranslit->buffer=NULL; |
| uprv_free(file->fTranslit); |
| file->fTranslit=NULL; |
| } |
| } |
| else |
| { |
| if(file->fTranslit == NULL) |
| { |
| file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); |
| if(!file->fTranslit) |
| { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return adopt; |
| } |
| file->fTranslit->capacity = 0; |
| file->fTranslit->length = 0; |
| file->fTranslit->pos = 0; |
| file->fTranslit->buffer = NULL; |
| } |
| else |
| { |
| old = file->fTranslit->translit; |
| ufile_flush_translit(file); |
| } |
| |
| file->fTranslit->translit = adopt; |
| } |
| |
| return old; |
| } |
| |
| static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) |
| { |
| int32_t newlen; |
| int32_t junkCount = 0; |
| int32_t textLength; |
| int32_t textLimit; |
| UTransPosition pos; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| if(count == NULL) |
| { |
| count = &junkCount; |
| } |
| |
| if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) |
| { |
| /* fast path */ |
| return src; |
| } |
| |
| /* First: slide over everything */ |
| if(f->fTranslit->length > f->fTranslit->pos) |
| { |
| memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, |
| (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); |
| } |
| f->fTranslit->length -= f->fTranslit->pos; /* always */ |
| f->fTranslit->pos = 0; |
| |
| /* Calculate new buffer size needed */ |
| newlen = (*count + f->fTranslit->length) * 4; |
| |
| if(newlen > f->fTranslit->capacity) |
| { |
| if(f->fTranslit->buffer == NULL) |
| { |
| f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); |
| } |
| else |
| { |
| f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); |
| } |
| f->fTranslit->capacity = newlen; |
| } |
| |
| /* Now, copy any data over */ |
| u_strncpy(f->fTranslit->buffer + f->fTranslit->length, |
| src, |
| *count); |
| f->fTranslit->length += *count; |
| |
| /* Now, translit in place as much as we can */ |
| if(flush == FALSE) |
| { |
| textLength = f->fTranslit->length; |
| pos.contextStart = 0; |
| pos.contextLimit = textLength; |
| pos.start = 0; |
| pos.limit = textLength; |
| |
| utrans_transIncrementalUChars(f->fTranslit->translit, |
| f->fTranslit->buffer, /* because we shifted */ |
| &textLength, |
| f->fTranslit->capacity, |
| &pos, |
| &status); |
| |
| #ifdef _DEBUG |
| if(U_FAILURE(status)) |
| { |
| fprintf(stderr, " Gack. Translit blew up with a %s\n", u_errorName(status)); |
| return src; |
| } |
| #endif |
| |
| /* now: start/limit point to the transliterated text */ |
| /* Transliterated is [buffer..pos.start) */ |
| *count = pos.start; |
| f->fTranslit->pos = pos.start; |
| f->fTranslit->length = pos.limit; |
| |
| return f->fTranslit->buffer; |
| } |
| else |
| { |
| textLength = f->fTranslit->length; |
| textLimit = f->fTranslit->length; |
| |
| utrans_transUChars(f->fTranslit->translit, |
| f->fTranslit->buffer, |
| &textLength, |
| f->fTranslit->capacity, |
| 0, |
| &textLimit, |
| &status); |
| |
| #ifdef _DEBUG |
| if(U_FAILURE(status)) |
| { |
| fprintf(stderr, " Gack. Translit(flush) blew up with a %s\n", u_errorName(status)); |
| return src; |
| } |
| #endif |
| |
| /* out: converted len */ |
| *count = textLimit; |
| |
| /* Set pointers to 0 */ |
| f->fTranslit->pos = 0; |
| f->fTranslit->length = 0; |
| |
| return f->fTranslit->buffer; |
| } |
| } |
| |
| #endif |
| |
| void |
| ufile_flush_translit(UFILE *f) |
| { |
| #if !UCONFIG_NO_TRANSLITERATION |
| if((!f)||(!f->fTranslit)) |
| return; |
| #endif |
| |
| u_file_write_flush(NULL, 0, f, TRUE); |
| } |
| |
| |
| void |
| ufile_close_translit(UFILE *f) |
| { |
| #if !UCONFIG_NO_TRANSLITERATION |
| if((!f)||(!f->fTranslit)) |
| return; |
| #endif |
| |
| ufile_flush_translit(f); |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| if(f->fTranslit->translit) |
| utrans_close(f->fTranslit->translit); |
| |
| if(f->fTranslit->buffer) |
| { |
| uprv_free(f->fTranslit->buffer); |
| } |
| |
| uprv_free(f->fTranslit); |
| f->fTranslit = NULL; |
| #endif |
| } |
| |
| |
| /* Input/output */ |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fputs(const UChar *s, |
| UFILE *f) |
| { |
| int32_t count = u_file_write(s, u_strlen(s), f); |
| count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); |
| return count; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fputc(UChar uc, |
| UFILE *f) |
| { |
| return u_file_write(&uc, 1, f) == 1 ? uc : EOF; |
| } |
| |
| |
| U_CAPI int32_t U_EXPORT2 |
| u_file_write_flush( const UChar *chars, |
| int32_t count, |
| UFILE *f, |
| UBool flush) |
| { |
| /* Set up conversion parameters */ |
| UErrorCode status = U_ZERO_ERROR; |
| const UChar *mySource = chars; |
| const UChar *sourceAlias = chars; |
| const UChar *mySourceEnd; |
| char *myTarget = f->fCharBuffer; |
| int32_t bufferSize = UFILE_CHARBUFFER_SIZE; |
| int32_t written = 0; |
| int32_t numConverted = 0; |
| |
| if (count < 0) { |
| count = u_strlen(chars); |
| } |
| mySourceEnd = chars + count; |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| if((f->fTranslit) && (f->fTranslit->translit)) |
| { |
| /* Do the transliteration */ |
| mySource = u_file_translit(f, chars, &count, flush); |
| sourceAlias = mySource; |
| mySourceEnd = mySource + count; |
| } |
| #endif |
| |
| /* Perform the conversion in a loop */ |
| do { |
| status = U_ZERO_ERROR; |
| sourceAlias = mySource; |
| if(f->fConverter != NULL) { /* We have a valid converter */ |
| ucnv_fromUnicode(f->fConverter, |
| &myTarget, |
| f->fCharBuffer + bufferSize, |
| &mySource, |
| mySourceEnd, |
| NULL, |
| flush, |
| &status); |
| } else { /*weiv: do the invariant conversion */ |
| u_UCharsToChars(mySource, myTarget, count); |
| myTarget += count; |
| } |
| numConverted = (int32_t)(myTarget - f->fCharBuffer); |
| |
| if (numConverted > 0) { |
| /* write the converted bytes */ |
| fwrite(f->fCharBuffer, |
| sizeof(char), |
| numConverted, |
| f->fFile); |
| |
| written += numConverted; |
| } |
| myTarget = f->fCharBuffer; |
| } |
| while(status == U_BUFFER_OVERFLOW_ERROR); |
| |
| /* return # of chars written */ |
| return written; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_file_write( const UChar *chars, |
| int32_t count, |
| UFILE *f) |
| { |
| return u_file_write_flush(chars,count,f,FALSE); |
| } |
| |
| |
| /* private function used for buffering input */ |
| void |
| ufile_fill_uchar_buffer(UFILE *f) |
| { |
| UErrorCode status; |
| const char *mySource; |
| const char *mySourceEnd; |
| UChar *myTarget; |
| int32_t bufferSize; |
| int32_t maxCPBytes; |
| int32_t bytesRead; |
| int32_t availLength; |
| int32_t dataSize; |
| |
| |
| /* shift the buffer if it isn't empty */ |
| dataSize = (int32_t)(f->fUCLimit - f->fUCPos); |
| if(dataSize != 0) { |
| memmove(f->fUCBuffer, |
| f->fUCPos, |
| dataSize * sizeof(UChar)); |
| } |
| |
| |
| /* record how much buffer space is available */ |
| availLength = UFILE_UCHARBUFFER_SIZE - dataSize; |
| |
| /* Determine the # of codepage bytes needed to fill our UChar buffer */ |
| /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ |
| maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); |
| |
| /* Read in the data to convert */ |
| bytesRead = (int32_t)fread(f->fCharBuffer, |
| sizeof(char), |
| ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), |
| f->fFile); |
| |
| /* Set up conversion parameters */ |
| status = U_ZERO_ERROR; |
| mySource = f->fCharBuffer; |
| mySourceEnd = f->fCharBuffer + bytesRead; |
| myTarget = f->fUCBuffer + dataSize; |
| bufferSize = UFILE_UCHARBUFFER_SIZE; |
| |
| if(f->fConverter != NULL) { /* We have a valid converter */ |
| /* Perform the conversion */ |
| ucnv_toUnicode(f->fConverter, |
| &myTarget, |
| f->fUCBuffer + bufferSize, |
| &mySource, |
| mySourceEnd, |
| NULL, |
| (UBool)(feof(f->fFile) != 0), |
| &status); |
| |
| } else { /*weiv: do the invariant conversion */ |
| u_charsToUChars(mySource, myTarget, bytesRead); |
| myTarget += bytesRead; |
| } |
| |
| /* update the pointers into our array */ |
| f->fUCPos = f->fUCBuffer; |
| f->fUCLimit = myTarget; |
| } |
| |
| U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fgets(UChar *s, |
| int32_t n, |
| UFILE *f) |
| { |
| int32_t dataSize; |
| int32_t count; |
| UChar *alias; |
| UChar *limit; |
| UChar *sItr; |
| UChar currDelim = 0; |
| |
| if (n <= 0) { |
| /* Caller screwed up. We need to write the null terminatior. */ |
| return NULL; |
| } |
| |
| /* fill the buffer if needed */ |
| if (f->fUCPos >= f->fUCLimit) { |
| ufile_fill_uchar_buffer(f); |
| } |
| |
| /* subtract 1 from n to compensate for the terminator */ |
| --n; |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(f->fUCLimit - f->fUCPos); |
| |
| /* if 0 characters were left, return 0 */ |
| if (dataSize == 0) |
| return NULL; |
| |
| /* otherwise, iteratively fill the buffer and copy */ |
| count = 0; |
| sItr = s; |
| currDelim = 0; |
| while (dataSize > 0 && count < n) { |
| alias = f->fUCPos; |
| |
| /* Find how much to copy */ |
| if (dataSize < n) { |
| limit = f->fUCLimit; |
| } |
| else { |
| limit = alias + n; |
| } |
| |
| if (!currDelim) { |
| /* Copy UChars until we find the first occurrence of a delimiter character */ |
| while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { |
| count++; |
| *(sItr++) = *(alias++); |
| } |
| /* Preserve the newline */ |
| if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { |
| if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { |
| currDelim = *alias; |
| } |
| count++; |
| *(sItr++) = *(alias++); |
| } |
| } |
| /* If we have a CRLF combination, preserve that too. */ |
| if (alias < limit) { |
| if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { |
| count++; |
| *(sItr++) = *(alias++); |
| } |
| currDelim = 0; |
| } |
| |
| /* update the current buffer position */ |
| f->fUCPos = alias; |
| |
| /* if we found a delimiter */ |
| if (alias < f->fUCLimit && !currDelim) { |
| |
| /* break out */ |
| break; |
| } |
| |
| /* refill the buffer */ |
| ufile_fill_uchar_buffer(f); |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(f->fUCLimit - f->fUCPos); |
| } |
| |
| /* add the terminator and return s */ |
| *sItr = 0x0000; |
| return s; |
| } |
| |
| U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fgetc(UFILE *f) |
| { |
| /* if we have an available character in the buffer, return it */ |
| if(f->fUCPos < f->fUCLimit) |
| return *(f->fUCPos)++; |
| /* otherwise, fill the buffer and return the next character */ |
| else { |
| ufile_fill_uchar_buffer(f); |
| if(f->fUCPos < f->fUCLimit) { |
| return *(f->fUCPos)++; |
| } |
| else { |
| return U_EOF; |
| } |
| } |
| } |
| |
| /* Read a UChar from a UFILE and process escape sequences */ |
| U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fgetcx(UFILE *f) |
| { |
| UChar32 c32; |
| |
| /* Fill the buffer if it is empty */ |
| if (f->fUCPos + 1 >= f->fUCLimit) { |
| ufile_fill_uchar_buffer(f); |
| } |
| |
| /* Get the next character in the buffer */ |
| if (f->fUCPos < f->fUCLimit) { |
| c32 = *(f->fUCPos)++; |
| } |
| else { |
| c32 = U_EOF; |
| } |
| |
| if (U_IS_LEAD(c32)) { |
| if (f->fUCPos < f->fUCLimit) { |
| UChar c16 = *(f->fUCPos)++; |
| c32 = U16_GET_SUPPLEMENTARY(c32, c16); |
| } |
| else { |
| c32 = U_EOF; |
| } |
| } |
| |
| return c32; |
| } |
| |
| U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fungetc(UChar32 ch, |
| UFILE *f) |
| { |
| /* if we're at the beginning of the buffer, sorry! */ |
| if (f->fUCPos == f->fUCBuffer |
| || (U_IS_LEAD(ch) && (f->fUCPos - 1) == f->fUCBuffer)) |
| { |
| ch = U_EOF; |
| } |
| else { |
| /* otherwise, put the character back */ |
| /* TODO: Maybe we shouldn't be writing to the buffer and just verify the contents */ |
| if (U_IS_LEAD(ch)) { |
| /* Remember, put them back on in the reverse order. */ |
| *--(f->fUCPos) = U16_TRAIL(ch); |
| *--(f->fUCPos) = U16_LEAD(ch); |
| } |
| else { |
| *--(f->fUCPos) = (UChar)ch; |
| } |
| } |
| return ch; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_file_read( UChar *chars, |
| int32_t count, |
| UFILE *f) |
| { |
| int32_t dataSize; |
| int32_t read; |
| |
| /* fill the buffer */ |
| ufile_fill_uchar_buffer(f); |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(f->fUCLimit - f->fUCPos); |
| |
| /* if the buffer contains the amount requested, just copy */ |
| if(dataSize > count) { |
| memcpy(chars, f->fUCPos, count * sizeof(UChar)); |
| |
| /* update the current buffer position */ |
| f->fUCPos += count; |
| |
| /* return # of chars read */ |
| return count; |
| } |
| |
| /* otherwise, iteratively fill the buffer and copy */ |
| read = 0; |
| do { |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(f->fUCLimit - f->fUCPos); |
| |
| /* copy the current data in the buffer */ |
| memcpy(chars + read, f->fUCPos, dataSize * sizeof(UChar)); |
| |
| /* update number of items read */ |
| read += dataSize; |
| |
| /* update the current buffer position */ |
| f->fUCPos += dataSize; |
| |
| /* refill the buffer */ |
| ufile_fill_uchar_buffer(f); |
| |
| } while(dataSize != 0 && read < count); |
| |
| return read; |
| } |