source/samples/XMLConverter/XMLConverter.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 **********************************************************************
 * Copyright (C) 1998-2000, International Business Machines Corporation
 * and others.  All Rights Reserved.
 **********************************************************************
 *
 */
 // XMLConverter.cpp
 // To convert one encoded XML file to another

 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>

 /* Define _XPG4_2 for Solaris and friends. */
 #ifndef _XPG4_2
 #define _XPG4_2
 #endif

 /* Define __USE_XOPEN_EXTENDED for Linux and glibc. */
 #ifndef __USE_XOPEN_EXTENDED
 #define __USE_XOPEN_EXTENDED
 #endif

 #include <string.h>
 #include <cstring.h>

 #ifdef _WIN32
 #  include <windows.h>
 #endif

 #include "unicode/utypes.h"
 #include "unicode/ustring.h"
 #include "unicode/ucnv.h"
 #include "unicode/ucnv_err.h"
 #include "unicode/uloc.h"
 #include "unicode/uchar.h"

 // Buffer-size constants used throughout this file.
 // NOTE(review): MAXFILENAMELEN is not referenced anywhere in the visible source.
 #define MAXFILENAMELEN  1024
 // Size (in elements) of the raw input, output and pivot buffers.
 #define RAWBUFSIZE       4096
 // Value given to Encodings_Count in the Encodings enum below.
 #define ENCODINGCOUNT   5
 // Capacity of the buffer that holds the first line (the XML declaration).
 #define FIRSTLINEBUF    256
 // Raw byte type used when sniffing the input encoding.
 typedef unsigned char  BYTE;


 // NOTE(review): firstLine is not referenced anywhere in the visible source.
 char firstLine[128];
 // Encoding name for the body of the input file; written by convertFirstLine()
 // and read back by convertFile().
 char encodingNameInFile[256];
 // Set to TRUE by the "-v" command-line switch; enables progress/diagnostic output.
 UBool verbose = FALSE;

 // Forward declarations for the functions defined later in this file.

 // Transcodes one file: (target encoding name, input path, output path, target converter).
 extern void convertFile(char*, char*, char*, UConverter*);
 // NOTE(review): declared without parameters, but the definition in this file takes a
 // char* (the executable name). In C++ these are two distinct overloads.
 extern void usage();
 // NOTE(review): no definition of printChars appears in the visible source.
 extern void printChars(unsigned char*, int);
 // Guesses the input encoding from the leading bytes; returns an Encodings value.
 extern int getInputEncodingType(const BYTE* rawBuffer,
                                 unsigned long byteCount);
 // Rewrites the XML declaration into the output file and returns the byte offset
 // in the input file at which the remainder of the document starts.
 extern long convertFirstLine(FILE* inF,
                              char* inEncName,
                              FILE* outF,
                              char* outEncName,
                              char* ptrBuf,
                              unsigned long toRead,
                              UChar* uBuf);
 // Appends a double quote (quote != 0) or a single quote to thisString.
 extern void catString(char* thisString, UBool quote);
 // Converts one input buffer into outBuffer; returns the number of bytes written.
 extern int32_t  XMLUConvert( UConverter* inConverter,
                       UConverter* outConverter,
                       const char* inBuffer,
                       int32_t* inBufSize,
                       char* outBuffer,
                       int32_t outBufCapacity,
                       UBool flush,
                       UErrorCode* err);
 // Codepage -> Unicode -> codepage conversion through an internal UChar pivot buffer.
 extern void XMLU_fromCodepageToCodepage(    UConverter*    outConverter,
                         UConverter*    inConverter,
                         char**         target,
                         const char*        targetLimit,
                         const char**        source,
                         const char*        sourceLimit,
                         int32_t*        offsets,
                         UBool            flush,
                         UErrorCode*        err);

 // Byte signatures compared against the start of the input file by
 // getInputEncodingType().

 // Presumably "<?xm" in EBCDIC (0x4C '<', 0x6F '?', 0xA7 'x', 0x94 'm') — confirm against the code page in use.
 static const BYTE    gEBCDICPre[]    = { 0x4C, 0x6F, 0xA7, 0x94 };
 #if 0
 //not supported encodings
 static const BYTE    gUCS4BPre[]     = { 0x00, 0x00, 0x00, 0x3C };
 static const BYTE    gUCS4LPre[]     = { 0x3C, 0x00, 0x00, 0x00 };
 #endif
 // "<?" as two 16-bit units without a byte order mark: big-endian, then little-endian.
 static const BYTE    gUTF16BPre[]    = { 0x00, 0x3C, 0x00, 0x3F };
 static const BYTE    gUTF16LPre[]    = { 0x3C, 0x00, 0x3F, 0x00 };
 // "<?xml" in ASCII.
 static const char    gXMLDecl_ASCII[]= { 0x3C, 0x3F, 0x78, 0x6D, 0x6C };

 // Identifiers for the input encodings that getInputEncodingType() can report.
 // convertFile() maps EBCDIC, US_ASCII, UTF_8, UTF_16B and UTF_16L to converter
 // names; UCS_4B and UCS_4L correspond to the signatures disabled under "#if 0".
 enum Encodings
     {
         EBCDIC          = 0,
         UCS_4B          = 1,
         UCS_4L          = 2,
         US_ASCII        = 3,
         UTF_8           = 4,
         UTF_16B         = 5,
         UTF_16L         = 6,

         // NOTE(review): ENCODINGCOUNT is 5 although seven enumerators are listed
         // above (five if the two UCS-4 entries are not counted) — confirm intent.
         Encodings_Count = ENCODINGCOUNT,
         Encodings_Min   = EBCDIC,
         Encodings_Max   = UTF_16L,

         // Returned when none of the known signatures match.
         OtherEncoding   = 999
     };


 /*
  * Print the command-line help text to stdout.
  *
  * exeName is inserted into the synopsis, the "Exe name" line and the
  * closing example invocation.
  */
 void usage(char *  exeName)
 {
     // Option descriptions that need no substitution; emitted verbatim, in order.
     static const char* const fixedHelp[] =
     {
         "-h     \t= to get help (this information!) \n ",
         "-v     \t= set verbose on; \n \t\t  to get more information about the conversion process \n ",
         "-e     \t= This is a mandatory option and follows with the targetEncName",
         "       \t\t  E.g., output encoding can be like : \n \t\t  ascii, utf8, utf-16be, utf-16le, ebcdic-cp-us \n",
         "trgEncName  \t= The output encoding type needed. \n \t\t  It always should follow the -e switch\n",
         "inputFile     \t= The input XML file name \n",
         "outputFile    \t= The output XML file name \n",
         " \n For example: \n "
     };
     const unsigned int fixedHelpCount = sizeof(fixedHelp) / sizeof(fixedHelp[0]);

     fprintf(stdout, "\n USAGE: \n \t%s [-h] [-v] -e trgEncName inputFile outputFile \n\n", exeName);
     fprintf(stdout, " %s    = Exe name \n ", exeName);

     for (unsigned int line = 0; line < fixedHelpCount; ++line)
     {
         fputs(fixedHelp[line], stdout);
     }

     fprintf(stdout, " \t %s -e utf8 pr-utf-16.xml pr-utf-8.xml \n\n\n ", exeName);
 }


 /*
  * Command-line entry point.
  *
  * Expected invocation:  exe [-h] [-v] -e trgEncName inputFile outputFile
  * The "-e" switch must be followed by exactly three arguments that end the
  * command line, so "-v" has to precede it.
  *
  * Prints the usage text and exits with status 1 when fewer than five
  * arguments are given, when "-h" is seen, or when no well-formed "-e" group
  * is present. Exits with status 1 when the target converter cannot be
  * opened. Otherwise calls convertFile() and returns 0.
  */
 int main(int argc, char** argv)
 {
     UErrorCode err = U_ZERO_ERROR;

     // These point straight into argv, which outlives every use below, so no
     // heap copies (and no matching delete[]) are required.
     char* inFileName  = NULL;
     char* outFileName = NULL;
     char* encName     = NULL;

     UConverter* conv = NULL;

     if (argc < 5)
     {
         usage(argv[0]);
         exit(1);
     }

     for (int i = 1; i < argc; i++)
     {
         if (!strcmp(argv[i], "-h"))
         {
             usage(argv[0]);
             exit(1);
         }
         if (!strcmp(argv[i], "-v"))
         {
             verbose = TRUE;
         }
         else if (!strcmp(argv[i], "-e"))
         {
             // "-e" must be followed by exactly: encoding, input file, output file.
             if (argc != i + 4)
             {
                 usage(argv[0]);
                 exit(1);
             }
             encName     = argv[i + 1];
             inFileName  = argv[i + 2];
             outFileName = argv[i + 3];
             break;
         }
     }

     // Without "-e" none of the three names were set; refuse to continue
     // instead of using uninitialised pointers.
     if (encName == NULL)
     {
         usage(argv[0]);
         exit(1);
     }

     conv = ucnv_open(encName, &err);
     if (U_FAILURE(err))
     {
         if (verbose)
         {
             fprintf(stderr, "Could not create converter to: %s\n", encName);
 #if defined(_DEBUG) && defined(XP_CPLUSPLUS)
             fprintf (stderr,"FAILURE! (%s) (%d)\n", u_errorName(err), err);
 #endif
         }
         ucnv_close(conv);
         exit(1);
     }

     fprintf(stdout, "Converting %s to %s...\n", inFileName, outFileName);
     convertFile(encName, inFileName, outFileName, conv);
     fprintf(stdout, "Finished transcoding file: %s\n", inFileName);

     ucnv_close(conv);
     return 0;
 }

 void convertFile(char* encName, char* iFN, char* oFN, UConverter* outConvrtr)
 {
     //Read the input file
     //
     FILE* inFile = fopen( iFN, "rb");
     if (inFile == NULL) {
         if (verbose)
             fprintf(stderr, "Could not open input file - %s for reading \n", iFN);
         exit(1);
     }

     FILE*   outFile = fopen(oFN, "wb");
 	if (outFile == NULL)
 	{
         if (verbose)
 		    fprintf(stderr, "Could not open output file - %s for writing \n", oFN);
 		fclose(inFile);
 		return;
 	}

     char            rawBuf[RAWBUFSIZE];
     char*           pRawBuf     = NULL;
     unsigned long   bytesRead   = 0;
 	UErrorCode       err         = U_ZERO_ERROR;

     //get the file size
     //
     unsigned int    curPos      = ftell(inFile);

     if(verbose)
       fprintf(stderr, "curPos = %d\n", curPos);

     if (curPos == 0xFFFFFFFF)
     {
         fprintf(stderr, "fileSize - Could not save current pos \n");
         exit(1);
     }

     // Seek to the end and save that value for return
     //
     if ( fseek(inFile, 0 , SEEK_END) )
     {
         fprintf(stderr, "fileSize - Could not seek to end \n");
         exit(1);
     }

     const unsigned int endPos = ftell(inFile);
     if (endPos == 0xFFFFFFFF)
     {
         fprintf(stderr, "fileSize - Could not get the end pos \n");
         exit(1);
     }

     // And put the pointer back
     //
     if (fseek(inFile, curPos, SEEK_SET))
     {
         fprintf(stderr, "fileSize - Could not seek back to original pos \n");
         exit(1);
     }

     if (curPos >= endPos)
     {
         fprintf(stderr,"Reached end of input file while reading \n");
         exit(1);
     }

     unsigned int    bytesLeft   = endPos - curPos;
     if (verbose)
         fprintf(stdout,"Input file size is %d \n", bytesLeft);

     unsigned int toRead = (RAWBUFSIZE > bytesLeft) ? bytesLeft : RAWBUFSIZE;

     //Read the infile
     //
     bytesRead = fread( (void*)rawBuf, 1, toRead, inFile);
     if (ferror(inFile))
     {
         fprintf(stderr," couldnot read file for input encoding \n");
         exit(1);
     }

     if (bytesRead ==  0)
     {
         fprintf(stderr," couldnot fill raw buffer \n");
         exit(1);
     }
     pRawBuf = rawBuf;

     // get the input encoding type
     int inputEnc = getInputEncodingType((const BYTE*)rawBuf, bytesRead);
     if (inputEnc == OtherEncoding)
     {
         fprintf(stderr, " Unknown encoded input file. \n Only input encodings supported in the first line are \n");
         fprintf(stderr, " ascii, ebcdic-cp-us, utf8, utf-16be, utf-16le \n");
         exit(1);
     }

     //transcoding the first line from inEncodName to ascii and then replacing
     //the encoding=inEncodingName to encoding=outEncodingName
     //

     UChar          ucBuf[RAWBUFSIZE];
     char * inEncodName;
     char* tmpPtr = (char*) rawBuf;

     //get the input encoding name
     //
     switch (inputEnc)
     {
     case 0 :
         inEncodName = new char[strlen("ebcdic-cp-us") +1];
         strcpy(inEncodName, "ebcdic-cp-us");
         break;
     case 3 :
         inEncodName = new char[strlen("ascii") +1];
         strcpy(inEncodName, "ascii");
         break;
     case 4 :
         inEncodName = new char[strlen("utf8") +1];
         strcpy(inEncodName, "utf8");
         break;
     case 5 :
         inEncodName = new char[strlen("utf-16be") +1];
         strcpy(inEncodName, "utf-16be");
         break;
     case 6 :
         inEncodName = new char[strlen("utf-16le") +1];
         strcpy(inEncodName, "utf-16le");
         break;
     default :
         break;
     };

     if(verbose)
       {
 	fprintf(stderr, "inConverter = %s\n", inEncodName);
       }

     UConverter* inConvrtr = ucnv_open(inEncodName, &err);
     //now read and transcode the input to output file
     //Process the firstline separately
     //
     long afterFirstLine = convertFirstLine(inFile, inEncodName, outFile, encName,
 					   pRawBuf, toRead, (UChar*)ucBuf);

     //move the pointer after the first line
     //
     if (fseek(inFile, (unsigned long) afterFirstLine, SEEK_SET))
     {
         fprintf(stderr, "fileSize - Could not set the cursor to %d after the first line \n", afterFirstLine);
         exit(1);
     }
     else
       if(verbose)
 	fprintf(stderr,"Seeked to %d OK \n", afterFirstLine);
     bytesLeft = endPos - afterFirstLine;
     toRead = (RAWBUFSIZE > bytesLeft) ? bytesLeft : RAWBUFSIZE;

     //  read the rest of the input file
     //
     if (verbose)
         fprintf(stdout,"The first line consists of %d bytes \n", afterFirstLine);
     if (encodingNameInFile !=NULL)
     {
         if (inEncodName)
             delete inEncodName;
         inEncodName = new char[strlen(encodingNameInFile)+1];
         strcpy(inEncodName, encodingNameInFile);
         ucnv_close(inConvrtr);
         inConvrtr = ucnv_open(inEncodName, &err);
     }
     if (verbose)
         fprintf(stdout, "Input Encoding type = %s,  Output Encoding type = %s \n", inEncodName, encName);

     char *outBuf = new char[RAWBUFSIZE];
     int  outBufSize = RAWBUFSIZE;
     UBool tFlush = FALSE;
     err = U_ZERO_ERROR;

     if (verbose)
         fprintf(stdout, "processing the rest of the file \n");
     while( (bytesRead = fread((void *) rawBuf, 1, toRead, inFile)) > 0 || !tFlush)
     {
          int32_t  bytesNeeded = XMLUConvert( inConvrtr,
                       outConvrtr,
                       pRawBuf,
                       (int32_t*)&bytesRead,
                       outBuf,
                       outBufSize,
                       tFlush,
                       &err);
          if (bytesNeeded > 0)
          {
              long bout =
                  fwrite((void *) outBuf, 1, bytesNeeded, outFile);
              if (bout != bytesNeeded)
              {
                  fprintf(stderr, "Wrote only %d bytes.\n", bout);
                  fclose(inFile);
                  fclose(outFile);
              }
          }

         if ((err != U_BUFFER_OVERFLOW_ERROR) && U_FAILURE(err) )
         {
 #if defined(_DEBUG)
             fprintf (stderr, "Error transcoding rest of the file: (%s) %d\n", u_errorName(err), err);
 #endif
             fclose(inFile);
             fclose(outFile);
             exit(1);
         }
         if ((bytesRead > 0) && (err !=U_ZERO_ERROR))
         {
 	  if(verbose)
 	    fprintf(stderr, "err=%d * read %d bytes\n", err,bytesRead);

             if (fseek(inFile, (curPos+bytesRead), SEEK_SET))
             {
                 fprintf(stderr, "fileSize - Could not set the input cursor to %d (curpos=%d, bytesRead=%d)\n", curPos+bytesRead,curPos,bytesRead);
                 exit(1);
             }
             curPos = ftell(inFile);
             bytesLeft = endPos - curPos;
         }
         else
         {
             curPos = ftell(inFile);
             bytesLeft = endPos - curPos;
         }
         toRead = (RAWBUFSIZE > bytesLeft) ? bytesLeft : RAWBUFSIZE;
         if (toRead < RAWBUFSIZE) tFlush = TRUE;
         if (err == U_BUFFER_OVERFLOW_ERROR)
             err = U_ZERO_ERROR;
     }
     ucnv_close(inConvrtr);
     delete inEncodName;
     fclose(inFile);
     fclose(outFile);
 };


 /*
  * Guess the encoding of an XML file from its leading bytes.
  *
  * rawBuffer  start of the file contents
  * byteCount  number of valid bytes in rawBuffer
  *
  * Returns one of the Encodings values:
  *   US_ASCII       the buffer starts with the ASCII bytes "<?xml"
  *   UTF_8          fewer than two bytes are available (fallback)
  *   UTF_16B/L      a UTF-16 byte order mark, or "<?" encoded as two 16-bit units
  *   EBCDIC         the four-byte EBCDIC signature
  *   OtherEncoding  anything else
  */
 int getInputEncodingType(const BYTE* rawBuffer, unsigned long byteCount)
 {
     // ASCII-compatible declaration. The comparison needs exactly
     // sizeof(gXMLDecl_ASCII) bytes, so "at least that many" is the right bound.
     if (byteCount >= sizeof(gXMLDecl_ASCII)
         && !memcmp(rawBuffer, gXMLDecl_ASCII, sizeof(gXMLDecl_ASCII)))
     {
         return US_ASCII;
     }

     // With fewer than two bytes nothing can be recognised; fall back to UTF-8.
     if (byteCount < 2)
         return UTF_8;

     // At least two bytes: look for a UTF-16 byte order mark.
     if ((rawBuffer[0] == 0xFE) && (rawBuffer[1] == 0xFF))
         return UTF_16B;
     if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE))
         return UTF_16L;

     // The remaining signatures are four bytes long.
     if (byteCount < 4)
         return OtherEncoding;

     // "<?" as UTF-16 without a byte order mark.
     if (!memcmp(rawBuffer, gUTF16BPre, 4))
         return UTF_16B;
     if (!memcmp(rawBuffer, gUTF16LPre, 4))
         return UTF_16L;

     // EBCDIC signature.
     if (!memcmp(rawBuffer, gEBCDICPre, 4))
         return EBCDIC;

     // Nothing recognised.
     return OtherEncoding;
 }


 long convertFirstLine( FILE* inF, char* inEncName,
                        FILE* outF, char* outEncName,
                        char* ptrBuf, unsigned long toRead,
                        UChar* uBuf)
 {
     //Here we read the inputFile with the specified buffer size.
     //Then convert this to ascii. then read the first line and convert to
     //output and input encoding types and return for rest of the conversion
     //

     if (fseek(inF, 0, SEEK_SET))
     {
         fprintf(stderr, "file - Could not seek the begin pos \n");
         exit(1);
     }

     unsigned long bytesRead = fread( (void*)ptrBuf, 1, toRead, inF);

     char            tempBuf[RAWBUFSIZE];
     int             bufLength       = 0;
     long            bytesNeeded     = 0;
     UErrorCode      err             = U_ZERO_ERROR;

     bytesNeeded = ucnv_convert("ascii",
 			inEncName,
 			(char*) tempBuf,
 			0,
 			(const char*) ptrBuf,
 			bytesRead,
 			&err);

     if (err == U_BUFFER_OVERFLOW_ERROR)
     {
 	    err = U_ZERO_ERROR;
     }
 	else if (U_FAILURE(err))
 	{
 #if defined(_DEBUG)
 		printf ("Error transcoding first line of input file: (%s) %d\n", u_errorName(err), err);
 #endif
         fclose(inF);
     	fclose(outF);
         exit(1);
 	}

     ucnv_convert("ascii",
 			inEncName,
 			(char*) tempBuf,
 			bytesNeeded,
 			(const char*) ptrBuf,
 			bytesRead,
 			&err);

 	if (U_FAILURE(err))
 	{
 #if defined(_DEBUG)
 		printf ("Error transcoding2 first line of input file: (%s) %d\n", u_errorName(err), err);
 #endif
         fclose(inF);
     	fclose(outF);
         exit(1);
     }
     else
 	{
         //read the tempBuf to get the first line
         //
         char firstLineBuf[FIRSTLINEBUF];
         int tempBufLength = 0;

         for( bufLength = 0,  tempBufLength=0; bufLength < FIRSTLINEBUF; bufLength++, tempBufLength++)
         {
             if ((tempBufLength == 0) && ((inEncName == "utf-16be") || (inEncName == "utf-16le") || (inEncName == "utf16")) )
                 tempBufLength++;
             firstLineBuf[bufLength] = (char)tempBuf[tempBufLength];
             if (tempBuf[tempBufLength] == 0x3E) {
                 firstLineBuf[bufLength+1] = '\0';
                 break;
             }

         }
         char* pFLB = new char[sizeof(firstLineBuf) +1];
         strcpy(pFLB, firstLineBuf);

         //if the file doesnot contain the version string line then its and illegal file
         //
         if (firstLineBuf[0] != 0x3C )
         {
               fprintf(stderr,"Illegal xml file: It doesnot contain the xml declaration statement on the first line \n");
               fclose(inF);
     	      fclose(outF);
               exit(1);
         }

         UBool encString      = TRUE;
         UBool stdString      = TRUE;
         UBool encInsertMid   = FALSE;
         UBool encInsertLast  = FALSE;
         UBool dQuote         = TRUE;
         char* doubleQuote   = "\"";
         char* singleQuote   = "\'";

         if (!strstr( (const char*)pFLB, doubleQuote))
         {
             if (!strstr( (const char*)pFLB, singleQuote))
             {
               fprintf(stderr,"Illegal xml file: It doesnot contain the approprite xml declaration \n");
               fclose(inF);
     	      fclose(outF);
               exit(1);
             }
             dQuote = FALSE;
         }

         char* newString     = strstr( (const char*) pFLB, "encoding");
         char* stringWithEnc = 0;

         if (!newString)
             encString = FALSE;
         else
 	  {
 	    stringWithEnc = new char[strlen(newString)+1];
             strcpy(stringWithEnc, newString);
 	  }

         newString = strstr( (const char*) pFLB, "standalone");
         char* stringWithStd = 0;
         if (!newString)
             stdString = FALSE;
         else
         {
 	    stringWithStd = new char[strlen(newString)+1];
             strcpy(stringWithStd, newString);
        }

         if (!encString && !stdString)
              encInsertLast = TRUE;
         if (!encString && stdString)
              encInsertMid = TRUE;

         //Encodingname for the rest of the input file could be different.
         //If its not specified in the  first line then assume it to be UTF8
         if (encInsertLast || encInsertMid)
         {
             //if the encoding type was found utf16 family or ebcdic and
             // the encoding string is not present in the file then its an error
             if (!strcmp(inEncName, "utf-16be")
                 || !strcmp(inEncName, "utf-16le")
                 || !strcmp(inEncName, "ebcdic-cp-us"))
             {
                 fprintf(stderr, "Illegal xml file: it doesnot contain the encoding string in the first line of the input file\n");
                 fclose(inF);
     	        fclose(outF);
                 exit(1);
             }
             strcpy(encodingNameInFile, inEncName);
         }

         char* tempString    = " encoding=";
         char* dupFLB        = uprv_strdup(pFLB);
 	int stringTwoLength = 0;

 	/* build up the length */
 	stringTwoLength = bufLength;

 	if(tempString)
 	  stringTwoLength += strlen(tempString);

 	if(outEncName)
 	  stringTwoLength += strlen(outEncName);

 	if(stringWithStd)
 	  stringTwoLength += strlen(stringWithStd);

 	stringTwoLength   += 5;

         char* stringTwo     = new char[stringTwoLength];

         if (encInsertLast) {
             char* stringOne = new char[bufLength];
             strncpy(stringOne, pFLB, bufLength-1);
             strcpy(stringOne+bufLength-1, "");
             stringTwo = strcpy(stringTwo, stringOne);
             strcat(stringTwo, tempString);
             catString(stringTwo, dQuote);
             strcat(stringTwo, outEncName);
             catString(stringTwo, dQuote);
             strcat(stringTwo , " ?>");
             delete stringOne;
         }
         //insert the string before 'standalone' statement
         else if (encInsertMid) {
             char* stringThree = new char[bufLength + strlen(tempString) + strlen(outEncName) + 5];
             if (dQuote)
                 stringThree = strtok(dupFLB, doubleQuote);
             else
                 stringThree = strtok(dupFLB, singleQuote);

             strcpy(stringTwo, stringThree);
             catString(stringTwo, dQuote);

             char* tmpString;
             if (dQuote)
                 tmpString = strtok(0, doubleQuote);
             else
                 tmpString = strtok(0, singleQuote);
             if (tmpString != NULL)
                 strcat(stringTwo, tmpString);

             catString(stringTwo, dQuote);
             strcat(stringTwo, tempString);
             catString(stringTwo, dQuote);

             strcat(stringTwo, outEncName);
             if (dQuote)
                 strcat(stringTwo, "\" ");
             else
                 strcat(stringTwo, "\' ");
             strcat(stringTwo, stringWithStd);
             delete stringThree;
         }
         //if the encoding string is there then modify the output encoding name in it.
         else if (encString)
         {
             char* stringFive  = new char[strlen(dupFLB)+1];

             if (dQuote)
                 stringFive = strtok (dupFLB, doubleQuote);
             else
                 stringFive = strtok (dupFLB, singleQuote);

             strcpy(stringTwo, stringFive);
             catString(stringTwo, dQuote);
             while (stringFive != NULL)
             {
                 if (dQuote)
                     stringFive = strtok(0,doubleQuote);
                 else
                     stringFive = strtok(0,singleQuote);

                 if (stringFive == NULL)
                     break;
                 strcat(stringTwo, stringFive);

                 char* n1String = strstr(stringFive, ">");
                 if (!n1String)
                     catString(stringTwo, dQuote);

                 char* nString = strstr(stringFive, "encoding");
                 if (nString)
                 {
                     strcat(stringTwo, outEncName);
                     if (dQuote)
                         stringFive = strtok(0, doubleQuote);
                     else
                         stringFive = strtok(0, singleQuote);
                     strcpy(encodingNameInFile, stringFive); //this is the encoded string name
                     catString(stringTwo, dQuote);
                 }
             }
             if (stringFive != NULL)
             {
                 delete stringFive;
                 stringFive = 0;
             }
         }

         // introduce the first order bytes for utf16 be and le files
         //
         if (!strcmp(outEncName, "utf-16be") || !strcmp(outEncName, "utf16"))
         {
             uBuf[0] = 0xFE;
             fwrite( (void*) uBuf, 1, 1, outF);
             uBuf[0] = 0xFF;
             fwrite( (void*) uBuf, 1, 1, outF);
         } else if (!strcmp(outEncName , "utf-16le"))
         {
            uBuf[0] = 0xFF;
            fwrite( (void*) uBuf, 1, 1, outF);
            uBuf[0] = 0xFE;
            fwrite( (void*) uBuf, 1, 1, outF);
         }

         err = U_ZERO_ERROR;
         long oneChar = 0;
         while ( *stringTwo != '\0' )
         {
             //transcode character-by-character
             oneChar = ucnv_convert(outEncName,
 			    "ascii",
 			    (char*) uBuf,
 			    0,
                 (const char*) stringTwo,
 			    1,
 			    &err);
             if (err == U_BUFFER_OVERFLOW_ERROR)
             {
 	            err = U_ZERO_ERROR;
             }
 	        else if (U_FAILURE(err))
 	        {
 #if defined(_DEBUG)
 		        fprintf (stderr, "Error transcoding char-by-char: (%s) %d\n", u_errorName(err), err);
 #endif
                 fclose(inF);
     	        fclose(outF);
                 exit(1);
 	        }

             ucnv_convert(outEncName,
 			    "ascii",
 			    (char*) uBuf,
 			    oneChar,
                 (const char*) stringTwo,
 			    1,
 			    &err);
 	        if (U_FAILURE(err))
 	        {
 #if defined(_DEBUG)
 		        fprintf (stderr, "Error transcoding2 char-by-char: (%s) %d\n", u_errorName(err), err);
 #endif
                 fclose(inF);
     	        fclose(outF);
                 exit(1);
             }
             fwrite( (void*) uBuf, 1, oneChar, outF);
             stringTwo++;
         }
     }


     //Now get the pointer offset after the first line in the input file
     //and return this position
     //
     char* newInEncName  = new char[strlen(inEncName) +1];
     strcpy(newInEncName, inEncName);
     if (encodingNameInFile !=NULL)
     {
         if (inEncName)
             delete newInEncName;
         newInEncName = new char[strlen(encodingNameInFile)+1];
         strcpy(newInEncName, encodingNameInFile);
     }

      char   oldBuf[RAWBUFSIZE];
      int    bufHere    = bufLength +1;
      if (!strcmp(newInEncName, "utf-16be") || !strcmp(newInEncName, "utf16") || !strcmp(newInEncName, "utf-16le"))
      {
          bufHere +=1;
         memcpy((void*)oldBuf, (void*) tempBuf, bufHere);
      }
      else
               memcpy((void*)oldBuf, (void*) tempBuf, bufHere);

      char   newBuf[RAWBUFSIZE];
      long   endBytes    = 0;
      //transcode this ascii type to the input encoding type
      //and get the pointer to the end of first line in the input buffer
      //
      err = U_ZERO_ERROR;
      endBytes = ucnv_convert(newInEncName,
      		"ascii",
 			(char*) newBuf,
 			0,
 			(const char*) oldBuf,
 			bufHere,
 			&err);

     if (err == U_BUFFER_OVERFLOW_ERROR)
     {
 	    err = U_ZERO_ERROR;
     }
 	else if (U_FAILURE(err))
 	{
 #if defined(_DEBUG)
 		fprintf (stderr, "Error transcoding from ascii to input encoding: (%s) %d\n", u_errorName(err), err);
 #endif
         fclose(inF);
     	fclose(outF);
         exit(1);
 	}
     ucnv_convert(newInEncName,
         	"ascii",
 			(char*) newBuf,
 			endBytes,
 			(const char*) oldBuf,
 			bufHere,
 			&err);
 	if (U_FAILURE(err))
 	{
 #if defined(_DEBUG)
 		fprintf (stderr, "Error transcoding2 from ascii to input encoding: (%s) %d\n", u_errorName(err), err);
 #endif
         delete newInEncName;
         fclose(inF);
     	fclose(outF);
         exit(1);
     }

     return endBytes;
 }


 /*
  * Convert the bytes in inBuffer from inConverter's encoding to
  * outConverter's encoding, writing the result to outBuffer.
  *
  * inBufSize       points to the number of input bytes; it is read but not updated
  * outBufCapacity  size of outBuffer in bytes
  * flush           forwarded to XMLU_fromCodepageToCodepage()
  * err             in/out ICU status; nothing is converted if it already signals failure
  *
  * Returns the number of bytes written to outBuffer (0 if *err was already a failure).
  */
 int32_t  XMLUConvert( UConverter* inConverter,
                       UConverter* outConverter,
                       const char* inBuffer,
                       int32_t* inBufSize,
                       char* outBuffer,
                       int32_t outBufCapacity,
                       UBool flush,
                       UErrorCode* err)
 {
     // Moving cursors plus fixed limits for both buffers.
     const char*       srcCursor = inBuffer;
     const char* const srcLimit  = inBuffer + *inBufSize;
     char*             dstCursor = outBuffer;
     const char* const dstLimit  = outBuffer + outBufCapacity;

     if (U_FAILURE(*err))
     {
         return 0;
     }

     XMLU_fromCodepageToCodepage(outConverter,
                                 inConverter,
                                 &dstCursor,
                                 dstLimit,
                                 &srcCursor,
                                 srcLimit,
                                 NULL,
                                 flush,
                                 err);

     // The distance the output cursor advanced is the byte count produced.
     return dstCursor - outBuffer;
 }

 /*
  * Convert bytes from inConverter's encoding to outConverter's encoding
  * through an intermediate UChar buffer.
  *
  * target / targetLimit  output cursor (advanced) and end of the output buffer
  * source / sourceLimit  input cursor (advanced) and end of the input buffer
  * offsets               not used
  * flush                 passed to ucnv_toUnicode(); ucnv_fromUnicode() is
  *                       always called with TRUE
  * err                   in/out ICU status; the function returns immediately
  *                       if it already signals failure
  *
  * The outer loop runs until the input is consumed or an error is reported.
  * U_BUFFER_OVERFLOW_ERROR from ucnv_toUnicode() only means the intermediate
  * buffer is full and is cleared before the buffer is drained.
  */
 void XMLU_fromCodepageToCodepage(    UConverter*    outConverter,
                         UConverter*    inConverter,
                         char**         target,
                         const char*    targetLimit,
                         const char**   source,
                         const char*    sourceLimit,
                         int32_t*       offsets,
                         UBool         flush,
                         UErrorCode*    err)
 {
     UChar        pivot[RAWBUFSIZE];
     UChar* const pivotLimit = pivot + RAWBUFSIZE;

     if (U_FAILURE (*err))
     {
         return;
     }

     while ((*source != sourceLimit) && U_SUCCESS (*err))
     {
         // Stage 1: source encoding -> Unicode, filling the pivot buffer.
         UChar* pivotFill = pivot;
         ucnv_toUnicode (inConverter,
                         &pivotFill,
                         pivotLimit,
                         source,
                         sourceLimit,
                         NULL,
                         flush,
                         err);

         // Any failure other than "pivot buffer full" ends the conversion.
         if (U_FAILURE (*err) && (*err != U_BUFFER_OVERFLOW_ERROR))
         {
             break;
         }
         *err = U_ZERO_ERROR;

         // Stage 2: Unicode -> target encoding, draining the pivot buffer.
         const UChar* pivotDrain = pivot;
         while ((pivotDrain != pivotFill) && U_SUCCESS (*err))
         {
             ucnv_fromUnicode (outConverter,
                               target,
                               targetLimit,
                               &pivotDrain,
                               pivotFill,
                               NULL,
                               TRUE,
                               err);
         }
     }
 }

 /*
  * Append one quote character to thisString: a double quote when quote is
  * non-zero, a single quote otherwise. The caller must provide room for it.
  */
 void catString(char* thisString, UBool quote)
 {
     const char* const quoteMark = quote ? "\"" : "\'";
     strcat(thisString, quoteMark);
 }