source/samples/scsu/compsamp.cpp - external/github.com/unicode-org/icu - Git at Google

 /**************************************************************************
 *
 *   Copyright (C) 2000, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ***************************************************************************
 *   file name:  compsamp.cpp
 *   encoding:   ASCII (7-bit)
 *
 *   created on: 2000may30
 *   created by: Steven R. Loomis
 *
 *   Sample code for the ICU compression routines.
 *
 * Note: Nothing special is needed to build this sample. Link with
 *       the icu UC and icu I18N libraries.
 *
 *       I use 'assert' for error checking; you will probably want
 *       something more flexible.  '*** START SAMPLE ***' and
 *       '*** END SAMPLE ***' mark pieces suitable for stand-alone
 *       code snippets.
 *
 */

 #include <stdio.h>
 #include <ctype.h>            /* for isspace, etc.    */
 #include <assert.h>
 #include <string.h>

 #include "unicode/utypes.h"   /* Basic ICU data types */
 #include "unicode/scsu.h"
 #include "unicode/uchar.h"
 #include "unicode/ustring.h"  /* some more string fcns*/
 #include "unicode/uloc.h"

 /* Some utility functions */

 static const UChar kNone[] = { 0x0000 };

 /**
  * Print a single UChar to stdout as a fixed-width cell, seven columns wide.
  *
  *  - Graphic ASCII characters are shown quoted, e.g.  'A'.
  *  - Code units above U+007F are shown as the first six characters of the
  *    name reported by u_charName(), or as question marks if the lookup fails.
  *  - Space is shown quoted; tab and newline are shown as the escapes \t and
  *    \n so that they cannot break the column layout.
  *  - Any other non-graphic ASCII character is shown as a blank cell.
  *
  * @param c the UTF-16 code unit to print
  */
 static void prettyPrintUChar(UChar c)
 {
   if (c > 0x007F) {
     char buf[100];
     UErrorCode status = U_ZERO_ERROR;

     u_charName(c, U_UNICODE_CHAR_NAME, buf, (int32_t)sizeof(buf), &status);
     if (U_SUCCESS(status)) {
       /* Precision 6 truncates the name; width 7 pads the cell. */
       printf("%-7.6s", buf);
     } else {
       printf("?????? ");
     }
   } else if (isgraph(c)) {
     printf("  '%c'  ", (char)(0x00FF & c));
   } else {
     /* Non-graphic ASCII: only a few whitespace characters get a label. */
     switch ((char)(c & 0x007F)) {
     case ' ':
       printf("  ' '  ");
       break;
     case '\t':
       printf("  \\t   ");
       break;
     case '\n':
       printf("  \\n   ");
       break;
     default:
       printf("       ");
       break;
     }
   }
 }


 static void printUChars(const char  *name = "?",
                  const UChar *uch  = kNone,
                  int32_t     len   = -1 )
 {
   int32_t i;

   if( (len == -1) && (uch) ) {
     len = u_strlen(uch);
   }

   printf("% 5s:", name);
   for( i = 0; i <len; i++) {
     printf("%- 6d ", i);
   }
   printf("\n");

   printf("% 5s: ", "uni");
   for( i = 0; i <len; i++) {
     printf("\\u%04X ", (int)uch[i]);
   }
   printf("\n");

   printf("% 5s: ", "ch");
   for( i = 0; i <len; i++) {
     prettyPrintUChar(uch[i]);
   }
   printf("\n");
 }

 /**
  * Dump a byte buffer to stdout as three rows: the index of each byte, its
  * value as a \xXX escape, and the byte itself (quoted) when isgraph() accepts
  * it, or a blank cell otherwise.
  *
  * @param name label printed at the start of the first row
  * @param uch  the bytes to dump
  * @param len  number of bytes; -1 means "measure with strlen()"
  */
 static void printBytes(const char  *name = "?",
                  const uint8_t *uch  = (const uint8_t*)"",
                  int32_t     len   = -1 )
 {
   int32_t count = len;
   int32_t pos;

   /* Resolve the "NUL-terminated" sentinel, but never measure a null pointer. */
   if (count == -1 && uch != NULL) {
     count = strlen((const char*)uch);
   }

   /* Row 1: column indices. */
   printf("% 5s:", name);
   pos = 0;
   while (pos < count) {
     printf(" %- 4d", pos);
     ++pos;
   }
   printf("\n");

   /* Row 2: hexadecimal value of every byte. */
   printf("% 5s: ", "uni");
   pos = 0;
   while (pos < count) {
     printf("\\x%02X ", 0x00FF & (int)uch[pos]);
     ++pos;
   }
   printf("\n");

   /* Row 3: the byte as a character where it is printable. */
   printf("% 5s: ", "ch");
   for (pos = 0; pos < count; ++pos) {
     const uint8_t byte = uch[pos];

     if (!isgraph(byte)) {
       printf("     ");
       continue;
     }
     printf(" '%c' ", (char)byte);
   }
   printf("\n");
 }


 /*******************************************************************
   Very simple sample: compress the word 'Moscow' in Russian, followed
   by an exclamation mark (!), with a single scsu_compress() call, then
   print the source code units and the compressed bytes.

   Returns U_ZERO_ERROR.  If assert() is compiled out and compression
   fails, the failing status is returned instead and nothing is printed
   after the banner.
  */
 UErrorCode compsample_01()
 {
   printf("\n\n==============================================\n"
          "Sample 01: C: simple Unicode compression\n");


   // "Moskva!" in Cyrillic letters, to be compressed with SCSU.
   UChar input[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
                      0x0430, 0x0021, 0x0000 };
   const UChar *source = input;
   uint8_t buffer[100];
   uint8_t *target = buffer;
   int32_t targetLen = sizeof(buffer);
   int32_t sourceLen = u_strlen(input);

   UErrorCode status = U_ZERO_ERROR;

   // **************************** START SAMPLE *******************
   // set up the compressor
   UnicodeCompressor comp;
   scsu_init(&comp);

   // target and source are passed by address; the limits are one past the
   // end of the output buffer and of the input text respectively.
   scsu_compress(&comp, &target, buffer+targetLen, &source,
                 input+sourceLen, &status);
   assert(U_SUCCESS(status));
   if(U_FAILURE(status)) {
     // Only reachable when assert() is compiled out (NDEBUG).
     return status;
   }

   // The distance target has moved from buffer is the number of bytes
   // produced; cast it so the argument matches the %d conversion.
   fprintf(stderr, "Bytes converted: %d\n", (int)(target-buffer));

   // ***************************** END SAMPLE ********************

   // Print it out
   printUChars("src", input);
   printf("\n");
   printBytes("targ", buffer, (int32_t)(target - buffer));

   return U_ZERO_ERROR;
 }

 /**
  * Number of sample strings available to compsample_02().
  * There is one string per entry reported by uloc_countAvailable().
  */
 int32_t countOurDataStrings()
 {
   const int32_t available = uloc_countAvailable();

   return available;
 }

 int32_t getOurDataString(int32_t i, UChar *s, int32_t size)
 {
   UErrorCode status = U_ZERO_ERROR;
   int32_t len;
   const char *loc = uloc_getAvailable(i);
   len =  uloc_getDisplayName(loc,loc,s,size,&status);
   if(U_FAILURE(status))
     {
       return 0;
     }
   return len;
 }

 void compsample_02()
 {
   printf("\n\n==============================================\n"
          "Sample 02: C: bulk Unicode compression\n");

 #define SAMPLE2BUFFERSIZE 1024  /* larger than our largest data */
   UChar input[SAMPLE2BUFFERSIZE];
   const UChar *source;
   uint8_t output[SAMPLE2BUFFERSIZE];
   uint8_t *target;
   int32_t sourceLen;
   int32_t count;
   int32_t i;
   int32_t charsIn = 0,bytesOut = 0;
   FILE *f;

   UErrorCode status = U_ZERO_ERROR;

   f = fopen("outdata2.scsu", "w");

   // **************************** START SAMPLE *******************
   // set up the compressor
   UnicodeCompressor comp;
   scsu_init(&comp);

   count = countOurDataStrings();
   for(i=0;i<count;i++)
     {
       sourceLen = getOurDataString(i, input, SAMPLE2BUFFERSIZE);
       charsIn += sourceLen;
       //      printUChars("src", input);

       /* Now, loop and write out all of the data */
       source = input;
       target = output;

       while(source < (input+sourceLen))
         {
           scsu_compress(&comp, &target, output+SAMPLE2BUFFERSIZE,
                         &source, input+sourceLen, &status);

           if( (status == U_ZERO_ERROR) || (status == U_BUFFER_OVERFLOW_ERROR)) {
             /* got all of it */
 //            printBytes("out", output, target-output); // Uncomment for very verbose output..

             fwrite(output, 1, target-output, f);
             bytesOut += (target-output);

             target = output; /* reset target to beginning */
             if(status == U_ZERO_ERROR) {
               break; /* Got everything! */
             }

             status = U_ZERO_ERROR; /* reset, go get another chunk. */
           }
         }
     }

   fclose(f);
   printf("done[02] - %d uchars in, %d bytes written. \n", charsIn, bytesOut);
   /* at this point, call scsu_reset(&comp) if you want ot write out
      a different data stream with the same compressor. */

   /************************* END SAMPLE ************************/
 }

 /**
  * Program entry point: runs both samples in order and always exits with 0.
  * Each sample prints its own banner and results.
  */
 int main()
 {
   /* The status returned by the first sample is intentionally not inspected. */
   (void)compsample_01();
   compsample_02();
   return 0;
 }
	/**************************************************************************
	*
	* Copyright (C) 2000, International Business Machines
	* Corporation and others. All Rights Reserved.
	*
	***************************************************************************
	* file name: compsamp.cpp
	* encoding: ASCII (7-bit)
	*
	* created on: 2000may30
	* created by: Steven R. Loomis
	*
	* Sample code for the ICU compression routines.
	*
	* Note: Nothing special is needed to build this sample. Link with
	* the icu UC and icu I18N libraries.
	*
	* I use 'assert' for error checking; you will probably want
	* something more flexible. '*** START SAMPLE ***' and
	* '*** END SAMPLE ***' mark pieces suitable for stand-alone
	* code snippets.
	*
	*/

	#include <stdio.h>
	#include <ctype.h> /* for isspace, etc. */
	#include <assert.h>
	#include <string.h>

	#include "unicode/utypes.h" /* Basic ICU data types */
	#include "unicode/scsu.h"
	#include "unicode/uchar.h"
	#include "unicode/ustring.h" /* some more string fcns*/
	#include "unicode/uloc.h"

	/* Some utility functions */

	static const UChar kNone[] = { 0x0000 };

	/**
	 * Print a single UChar to stdout as a fixed-width cell, seven columns wide.
	 *
	 *  - Graphic ASCII characters are shown quoted, e.g.  'A'.
	 *  - Code units above U+007F are shown as the first six characters of the
	 *    name reported by u_charName(), or as question marks if the lookup fails.
	 *  - Space is shown quoted; tab and newline are shown as the escapes \t and
	 *    \n so that they cannot break the column layout.
	 *  - Any other non-graphic ASCII character is shown as a blank cell.
	 *
	 * @param c the UTF-16 code unit to print
	 */
	static void prettyPrintUChar(UChar c)
	{
	  if (c > 0x007F) {
	    char buf[100];
	    UErrorCode status = U_ZERO_ERROR;

	    u_charName(c, U_UNICODE_CHAR_NAME, buf, (int32_t)sizeof(buf), &status);
	    if (U_SUCCESS(status)) {
	      /* Precision 6 truncates the name; width 7 pads the cell. */
	      printf("%-7.6s", buf);
	    } else {
	      printf("?????? ");
	    }
	  } else if (isgraph(c)) {
	    printf("  '%c'  ", (char)(0x00FF & c));
	  } else {
	    /* Non-graphic ASCII: only a few whitespace characters get a label. */
	    switch ((char)(c & 0x007F)) {
	    case ' ':
	      printf("  ' '  ");
	      break;
	    case '\t':
	      printf("  \\t   ");
	      break;
	    case '\n':
	      printf("  \\n   ");
	      break;
	    default:
	      printf("       ");
	      break;
	    }
	  }
	}


	static void printUChars(const char *name = "?",
	const UChar *uch = kNone,
	int32_t len = -1 )
	{
	int32_t i;

	if( (len == -1) && (uch) ) {
	len = u_strlen(uch);
	}

	printf("% 5s:", name);
	for( i = 0; i <len; i++) {
	printf("%- 6d ", i);
	}
	printf("\n");

	printf("% 5s: ", "uni");
	for( i = 0; i <len; i++) {
	printf("\\u%04X ", (int)uch[i]);
	}
	printf("\n");

	printf("% 5s: ", "ch");
	for( i = 0; i <len; i++) {
	prettyPrintUChar(uch[i]);
	}
	printf("\n");
	}

	/**
	 * Dump a byte buffer to stdout as three rows: the index of each byte, its
	 * value as a \xXX escape, and the byte itself (quoted) when isgraph() accepts
	 * it, or a blank cell of the same width otherwise.
	 *
	 * @param name label printed at the start of the first row
	 * @param uch  pointer to the bytes to dump
	 * @param len  number of bytes; -1 means "measure with strlen()"
	 */
	static void printBytes(const char *name = "?",
	                       const uint8_t *uch = (const uint8_t *)"",
	                       int32_t len = -1 )
	{
	  int32_t i;

	  /* Resolve the "NUL-terminated" sentinel, but never measure a null pointer. */
	  if( (len == -1) && (uch) ) {
	    len = (int32_t)strlen((const char*)uch);
	  }

	  /* Row 1: column indices, five columns per byte. */
	  printf("% 5s:", name);
	  for( i = 0; i <len; i++) {
	    printf(" %- 4d", i);
	  }
	  printf("\n");

	  /* Row 2: hexadecimal value of every byte. */
	  printf("% 5s: ", "uni");
	  for( i = 0; i <len; i++) {
	    printf("\\x%02X ", 0x00FF & (int)uch[i]);
	  }
	  printf("\n");

	  /* Row 3: the byte as a character where it is printable. */
	  printf("% 5s: ", "ch");
	  for( i = 0; i <len; i++) {
	    if(isgraph(uch[i])) {
	      printf(" '%c' ", (char)uch[i]);
	    } else {
	      /* Blank cell, same five-column width as the quoted form. */
	      printf("     ");
	    }
	  }
	  printf("\n");
	}


	/*******************************************************************
	  Very simple sample: compress the word 'Moscow' in Russian, followed
	  by an exclamation mark (!), with a single scsu_compress() call, then
	  print the source code units and the compressed bytes.

	  Returns U_ZERO_ERROR.  If assert() is compiled out and compression
	  fails, the failing status is returned instead and nothing is printed
	  after the banner.
	*/
	UErrorCode compsample_01()
	{
	  printf("\n\n==============================================\n"
	         "Sample 01: C: simple Unicode compression\n");


	  // "Moskva!" in Cyrillic letters, to be compressed with SCSU.
	  UChar input[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
	                    0x0430, 0x0021, 0x0000 };
	  const UChar *source = input;
	  uint8_t buffer[100];
	  uint8_t *target = buffer;
	  int32_t targetLen = sizeof(buffer);
	  int32_t sourceLen = u_strlen(input);

	  UErrorCode status = U_ZERO_ERROR;

	  // ************************** START SAMPLE *****************
	  // set up the compressor
	  UnicodeCompressor comp;
	  scsu_init(&comp);

	  // target and source are passed by address; the limits are one past the
	  // end of the output buffer and of the input text respectively.
	  scsu_compress(&comp, &target, buffer+targetLen, &source,
	                input+sourceLen, &status);
	  assert(U_SUCCESS(status));
	  if(U_FAILURE(status)) {
	    // Only reachable when assert() is compiled out (NDEBUG).
	    return status;
	  }

	  // The distance target has moved from buffer is the number of bytes
	  // produced; cast it so the argument matches the %d conversion.
	  fprintf(stderr, "Bytes converted: %d\n", (int)(target-buffer));

	  // *************************** END SAMPLE ******************

	  // Print it out
	  printUChars("src", input);
	  printf("\n");
	  printBytes("targ", buffer, (int32_t)(target - buffer));

	  return U_ZERO_ERROR;
	}

	/**
	 * Number of sample strings available to compsample_02().
	 * There is one string per entry reported by uloc_countAvailable().
	 */
	int32_t countOurDataStrings()
	{
	  const int32_t available = uloc_countAvailable();

	  return available;
	}

	int32_t getOurDataString(int32_t i, UChar *s, int32_t size)
	{
	UErrorCode status = U_ZERO_ERROR;
	int32_t len;
	const char *loc = uloc_getAvailable(i);
	len = uloc_getDisplayName(loc,loc,s,size,&status);
	if(U_FAILURE(status))
	{
	return 0;
	}
	return len;
	}

	void compsample_02()
	{
	printf("\n\n==============================================\n"
	"Sample 02: C: bulk Unicode compression\n");

	#define SAMPLE2BUFFERSIZE 1024 /* larger than our largest data */
	UChar input[SAMPLE2BUFFERSIZE];
	const UChar *source;
	uint8_t output[SAMPLE2BUFFERSIZE];
	uint8_t *target;
	int32_t sourceLen;
	int32_t count;
	int32_t i;
	int32_t charsIn = 0,bytesOut = 0;
	FILE *f;

	UErrorCode status = U_ZERO_ERROR;

	f = fopen("outdata2.scsu", "w");

	// ************************** START SAMPLE *****************
	// set up the compressor
	UnicodeCompressor comp;
	scsu_init(&comp);

	count = countOurDataStrings();
	for(i=0;i<count;i++)
	{
	sourceLen = getOurDataString(i, input, SAMPLE2BUFFERSIZE);
	charsIn += sourceLen;
	// printUChars("src", input);

	/* Now, loop and write out all of the data */
	source = input;
	target = output;

	while(source < (input+sourceLen))
	{
	scsu_compress(&comp, &target, output+SAMPLE2BUFFERSIZE,
	&source, input+sourceLen, &status);

	if( (status == U_ZERO_ERROR) \|\| (status == U_BUFFER_OVERFLOW_ERROR)) {
	/* got all of it */
	// printBytes("out", output, target-output); // Uncomment for very verbose output..

	fwrite(output, 1, target-output, f);
	bytesOut += (target-output);

	target = output; /* reset target to beginning */
	if(status == U_ZERO_ERROR) {
	break; /* Got everything! */
	}

	status = U_ZERO_ERROR; /* reset, go get another chunk. */
	}
	}
	}

	fclose(f);
	printf("done[02] - %d uchars in, %d bytes written. \n", charsIn, bytesOut);
	/* at this point, call scsu_reset(&comp) if you want ot write out
	a different data stream with the same compressor. */

	/*********************** END SAMPLE **********************/
	}

	/**
	 * Program entry point: runs both samples in order and always exits with 0.
	 * Each sample prints its own banner and results.
	 */
	int main()
	{
	  /* The status returned by the first sample is intentionally not inspected. */
	  (void)compsample_01();
	  compsample_02();
	  return 0;
	}