blob: 788c6f90a30cd9ecc435b1d2d8db4c2e5639fb12 [file] [log] [blame]
/**************************************************************************
*
* Copyright (C) 2000, International Business Machines
* Corporation and others. All Rights Reserved.
*
***************************************************************************
* file name: compsamp.c
* encoding: ASCII (7-bit)
*
* created on: 2000may30
* created by: Steven R. Loomis
*
* Sample code for the ICU compression routines.
*
* Note: Nothing special is needed to build this sample. Link with
* the icu UC and icu I18N libraries.
*
* I use 'assert' for error checking, you probably will want
* something more flexible. '***BEGIN SAMPLE***' and
* '***END SAMPLE***' mark pieces suitable for stand alone
* code snippets.
*
*/
#include <stdio.h>
#include <ctype.h> /* for isspace, etc. */
#include <assert.h>
#include <string.h>
#include "unicode/utypes.h" /* Basic ICU data types */
#include "unicode/scsu.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h" /* some more string fcns*/
#include "unicode/uloc.h"
/* Some utility functions */
/* A UChar array holding only the terminating 0x0000, i.e. an empty string.
   It is the default value of the 'uch' parameter of printUChars(). */
static const UChar kNone[] = { 0x0000 };
/*
 * Print a short rendering of one UChar to stdout.
 *
 *  - code units above U+007F: at most the first six characters of the name
 *    returned by u_charName(), left-justified in a seven-column field, or a
 *    row of question marks when u_charName() reports failure;
 *  - ASCII characters for which isgraph() is true: the character in
 *    single quotes;
 *  - space, tab and newline: a short literal marker;
 *  - every other ASCII value: a blank.
 */
static void prettyPrintUChar(UChar c)
{
    if (c > 0x007F) {
        char buf[100];
        UErrorCode status = U_ZERO_ERROR;

        /* Only the status is needed; the returned name length is ignored. */
        u_charName(c, U_UNICODE_CHAR_NAME, buf, 100, &status);
        if (U_SUCCESS(status)) {
            /* Precision .6 truncates the name to six characters, replacing the
               former in-place truncation; the space flag was dropped because
               it has no defined meaning for %s. */
            printf("%-7.6s", buf);
        } else {
            printf("??????? ");
        }
        return;
    }

    /* From here on c is in 0x00..0x7F, so it is a valid isgraph() argument. */
    if (isgraph(c)) {
        printf(" '%c' ", (char)(0x00FF & c));
        return;
    }

    switch ((char)(c & 0x007F)) {
    case ' ':
        printf(" ' ' ");
        break;
    case '\t':
        printf(" \t ");
        break;
    case '\n':
        printf(" \n ");
        break;
    default:
        printf(" ");
        break;
    }
}
/*
 * Dump a UChar string to stdout as three rows: the index of every code unit,
 * its value as a \uXXXX escape, and the rendering produced by
 * prettyPrintUChar().
 *
 * name - label printed in front of the index row; NULL is treated as "?"
 * uch  - the code units to print; NULL prints the three row labels only
 * len  - number of code units, or -1 to measure uch with u_strlen()
 */
static void printUChars(const char *name = "?",
                        const UChar *uch = kNone,
                        int32_t len = -1 )
{
    int32_t i;

    if (name == NULL) {
        name = "?";
    }
    if (uch == NULL) {
        len = 0; /* nothing to index; avoids dereferencing NULL below */
    } else if (len == -1) {
        len = u_strlen(uch);
    }

    /* "%5s" rather than "% 5s": the space flag is only meaningful for
       signed numeric conversions. */
    printf("%5s:", name);
    for (i = 0; i < len; i++) {
        printf("%- 6d ", (int)i);
    }
    printf("\n");

    printf("%5s: ", "uni");
    for (i = 0; i < len; i++) {
        printf("\\u%04X ", (unsigned int)uch[i]);
    }
    printf("\n");

    printf("%5s: ", "ch");
    for (i = 0; i < len; i++) {
        prettyPrintUChar(uch[i]);
    }
    printf("\n");
}
/*
 * Dump a byte buffer to stdout as three rows: the index of every byte, its
 * value as a \xXX escape, and the byte itself in single quotes when
 * isgraph() accepts it (a blank otherwise).
 *
 * name - label printed in front of the index row; NULL is treated as "?"
 * uch  - the bytes to print; NULL prints the three row labels only
 * len  - number of bytes, or -1 to measure uch with strlen()
 */
static void printBytes(const char *name = "?",
                       const uint8_t *uch = (const uint8_t*)"",
                       int32_t len = -1 )
{
    int32_t i;

    if (name == NULL) {
        name = "?";
    }
    if (uch == NULL) {
        len = 0; /* nothing to index; avoids dereferencing NULL below */
    } else if (len == -1) {
        len = (int32_t)strlen((const char*)uch);
    }

    /* "%5s" rather than "% 5s": the space flag is only meaningful for
       signed numeric conversions. */
    printf("%5s:", name);
    for (i = 0; i < len; i++) {
        printf(" %- 4d", (int)i);
    }
    printf("\n");

    /* This row shows raw byte values, so it is labelled "hex"; the previous
       "uni" label was carried over from printUChars(). */
    printf("%5s: ", "hex");
    for (i = 0; i < len; i++) {
        printf("\\x%02X ", 0x00FFu & (unsigned int)uch[i]);
    }
    printf("\n");

    printf("%5s: ", "ch");
    for (i = 0; i < len; i++) {
        if (isgraph(uch[i])) {
            printf(" '%c' ", (char)uch[i]);
        } else {
            printf(" ");
        }
    }
    printf("\n");
}
/*******************************************************************
 Very simple C sample to compress the word 'Moscow' in Russian, followed
 by an exclamation mark (!)

 Compresses one short UChar string into a local byte buffer with
 scsu_compress(), reports the number of bytes produced on stderr, then
 dumps the input and the compressed bytes to stdout.

 Returns U_ZERO_ERROR; a failed compression is caught by assert().
*/
UErrorCode compsample_01()
{
    printf("\n\n==============================================\n"
           "Sample 01: C: simple Unicode compression\n");

    // "Moskva!" in Cyrillic letters; this is the text that gets compressed.
    UChar input[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
                      0x0430, 0x0021, 0x0000 };
    const UChar *source = input;
    uint8_t buffer[100];
    uint8_t *target;
    int32_t targetLen = sizeof(buffer);
    int32_t sourceLen = u_strlen(input);
    int32_t bytesWritten;
    UErrorCode status = U_ZERO_ERROR;

    // **************************** START SAMPLE *******************
    // set up the compressor
    UnicodeCompressor comp;
    scsu_init(&comp);

    // target is advanced by scsu_compress(); afterwards it points just past
    // the last byte written.
    target = buffer;
    scsu_compress(&comp, &target, target+targetLen, &source,
                  source+sourceLen, &status);
    assert(U_SUCCESS(status));

    // target - buffer is a ptrdiff_t; it is bounded by sizeof(buffer), so
    // narrowing it is safe, and "%d" then receives a real int.
    bytesWritten = (int32_t)(target - buffer);
    fprintf(stderr, "Bytes converted: %d\n", (int)bytesWritten);
    // ***************************** END SAMPLE ********************

    // Print it out
    printUChars("src", input);
    printf("\n");
    printBytes("targ", buffer, bytesWritten);
    return U_ZERO_ERROR;
}
/* Number of sample strings available to getOurDataString(): whatever
   uloc_countAvailable() reports. */
int32_t countOurDataStrings()
{
    const int32_t availableLocales = uloc_countAvailable();
    return availableLocales;
}
int32_t getOurDataString(int32_t i, UChar *s, int32_t size)
{
UErrorCode status = U_ZERO_ERROR;
int32_t len;
const char *loc = uloc_getAvailable(i);
len = uloc_getDisplayName(loc,loc,s,size,&status);
if(U_FAILURE(status))
{
return 0;
}
return len;
}
void compsample_02()
{
printf("\n\n==============================================\n"
"Sample 02: C: bulk Unicode compression\n");
#define SAMPLE2BUFFERSIZE 1024 /* larger than our largest data */
UChar input[SAMPLE2BUFFERSIZE];
const UChar *source;
uint8_t output[SAMPLE2BUFFERSIZE];
uint8_t *target;
int32_t sourceLen;
int32_t count;
int32_t i;
int32_t charsIn = 0,bytesOut = 0;
FILE *f;
UErrorCode status = U_ZERO_ERROR;
f = fopen("outdata2.scsu", "w");
// **************************** START SAMPLE *******************
// set up the compressor
UnicodeCompressor comp;
scsu_init(&comp);
count = countOurDataStrings();
for(i=0;i<count;i++)
{
sourceLen = getOurDataString(i, input, SAMPLE2BUFFERSIZE);
charsIn += sourceLen;
// printUChars("src", input);
/* Now, loop and write out all of the data */
source = input;
target = output;
while(source < (input+sourceLen))
{
scsu_compress(&comp, &target, output+SAMPLE2BUFFERSIZE,
&source, input+sourceLen, &status);
if( (status == U_ZERO_ERROR) || (status == U_BUFFER_OVERFLOW_ERROR)) {
/* got all of it */
// printBytes("out", output, target-output); // Uncomment for very verbose output..
fwrite(output, 1, target-output, f);
bytesOut += (target-output);
target = output; /* reset target to beginning */
if(status == U_ZERO_ERROR) {
break; /* Got everything! */
}
status = U_ZERO_ERROR; /* reset, go get another chunk. */
}
}
}
fclose(f);
printf("done[02] - %d uchars in, %d bytes written. \n", charsIn, bytesOut);
/* at this point, call scsu_reset(&comp) if you want ot write out
a different data stream with the same compressor. */
/************************* END SAMPLE ************************/
}
/* Program entry point: runs sample 01, then sample 02, and exits with 0. */
int main()
{
    (void)compsample_01(); /* the returned status is not inspected */
    compsample_02();

    return 0;
}