blob: d77e3f537cc1720934585036c3f5c9cb8009ac4c [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/TestUtility.java,v $
* $Date: 2005/10/11 19:39:15 $
* $Revision: 1.4 $
*
*******************************************************************************
*/
package com.ibm.text.utility;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import java.text.*;
import java.io.*;
import java.nio.Buffer;
import com.ibm.icu.dev.test.util.DataInputCompressor;
import com.ibm.icu.dev.test.util.DataOutputCompressor;
import com.ibm.icu.dev.test.util.ICUPropertyFactory;
import com.ibm.icu.dev.test.util.UnicodeLabel;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.dev.test.util.UnicodePropertySource;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.text.UCD.Default;
public class TestUtility {
/*
static public class MyEnum extends EnumBase {
public static MyEnum
ZEROED = (MyEnum) makeNext(myEnum.getClass()),
SHIFTED = (MyEnum) makeNext(),
NON_IGNORABLE = (MyEnum) makeNext(),
FIRST_ENUM = ZEROED,
LAST_ENUM = NON_IGNORABLE;
public MyEnum next(int value) {
return (MyEnum) internalNext(value);
}
protected MyEnum() {}
}
*/
// When true, testUnicodeMapSerialization writes each serialized map to a file under outdir
// and reads it back from there; when false it uses an in-memory byte array instead.
static final boolean USE_FILE = true;
// When true (and USE_FILE is false), the serialized bytes are also dumped as hex.
static final boolean DEBUG = false;
/**
 * Command-line driver for the serialization experiments in this class.
 *
 * Runs {@link #tryFileUnicodeProperty()} and {@link #check()}, serializes a
 * small dummy map, then walks the ICU property names and round-trips the
 * map of every property whose name matches the filter pattern below.
 *
 * @param args ignored
 * @throws Exception if any of the exercised steps fails
 */
static public void main(String[] args) throws Exception {
    tryFileUnicodeProperty();
    check();
    int iterations = 1;
    //testStreamCompressor();
    UnicodeMap umap = new UnicodeMap();
    umap.put(0,"abcdefg");
    if (false) for (int i = 0; i < 256; ++i) {
        umap.put(i, String.valueOf(i&0xF0));
    }
    // Parameter order is (iterations, total, pname, umap): iterations first,
    // then the running total, which starts at zero for this dummy run.
    int total = testUnicodeMapSerialization(iterations, 0, "dummy", umap);
    //if (true) return;
    ICUPropertyFactory p = ICUPropertyFactory.make();
    total = 0;
    BreakIterator bk = BreakIterator.getWordInstance(Locale.ENGLISH);
    // Only names that fully match this pattern are processed; everything else is skipped.
    Matcher nameMatch = Pattern.compile("Name").matcher("");
    UnicodeProperty gc = p.getProperty("General_Category");
    // Everything except the Cn, Co and Cs general categories.
    UnicodeSet checkSet = gc.getSet("Cn").addAll(gc.getSet("Co")).addAll(gc.getSet("Cs")).complement();
    UnicodeSetIterator checkSetIterator = new UnicodeSetIterator(checkSet);
    UnicodeProperty hangulSyllableType = p.getProperty("Hangul_Syllable_Type");
    UnicodeSet hangulSyllable = hangulSyllableType.getSet("LVT_Syllable").addAll(hangulSyllableType.getSet("LV_Syllable"));
    for (Iterator pnames = p.getAvailableNames().iterator(); pnames.hasNext();) {
        String pname = (String) pnames.next();
        if (!nameMatch.reset(pname).matches()) continue;
        System.out.println();
        UnicodeProperty up = p.getProperty(pname);
        int ptype = up.getType();
        System.out.print("Name:\t" + pname + "\tType:\t" + up.getTypeName(ptype));
        if (up.isType(up.STRING_MASK)) {
            // String-valued property: build the map by hand, leaving out code
            // points whose value is just the code point itself.
            boolean excludeHangul = pname.startsWith("isNF");
            umap = new UnicodeMap();
            checkSetIterator.reset();
            while (checkSetIterator.next()) {
                int i = checkSetIterator.codepoint;
                if (excludeHangul && hangulSyllable.contains(i)) continue;
                String value = up.getValue(i);
                if (equals(i, value)) continue;
                umap.put(i, value);
                //System.out.println("Adding " + Utility.hex(i) + ", " + Utility.hex(value));
            }
        } else {
            // Reuse the property fetched above rather than looking it up a second time.
            umap = up.getUnicodeMap();
            if (pname.equals("Name")) {
                umap = fixNameMap(bk, umap);
            }
        }
        total = testUnicodeMapSerialization(iterations, total, pname, umap);
    }
    String[] hanProps = {"kIICore", "kRSUnicode"};
    for (int i = 0; i < hanProps.length; ++i) {
        String pname = hanProps[i];
        if (!nameMatch.reset(pname).matches()) continue;
        // testHanProp returns nothing, so total is not updated here.
        testHanProp(iterations, total, pname, "Han");
    }
    System.out.println();
    System.out.println("Done");
}
/**
 * Smoke test of plain Java serialization: writes a boolean, a UTF string and
 * a one-entry UnicodeMap to an in-memory object stream, prints the bytes as
 * hex, then reads the three items back and prints each of them.
 *
 * @throws IOException if writing or reading the object stream fails
 * @throws ClassNotFoundException if the serialized map cannot be resolved on read
 */
static void check() throws IOException, ClassNotFoundException {
    UnicodeMap sample = new UnicodeMap();
    sample.put(1, "abc");

    // Write phase.
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    ObjectOutputStream writer = new ObjectOutputStream(sink);
    writer.writeBoolean(true);
    writer.writeUTF("abcdefg");
    writer.writeObject(sample);
    writer.close();

    int length = sink.size();
    byte[] bytes = sink.toByteArray();
    System.out.println(showBuffer(bytes, length));

    // Read phase: same order as written.
    ObjectInputStream reader = new ObjectInputStream(new ByteArrayInputStream(bytes, 0, length));
    boolean flag = reader.readBoolean();
    System.out.println(flag);
    String text = reader.readUTF();
    System.out.println(text);
    Object restored = reader.readObject();
    System.out.println(restored);
    reader.close();
}
/**
 * Tests whether {@code value} consists of exactly the single code point {@code i}.
 *
 * @param i code point to compare against
 * @param value candidate string; may be null or empty
 * @return true only if value is one char equal to i, or two chars that
 *         UTF16.charAt decodes to i (i above 0xFFFF); false for null, empty
 *         or longer strings
 */
private static boolean equals(int i, String value) {
    // A missing or empty value can never equal a code point; without this
    // guard an empty value and a supplementary i would reach UTF16.charAt("", 0).
    if (value == null) return false;
    int len = value.length();
    if (len == 0 || len > 2) return false;
    if (len == 1) return i == value.charAt(0);
    // Two code units can only match a supplementary code point.
    if (i <= 0xFFFF) return false;
    return i == UTF16.charAt(value, 0);
}
/**
 * Fetches the Han value map for {@code pname} from the default UCD, prints
 * it, sets its missing value to "na", and runs the serialization round-trip
 * on it.
 *
 * @param iterations number of read iterations passed to the round-trip test
 * @param total running size total passed to the round-trip test; the updated
 *        total is not returned to the caller
 * @param pname Han property name
 * @param type label printed after "Type:"
 */
private static void testHanProp(int iterations, int total, String pname, String type) throws IOException, ClassNotFoundException {
    System.out.println();
    UnicodeMap hanValues = Default.ucd().getHanValue(pname);
    System.out.println(hanValues);
    hanValues.setMissing("na");
    System.out.print("Name:\t" + pname + "\tType:\t" + type);
    // The returned total has nowhere to go in a void method, so it is discarded.
    testUnicodeMapSerialization(iterations, total, pname, hanValues);
}
// Path prefix for the serialized-property directories; a version string is appended
// directly to it (see outdir below and FileUnicodeProperty.Factory.make).
static String outdircore = "C:\\DATA\\bin\\UCD_Data";
// Directory into which testUnicodeMapSerialization writes "<property name>.bin" files.
static String outdir = outdircore + "4.1.0\\";
/**
 * Serializes {@code umap} through a GZIP-compressed object stream, reads it
 * back {@code iterations} times while timing the reads, compares the result
 * with the original, and prints value count, size and timing.
 *
 * The data goes to the file {@code outdir + pname + ".bin"} when USE_FILE is
 * true, otherwise to an in-memory byte array.
 *
 * @param iterations number of timed read passes; if it is not positive nothing
 *        is read back and the round-trip is reported as unverified
 * @param total running total of serialized sizes so far
 * @param pname property name, used to build the file name
 * @param umap map to serialize
 * @return {@code total} plus the serialized size of this map
 * @throws IOException if writing or reading fails
 * @throws ClassNotFoundException if the serialized map cannot be resolved on read
 */
private static int testUnicodeMapSerialization(int iterations, int total, String pname, UnicodeMap umap) throws IOException, ClassNotFoundException {
    System.out.print("\tValue Count:\t" + umap.getAvailableValues().size());
    String filename = outdir + pname + ".bin";

    // ---- write phase ----
    ByteArrayOutputStream baout = null;
    OutputStream rawOut;
    if (USE_FILE) {
        rawOut = new FileOutputStream(filename);
    } else {
        rawOut = baout = new ByteArrayOutputStream();
    }
    try {
        ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(rawOut));
        oos.writeObject(umap);
        oos.close();
    } finally {
        // Releases the underlying stream even if a wrapper failed to construct
        // or the write threw; closing it a second time is harmless.
        rawOut.close();
    }

    long size;
    byte[] buffer = null;
    if (USE_FILE) {
        size = new File(filename).length();
    } else {
        size = baout.size();
        buffer = baout.toByteArray();
        if (DEBUG) System.out.println(showBuffer(buffer, size));
    }
    System.out.print("\t"+"Size:\t" + size);

    // ---- read phase: only the read time is measured ----
    UnicodeMap reverseMap = null;
    long start = System.currentTimeMillis();
    for (int i = iterations; i > 0; --i) {
        InputStream rawIn;
        if (USE_FILE) {
            rawIn = new FileInputStream(filename);
        } else {
            rawIn = new ByteArrayInputStream(buffer, 0, (int)size);
        }
        try {
            ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(rawIn));
            try {
                reverseMap = (UnicodeMap) ois.readObject();
            } catch (java.io.OptionalDataException e1) {
                // Report and carry on; reverseMap keeps whatever an earlier pass produced.
                System.out.println(e1.eof + "\t" + e1.length);
                e1.printStackTrace();
            }
            ois.close();
        } finally {
            rawIn.close();
        }
    }
    long end = System.currentTimeMillis();

    if (reverseMap == null) {
        // No pass produced a map (no iterations ran, or every read failed), so
        // there is nothing to compare; say so instead of dereferencing null.
        System.out.println("Failed roundtrip: no map was read back");
    } else if (!reverseMap.equals(umap)) {
        System.out.println("Failed roundtrip");
        for (int i = 0; i <= 0x10FFFF; ++i) {
            String main = (String) umap.getValue(i);
            String rev = (String) reverseMap.getValue(i);
            if (UnicodeMap.areEqual(main, rev))
                continue;
            System.out.println(Utility.hex(i) + "\t'" + main + "',\t'"
                    + rev + "'");
        }
    }
    total += size;
    System.out.print("\tTime:\t" + (end - start) / (iterations * 1.0)
            + "\tmsecs (raw:\t" + ((end - start) / 1000.0) + "\tsecs)");
    /* with Vanilla Serialization
     * Size: 24131
     * Time: 1.9488 msecs (raw: 9.744 secs)
     * With my serialization
     * Size: 19353
     * Time: 0.8652 msecs (raw: 4.326 secs)
     * With my serialization, and compression of ints
     * Size: 8602
     * Time: 2.784 msecs (raw: 1.392 secs)
     * With delta encoding
     * Size: 5226
     * Time: 1.924 msecs (raw: 0.962 secs)
     * Name:
     * Size: 776926
     * Time: 180.3 msecs (raw: 1.803 secs)
     */
    return total;
}
/**
 * Formats the first {@code size} bytes of {@code buffer} as hex values
 * (via Utility.hex with a width argument of 2) separated by single spaces.
 *
 * @param buffer bytes to format
 * @param size number of leading bytes to include
 * @return the space-separated hex string; empty when size is not positive
 */
private static String showBuffer(byte[] buffer, long size) {
    StringBuffer hexText = new StringBuffer();
    int index = 0;
    while (index < size) {
        if (index > 0) {
            hexText.append(' ');
        }
        int unsigned = buffer[index] & 0xFF;
        hexText.append(Utility.hex(unsigned, 2));
        ++index;
    }
    return hexText.toString();
}
/**
 * Round-trips a fixed set of strings and integral values through
 * DataOutputCompressor / DataInputCompressor layered on in-memory object
 * streams. For each value it prints the written bytes as hex, then one line
 * with the source, the value read back, and "Success" or "Bitter Failure".
 *
 * @throws IOException if writing or reading a stream fails
 */
private static void testStreamCompressor() throws IOException {
    Object[] tests = {
        UTF16.valueOf(0x10FFFF),"\u1234", "abc",
        new Long(-3), new Long(12345),
        new Short(Short.MAX_VALUE), new Short(Short.MIN_VALUE),
        new Integer(Integer.MAX_VALUE), new Integer(Integer.MIN_VALUE),
        new Long(Long.MIN_VALUE), new Long(Long.MAX_VALUE)};
    for (int i = 0; i < tests.length; ++i) {
        Object source = tests[i];
        boolean isString = source instanceof String;

        // Write the value: strings as UTF, every number widened to long.
        ByteArrayOutputStream out = new ByteArrayOutputStream(100);
        ObjectOutputStream out2 = new ObjectOutputStream(out);
        DataOutputCompressor sc = new DataOutputCompressor(out2);
        long y = 0;
        if (isString) {
            sc.writeUTF((String)source);
        } else {
            y = ((Number)source).longValue();
            sc.writeLong(y);
        }
        out2.close();

        byte[] buffer = out.toByteArray();
        showBytes(buffer, out.size());
        System.out.println();

        // Read it back and compare.
        ObjectInputStream in2 = new ObjectInputStream(
                new ByteArrayInputStream(buffer, 0, out.size()));
        try {
            DataInputCompressor isc = new DataInputCompressor(in2);
            if (isString) {
                Object result = isc.readUTF();
                System.out.println(i + "\t" + source
                        + "\t" + result
                        + (source.equals(result) ? "\tSuccess" : "\tBitter Failure"));
            } else {
                long x = isc.readLong();
                // Tab between y and x so the written and read values do not
                // run together in the output.
                System.out.println(i + "\t" + y
                        + "\t" + x
                        + "\t" + Utility.hex(y)
                        + "\t" + Utility.hex(x)
                        + (x == y ? "\tSuccess" : "\tBitter Failure"));
            }
        } finally {
            in2.close();
        }
    }
}
/**
 * Prints the first {@code len} bytes of {@code buffer} to standard output,
 * each formatted via Utility.hex (width argument 2) and followed by a space.
 * No line terminator is written.
 *
 * @param buffer bytes to print
 * @param len number of leading bytes to print
 */
private static void showBytes(byte[] buffer, int len) {
    int position = 0;
    while (position < len) {
        int unsigned = buffer[position] & 0xFF;
        System.out.print(Utility.hex(unsigned, 2) + " ");
        position++;
    }
}
/**
 * Builds a copy of a name map in which three families of names are collapsed
 * to one-character placeholders: names starting with
 * "CJK UNIFIED IDEOGRAPH-" become "*", "CJK COMPATIBILITY IDEOGRAPH-" become
 * "#", and "HANGUL SYLLABLE " become "@". All other names are copied as-is;
 * code points with no name are left out.
 *
 * While copying, each name is split with {@code bk} and the pieces are
 * tallied in a Counter; that tally is only consumed by the disabled debug
 * output at the end.
 *
 * @param bk break iterator used to split names into pieces
 * @param umap source map from code point to name
 * @return the new map with the placeholder substitutions applied
 */
private static UnicodeMap fixNameMap(BreakIterator bk, UnicodeMap umap) {
    UnicodeMap temp = new UnicodeMap();
    Counter counter = new Counter();
    // Inclusive upper bound, so U+10FFFF is visited as well.
    for (int i = 0; i <= 0x10FFFF; ++i) {
        String name = (String) umap.getValue(i);
        if (name == null)
            continue;
        if (name.startsWith("CJK UNIFIED IDEOGRAPH-"))
            name = "*";
        else if (name.startsWith("CJK COMPATIBILITY IDEOGRAPH-"))
            name = "#";
        else if (name.startsWith("HANGUL SYLLABLE "))
            name = "@";
        bk.setText(name);
        int start = 0;
        while (true) {
            int end = bk.next();
            if (end == BreakIterator.DONE)
                break;
            String word = name.substring(start, end);
            // Weight each piece by its length minus two, never below zero.
            counter.add(word, Math.max(0, word.length() - 2));
            start = end;
        }
        temp.put(i, name);
    }
    if (false) {
        // Disabled debug dump of the tally and of the resulting ranges.
        Map m = counter.getSortedByCount();
        int count = 0;
        int running = 0;
        for (Iterator it = m.keySet().iterator(); it.hasNext();) {
            Counter.RWInteger c = (Counter.RWInteger) it.next();
            String value = (String) m.get(c);
            running += c.value;
            System.out.println(count++ + "\t" + c + "\t" + running
                    + "\t" + value);
        }
        for (UnicodeMap.MapIterator it2 = new UnicodeMap.MapIterator(
                temp); it2.nextRange();) {
            System.out.println(Utility.hex(it2.codepoint) + "\t"
                    + Utility.hex(it2.codepointEnd) + "\t"
                    + it2.value);
        }
    }
    return temp;
}
/**
 * Exercises FileUnicodeProperty: builds a factory for version "4.1.0",
 * prints its available property names, then prints the UnicodeMap of the
 * "White_Space" property and, after a blank line, of "kRSUnicode".
 */
private static void tryFileUnicodeProperty() {
    UnicodeProperty.Factory fileFactory = FileUnicodeProperty.Factory.make("4.1.0");
    System.out.println(fileFactory.getAvailableNames());

    UnicodeProperty whiteSpace = fileFactory.getProperty("White_Space");
    System.out.println(whiteSpace.getUnicodeMap());

    UnicodeProperty rsUnicode = fileFactory.getProperty("kRSUnicode");
    System.out.println();
    System.out.println(rsUnicode.getUnicodeMap());
}
/**
 * A UnicodeProperty whose values come from a serialized UnicodeMap stored in
 * a ".bin" file. The property name is the file name without that suffix.
 * The map is loaded on first use and then kept.
 */
public static class FileUnicodeProperty extends UnicodeProperty {
    /** Suffix of the data files; matches what testUnicodeMapSerialization appends. */
    private static final String SUFFIX = ".bin";

    private File file;
    private String version;
    private UnicodeMap map; // null until make() has loaded it

    private FileUnicodeProperty(File file, String version) {
        this.file = file;
        this.version = version;
        String base = file.getName();
        setName(base.substring(0, base.length() - SUFFIX.length())); // subtract .bin
    }

    /**
     * Factory holding one FileUnicodeProperty per ".bin" file found in the
     * directory {@code outdircore + version}.
     */
    public static class Factory extends UnicodeProperty.Factory {
        private Factory() {}

        /**
         * Scans the data directory for the given version and registers a
         * property for every regular file whose name ends in ".bin".
         *
         * @param version version string appended to outdircore to form the directory
         * @return the populated factory
         * @throws IllegalArgumentException if the directory cannot be listed
         */
        public static Factory make(String version) {
            Factory result = new Factory();
            File f = new File(outdircore + version + "\\");
            File[] files = f.listFiles();
            if (files == null) {
                // listFiles() yields null when the path is not a directory or
                // cannot be read; report that instead of failing with an NPE.
                throw new IllegalArgumentException("Can't list property directory: " + f);
            }
            for (int i = 0; i < files.length; ++i) {
                File candidate = files[i];
                // The constructor strips a ".bin" suffix, so skip anything else.
                if (!candidate.isFile() || !candidate.getName().endsWith(SUFFIX)) continue;
                result.add(new FileUnicodeProperty(candidate, version));
            }
            return result;
        }
    }

    protected List _getAvailableValues(List result) {
        if (map == null) make();
        return (List) map.getAvailableValues(result);
    }

    protected String _getVersion() {
        return version;
    }

    /* (non-Javadoc)
     * @see com.ibm.icu.dev.test.util.UnicodeProperty#_getValue(int)
     */
    protected String _getValue(int codepoint) {
        if (map == null) make();
        return (String)map.getValue(codepoint);
    }

    /**
     * Loads the map from the backing file. The file is read through a GZIP
     * stream because testUnicodeMapSerialization writes it through one.
     *
     * @throws InternalError wrapping any failure to open, decompress or deserialize
     */
    private void make() {
        try {
            InputStream in = new FileInputStream(file.getCanonicalPath());
            try {
                ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(in));
                map = (UnicodeMap) ois.readObject();
                ois.close();
            } finally {
                // Releases the file handle on failure too; a repeated close is harmless.
                in.close();
            }
        } catch (Exception e) {
            throw (InternalError)new InternalError("Can't create property").initCause(e);
        }
    }

    /** Adds this property's own name as its only alias. */
    protected List _getNameAliases(List result) {
        result.add(getName());
        return result;
    }

    /** Adds nothing: no value aliases are known for file-backed properties. */
    protected List _getValueAliases(String valueAlias, List result) {
        return result;
    }
}
}