unicodetools/com/ibm/text/UCD/CheckCollator.java - external/github.com/unicode-org/icu - Git at Google

 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CheckCollator.java,v $
 * $Date: 2002/08/09 23:56:24 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */

 // http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html

 package com.ibm.text.UCD;

 import java.util.*;
 import java.io.*;
 import java.text.NumberFormat;

 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;

 /**
  * This is a quick and dirty program to get some idea of collation performance, comparing the
  * old Java collator (<code>java.text.Collator</code>) to the new one
  * (<code>com.ibm.icu.text.Collator</code>).
  * <p>
  * For every locale / data file pair exercised by {@link #main}, {@link #test} prints the average
  * string and sort key sizes and then times sort key generation, raw comparison (only when
  * {@link #DO_RAW} is set), binary search and sorting. All results go to <code>System.out</code>.
  * Timings are taken with <code>System.currentTimeMillis()</code> and reported as microseconds
  * per iteration after subtracting a separately measured overhead.
  */
 abstract public class CheckCollator {
     /** Path prefix of the test data files; the test name and ".txt" are appended to it. */
     static final String PREFIX = "C:\\ICUInternal\\icu4c\\collation-perf-data\\TestNames_";
     /** When true, the (quadratic) raw compare benchmark is run as well. */
     static final boolean DO_RAW = false;

     /** Formats measured values with at most two fraction digits. */
     static final NumberFormat nf = NumberFormat.getInstance();
     /** Formats the new/old ratios as percentages. */
     static final NumberFormat percent = NumberFormat.getPercentInstance();
     static {
         nf.setMaximumFractionDigits(2);
     }

     /**
      * Runs the benchmark for a fixed set of locale / data file combinations.
      *
      * @param args currently ignored
      * @throws IOException if a data file cannot be read or contains no data lines
      */
     public static void main(String[] args) throws IOException {

         // later, drive off of args

         // choices are: Asian, Chinese, Japanese, Japanese_h, Japanese_k, Korean, Latin, Russian, Thai
         //test(Locale.KOREAN, "Korean");
         test(Locale.ENGLISH, "Latin");
         test(Locale.FRENCH, "Latin");
         test(Locale.JAPANESE, "Japanese");
     }

     /**
      * Benchmarks the old and new collators for one locale against one data file and prints
      * the results to <code>System.out</code>.
      *
      * @param loc  locale for which both collators are created
      * @param name test name; the file read is <code>PREFIX + name + ".txt"</code>
      * @throws IOException if the data file cannot be read or yields no data lines
      */
     public static void test(Locale loc, String name) throws IOException {

         System.out.println();
         System.out.println("Testing " + loc.getDisplayName() + ", file: " + name);
         System.out.println();

         // get test data

         String fileName = PREFIX + name + ".txt";
         ArrayList list = readTestData(fileName);
         int counter = list.size();
         System.out.println("Read " + counter + " lines in file");

         // Doubling an empty list never grows it, so the padding loop below would never end.
         if (list.isEmpty()) {
             throw new IOException("No test data lines found in " + fileName);
         }

         int limit = 800; // put a limit on it to save time

         // pump it up if there aren't very many
         while (list.size() < limit) {
             list.addAll(list);
         }

         int size = list.size();


         // later, adjust these so we always get a reasonable number of tries

         int extraIterations = 200;
         if (size > limit) size = limit;

         String[] tests = new String [size];

         for (int i = 0; i < size; ++i) {
             tests[i] = (String) list.get(i);
         }

         // get collators

         com.ibm.icu.text.Collator newCol = com.ibm.icu.text.Collator.getInstance(loc);
         java.text.Collator oldCol = java.text.Collator.getInstance(loc);


         double startTime, endTime;
         double delta, oldDelta;
         String probe;


         // load classes at least once before starting

         newCol.compare("a", "b");
         oldCol.compare("a", "b");

         // ================================================
         // check sort key size

         int stringSize = 0, newSize = 0, oldSize = 0;

         for (int i = 0; i < size; ++i) {
             stringSize += tests[i].length() * 2; // two bytes per UTF-16 code unit
             byte[] newKey = newCol.getCollationKey(tests[i]).toByteArray();
             newSize += newKey.length;
             byte[] oldKey = oldCol.getCollationKey(tests[i]).toByteArray();
             oldSize += oldKey.length;
         }
         delta = stringSize/(size + 0.0);
         System.out.println("string size: " + nf.format(delta) + " bytes per key");
         System.out.println();

         delta = oldDelta = (oldSize/(size + 0.0));
         System.out.println("old sortkey size: " + nf.format(delta) + " bytes per key ");
         delta = (newSize/(size + 0.0));
         System.out.println("new sortkey size: " + nf.format(delta) + " bytes per key " + percent.format(delta/oldDelta));
         System.out.println();

         // ================================================
         // Sort Key: old time

         // get overhead time (empty nested loop)
         counter = 0;
         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             for (int j = 0; j < size; ++j) {
                 counter++;
             }
         }
         endTime = System.currentTimeMillis();
         double overhead = microsPerIteration(startTime, endTime, counter);
         // report the same microsecond value that is subtracted below
         System.out.println("overhead: " + nf.format(overhead) + " micros");

         counter = 0;
         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             probe = tests[i];
             for (int k = 0; k < extraIterations; ++k) {
                 oldCol.getCollationKey(probe);
                 counter++;
             }
         }
         endTime = System.currentTimeMillis();
         oldDelta = delta = microsPerIteration(startTime, endTime, counter) - overhead;
         System.out.println("Old sort key time: " + nf.format(delta)
             + " micros (" + counter + " iterations)");

         // Sort Key: new time

         counter = 0;
         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             probe = tests[i];
             for (int k = 0; k < extraIterations; ++k) {
                 newCol.getCollationKey(probe);
                 counter++;
             }
         }
         endTime = System.currentTimeMillis();
         delta = microsPerIteration(startTime, endTime, counter) - overhead;
         System.out.println("New sort key time: " + nf.format(delta)
             + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
         System.out.println();

         // ================================================
         // Raw Compare

         if (DO_RAW) {
             // get overhead time (plain String.compareTo over every pair)
             counter = 0;
             startTime = System.currentTimeMillis();
             int opt = 0; // to keep the compiler from optimizing out

             for (int i = 0; i < size; ++i) {
                 probe = tests[i];
                 for (int j = 0; j < size; ++j) {
                     opt ^= probe.compareTo(tests[j]);
                     counter++;
                 }
             }
             endTime = System.currentTimeMillis();
             overhead = microsPerIteration(startTime, endTime, counter);
             System.out.println("overhead: " + nf.format(overhead) + " micros");

             // Raw Compare: old time

             counter = 0;
             startTime = System.currentTimeMillis();

             for (int i = 0; i < size; ++i) {
                 probe = tests[i];
                 for (int j = 0; j < size; ++j) {
                     opt ^= oldCol.compare(probe, tests[j]);
                     counter++;
                 }
             }
             endTime = System.currentTimeMillis();
             oldDelta = delta = microsPerIteration(startTime, endTime, counter) - overhead;
             System.out.println("Old raw compare time: " + nf.format(delta)
                 + " micros (" + counter + " iterations)");

             // Raw Compare: new time

             counter = 0;
             startTime = System.currentTimeMillis();

             for (int i = 0; i < size; ++i) {
                 probe = tests[i];
                 for (int j = 0; j < size; ++j) {
                     opt ^= newCol.compare(probe, tests[j]);
                     counter++;
                 }
             }
             endTime = System.currentTimeMillis();
             delta = microsPerIteration(startTime, endTime, counter) - overhead;
             System.out.println("New raw compare time: " + nf.format(delta)
                 + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
             System.out.println();
         }

         // ================================================
         // Binary Search
         // note: I don't worry about getting the binary search precisely right, since I just want to
         // see which strings would get compared.
         //
         // In all three measurements the array is sorted BEFORE the clock starts, so that only the
         // searches are timed and the old and new figures are comparable.

         // overhead (natural String ordering)

         int iterations = (size * extraIterations);
         int opt2 = 0; // keep from optimizing out
         Arrays.sort(tests);

         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             probe = tests[i];
             for (int k = 0; k < extraIterations; ++k) {
                 opt2 ^= Arrays.binarySearch(tests, probe);
             }
         }
         endTime = System.currentTimeMillis();
         overhead = delta = microsPerIteration(startTime, endTime, iterations);
         System.out.println("Overhead: " + nf.format(delta)
             + " micros (" + iterations + " iterations)");

         // old time

         Arrays.sort(tests, oldCol);

         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             probe = tests[i];
             for (int k = 0; k < extraIterations; ++k) {
                 opt2 ^= Arrays.binarySearch(tests, probe, oldCol);
             }
         }
         endTime = System.currentTimeMillis();
         oldDelta = delta = microsPerIteration(startTime, endTime, iterations) - overhead;
         System.out.println("Old binary search time: " + nf.format(delta)
             + " micros (" + iterations + " iterations)");


         // new time

         Arrays.sort(tests, newCol);

         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             probe = tests[i];
             for (int k = 0; k < extraIterations; ++k) {
                 opt2 ^= Arrays.binarySearch(tests, probe, newCol);
             }
         }
         endTime = System.currentTimeMillis();
         delta = microsPerIteration(startTime, endTime, iterations) - overhead;
         System.out.println("New binary search time: " + nf.format(delta)
             + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
         System.out.println();

         // ================================================
         // Sort

         String[] sortTests = (String[]) tests.clone();
         extraIterations = 5;
         iterations = (size * extraIterations);

         // overhead (array copy plus a sort in natural String ordering)

         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             for (int k = 0; k < extraIterations; ++k) {
                 System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
                 Arrays.sort(sortTests);
             }
         }
         endTime = System.currentTimeMillis();
         overhead = delta = microsPerIteration(startTime, endTime, iterations);
         System.out.println("overhead: " + nf.format(delta)
             + " micros (" + iterations + " iterations)");

         // old time

         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             for (int k = 0; k < extraIterations; ++k) {
                 System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
                 Arrays.sort(sortTests, oldCol);
             }
         }
         endTime = System.currentTimeMillis();
         oldDelta = delta = microsPerIteration(startTime, endTime, iterations) - overhead;
         System.out.println("Old sort time: " + nf.format(delta)
             + " micros (" + iterations + " iterations)");

         // new time

         startTime = System.currentTimeMillis();

         for (int i = 0; i < size; ++i) {
             for (int k = 0; k < extraIterations; ++k) {
                 System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
                 Arrays.sort(sortTests, newCol);
             }
         }
         endTime = System.currentTimeMillis();
         delta = microsPerIteration(startTime, endTime, iterations) - overhead;
         System.out.println("New sort time: " + nf.format(delta)
             + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));

     }

     /**
      * Reads all non-empty data lines from the given file, decoding it as "UnicodeLittle".
      * The file is always closed before returning, even if reading fails.
      *
      * @param fileName path of the data file
      * @return the non-empty lines, in file order, as <code>String</code> elements
      * @throws IOException if the file cannot be opened or read
      */
     private static ArrayList readTestData(String fileName) throws IOException {
         ArrayList lines = new ArrayList();
         FileInputStream fis = new FileInputStream(fileName);
         try {
             InputStreamReader isr = new InputStreamReader(fis, "UnicodeLittle");
             BufferedReader br = new BufferedReader(isr, 32*1024);
             int counter = 0;
             while (true) {
                 String line = Utility.readDataLine(br);
                 if (line == null) break;
                 if (line.length() == 0) continue;
                 Utility.dot(counter++); // presumably a progress indicator; see Utility
                 lines.add(line);
             }
         } finally {
             // closing the underlying stream releases the file handle
             fis.close();
         }
         return lines;
     }

     /**
      * Converts an elapsed wall-clock interval into microseconds per iteration.
      *
      * @param startMillis start time in milliseconds
      * @param endMillis   end time in milliseconds
      * @param iterations  number of iterations performed in the interval
      * @return <code>1000 * (endMillis - startMillis) / iterations</code>
      */
     private static double microsPerIteration(double startMillis, double endMillis, int iterations) {
         return 1000 * (endMillis - startMillis) / iterations;
     }
 }
	/**
	*******************************************************************************
	* Copyright (C) 1996-2001, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*
	* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CheckCollator.java,v $
	* $Date: 2002/08/09 23:56:24 $
	* $Revision: 1.2 $
	*
	*******************************************************************************
	*/

	// http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html

	package com.ibm.text.UCD;

	import java.util.*;
	import java.io.*;
	import java.text.NumberFormat;

	import com.ibm.text.utility.*;
	import com.ibm.icu.text.UTF16;
	import com.ibm.icu.text.UnicodeSet;

	/**
	 * This is a quick and dirty program to get some idea of collation performance, comparing the
	 * old Java collator (<code>java.text.Collator</code>) to the new one
	 * (<code>com.ibm.icu.text.Collator</code>).
	 * <p>
	 * For every locale / data file pair exercised by {@link #main}, {@link #test} prints the average
	 * string and sort key sizes and then times sort key generation, raw comparison (only when
	 * {@link #DO_RAW} is set), binary search and sorting. All results go to <code>System.out</code>.
	 * Timings are taken with <code>System.currentTimeMillis()</code> and reported as microseconds
	 * per iteration after subtracting a separately measured overhead.
	 */
	abstract public class CheckCollator {
	    /** Path prefix of the test data files; the test name and ".txt" are appended to it. */
	    static final String PREFIX = "C:\\ICUInternal\\icu4c\\collation-perf-data\\TestNames_";
	    /** When true, the (quadratic) raw compare benchmark is run as well. */
	    static final boolean DO_RAW = false;

	    /** Formats measured values with at most two fraction digits. */
	    static final NumberFormat nf = NumberFormat.getInstance();
	    /** Formats the new/old ratios as percentages. */
	    static final NumberFormat percent = NumberFormat.getPercentInstance();
	    static {
	        nf.setMaximumFractionDigits(2);
	    }

	    /**
	     * Runs the benchmark for a fixed set of locale / data file combinations.
	     *
	     * @param args currently ignored
	     * @throws IOException if a data file cannot be read or contains no data lines
	     */
	    public static void main(String[] args) throws IOException {

	        // later, drive off of args

	        // choices are: Asian, Chinese, Japanese, Japanese_h, Japanese_k, Korean, Latin, Russian, Thai
	        //test(Locale.KOREAN, "Korean");
	        test(Locale.ENGLISH, "Latin");
	        test(Locale.FRENCH, "Latin");
	        test(Locale.JAPANESE, "Japanese");
	    }

	    /**
	     * Benchmarks the old and new collators for one locale against one data file and prints
	     * the results to <code>System.out</code>.
	     *
	     * @param loc  locale for which both collators are created
	     * @param name test name; the file read is <code>PREFIX + name + ".txt"</code>
	     * @throws IOException if the data file cannot be read or yields no data lines
	     */
	    public static void test(Locale loc, String name) throws IOException {

	        System.out.println();
	        System.out.println("Testing " + loc.getDisplayName() + ", file: " + name);
	        System.out.println();

	        // get test data

	        String fileName = PREFIX + name + ".txt";
	        ArrayList list = readTestData(fileName);
	        int counter = list.size();
	        System.out.println("Read " + counter + " lines in file");

	        // Doubling an empty list never grows it, so the padding loop below would never end.
	        if (list.isEmpty()) {
	            throw new IOException("No test data lines found in " + fileName);
	        }

	        int limit = 800; // put a limit on it to save time

	        // pump it up if there aren't very many
	        while (list.size() < limit) {
	            list.addAll(list);
	        }

	        int size = list.size();


	        // later, adjust these so we always get a reasonable number of tries

	        int extraIterations = 200;
	        if (size > limit) size = limit;

	        String[] tests = new String [size];

	        for (int i = 0; i < size; ++i) {
	            tests[i] = (String) list.get(i);
	        }

	        // get collators

	        com.ibm.icu.text.Collator newCol = com.ibm.icu.text.Collator.getInstance(loc);
	        java.text.Collator oldCol = java.text.Collator.getInstance(loc);


	        double startTime, endTime;
	        double delta, oldDelta;
	        String probe;


	        // load classes at least once before starting

	        newCol.compare("a", "b");
	        oldCol.compare("a", "b");

	        // ================================================
	        // check sort key size

	        int stringSize = 0, newSize = 0, oldSize = 0;

	        for (int i = 0; i < size; ++i) {
	            stringSize += tests[i].length() * 2; // two bytes per UTF-16 code unit
	            byte[] newKey = newCol.getCollationKey(tests[i]).toByteArray();
	            newSize += newKey.length;
	            byte[] oldKey = oldCol.getCollationKey(tests[i]).toByteArray();
	            oldSize += oldKey.length;
	        }
	        delta = stringSize/(size + 0.0);
	        System.out.println("string size: " + nf.format(delta) + " bytes per key");
	        System.out.println();

	        delta = oldDelta = (oldSize/(size + 0.0));
	        System.out.println("old sortkey size: " + nf.format(delta) + " bytes per key ");
	        delta = (newSize/(size + 0.0));
	        System.out.println("new sortkey size: " + nf.format(delta) + " bytes per key " + percent.format(delta/oldDelta));
	        System.out.println();

	        // ================================================
	        // Sort Key: old time

	        // get overhead time (empty nested loop)
	        counter = 0;
	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            for (int j = 0; j < size; ++j) {
	                counter++;
	            }
	        }
	        endTime = System.currentTimeMillis();
	        double overhead = microsPerIteration(startTime, endTime, counter);
	        // report the same microsecond value that is subtracted below
	        System.out.println("overhead: " + nf.format(overhead) + " micros");

	        counter = 0;
	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            probe = tests[i];
	            for (int k = 0; k < extraIterations; ++k) {
	                oldCol.getCollationKey(probe);
	                counter++;
	            }
	        }
	        endTime = System.currentTimeMillis();
	        oldDelta = delta = microsPerIteration(startTime, endTime, counter) - overhead;
	        System.out.println("Old sort key time: " + nf.format(delta)
	            + " micros (" + counter + " iterations)");

	        // Sort Key: new time

	        counter = 0;
	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            probe = tests[i];
	            for (int k = 0; k < extraIterations; ++k) {
	                newCol.getCollationKey(probe);
	                counter++;
	            }
	        }
	        endTime = System.currentTimeMillis();
	        delta = microsPerIteration(startTime, endTime, counter) - overhead;
	        System.out.println("New sort key time: " + nf.format(delta)
	            + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
	        System.out.println();

	        // ================================================
	        // Raw Compare

	        if (DO_RAW) {
	            // get overhead time (plain String.compareTo over every pair)
	            counter = 0;
	            startTime = System.currentTimeMillis();
	            int opt = 0; // to keep the compiler from optimizing out

	            for (int i = 0; i < size; ++i) {
	                probe = tests[i];
	                for (int j = 0; j < size; ++j) {
	                    opt ^= probe.compareTo(tests[j]);
	                    counter++;
	                }
	            }
	            endTime = System.currentTimeMillis();
	            overhead = microsPerIteration(startTime, endTime, counter);
	            System.out.println("overhead: " + nf.format(overhead) + " micros");

	            // Raw Compare: old time

	            counter = 0;
	            startTime = System.currentTimeMillis();

	            for (int i = 0; i < size; ++i) {
	                probe = tests[i];
	                for (int j = 0; j < size; ++j) {
	                    opt ^= oldCol.compare(probe, tests[j]);
	                    counter++;
	                }
	            }
	            endTime = System.currentTimeMillis();
	            oldDelta = delta = microsPerIteration(startTime, endTime, counter) - overhead;
	            System.out.println("Old raw compare time: " + nf.format(delta)
	                + " micros (" + counter + " iterations)");

	            // Raw Compare: new time

	            counter = 0;
	            startTime = System.currentTimeMillis();

	            for (int i = 0; i < size; ++i) {
	                probe = tests[i];
	                for (int j = 0; j < size; ++j) {
	                    opt ^= newCol.compare(probe, tests[j]);
	                    counter++;
	                }
	            }
	            endTime = System.currentTimeMillis();
	            delta = microsPerIteration(startTime, endTime, counter) - overhead;
	            System.out.println("New raw compare time: " + nf.format(delta)
	                + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
	            System.out.println();
	        }

	        // ================================================
	        // Binary Search
	        // note: I don't worry about getting the binary search precisely right, since I just want to
	        // see which strings would get compared.
	        //
	        // In all three measurements the array is sorted BEFORE the clock starts, so that only the
	        // searches are timed and the old and new figures are comparable.

	        // overhead (natural String ordering)

	        int iterations = (size * extraIterations);
	        int opt2 = 0; // keep from optimizing out
	        Arrays.sort(tests);

	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            probe = tests[i];
	            for (int k = 0; k < extraIterations; ++k) {
	                opt2 ^= Arrays.binarySearch(tests, probe);
	            }
	        }
	        endTime = System.currentTimeMillis();
	        overhead = delta = microsPerIteration(startTime, endTime, iterations);
	        System.out.println("Overhead: " + nf.format(delta)
	            + " micros (" + iterations + " iterations)");

	        // old time

	        Arrays.sort(tests, oldCol);

	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            probe = tests[i];
	            for (int k = 0; k < extraIterations; ++k) {
	                opt2 ^= Arrays.binarySearch(tests, probe, oldCol);
	            }
	        }
	        endTime = System.currentTimeMillis();
	        oldDelta = delta = microsPerIteration(startTime, endTime, iterations) - overhead;
	        System.out.println("Old binary search time: " + nf.format(delta)
	            + " micros (" + iterations + " iterations)");


	        // new time

	        Arrays.sort(tests, newCol);

	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            probe = tests[i];
	            for (int k = 0; k < extraIterations; ++k) {
	                opt2 ^= Arrays.binarySearch(tests, probe, newCol);
	            }
	        }
	        endTime = System.currentTimeMillis();
	        delta = microsPerIteration(startTime, endTime, iterations) - overhead;
	        System.out.println("New binary search time: " + nf.format(delta)
	            + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
	        System.out.println();

	        // ================================================
	        // Sort

	        String[] sortTests = (String[]) tests.clone();
	        extraIterations = 5;
	        iterations = (size * extraIterations);

	        // overhead (array copy plus a sort in natural String ordering)

	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            for (int k = 0; k < extraIterations; ++k) {
	                System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
	                Arrays.sort(sortTests);
	            }
	        }
	        endTime = System.currentTimeMillis();
	        overhead = delta = microsPerIteration(startTime, endTime, iterations);
	        System.out.println("overhead: " + nf.format(delta)
	            + " micros (" + iterations + " iterations)");

	        // old time

	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            for (int k = 0; k < extraIterations; ++k) {
	                System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
	                Arrays.sort(sortTests, oldCol);
	            }
	        }
	        endTime = System.currentTimeMillis();
	        oldDelta = delta = microsPerIteration(startTime, endTime, iterations) - overhead;
	        System.out.println("Old sort time: " + nf.format(delta)
	            + " micros (" + iterations + " iterations)");

	        // new time

	        startTime = System.currentTimeMillis();

	        for (int i = 0; i < size; ++i) {
	            for (int k = 0; k < extraIterations; ++k) {
	                System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
	                Arrays.sort(sortTests, newCol);
	            }
	        }
	        endTime = System.currentTimeMillis();
	        delta = microsPerIteration(startTime, endTime, iterations) - overhead;
	        System.out.println("New sort time: " + nf.format(delta)
	            + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));

	    }

	    /**
	     * Reads all non-empty data lines from the given file, decoding it as "UnicodeLittle".
	     * The file is always closed before returning, even if reading fails.
	     *
	     * @param fileName path of the data file
	     * @return the non-empty lines, in file order, as <code>String</code> elements
	     * @throws IOException if the file cannot be opened or read
	     */
	    private static ArrayList readTestData(String fileName) throws IOException {
	        ArrayList lines = new ArrayList();
	        FileInputStream fis = new FileInputStream(fileName);
	        try {
	            InputStreamReader isr = new InputStreamReader(fis, "UnicodeLittle");
	            BufferedReader br = new BufferedReader(isr, 32*1024);
	            int counter = 0;
	            while (true) {
	                String line = Utility.readDataLine(br);
	                if (line == null) break;
	                if (line.length() == 0) continue;
	                Utility.dot(counter++); // presumably a progress indicator; see Utility
	                lines.add(line);
	            }
	        } finally {
	            // closing the underlying stream releases the file handle
	            fis.close();
	        }
	        return lines;
	    }

	    /**
	     * Converts an elapsed wall-clock interval into microseconds per iteration.
	     *
	     * @param startMillis start time in milliseconds
	     * @param endMillis   end time in milliseconds
	     * @param iterations  number of iterations performed in the interval
	     * @return <code>1000 * (endMillis - startMillis) / iterations</code>
	     */
	    private static double microsPerIteration(double startMillis, double endMillis, int iterations) {
	        return 1000 * (endMillis - startMillis) / iterations;
	    }
	}