| /** |
| ******************************************************************************* |
| * Copyright (C) 1996-2001, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| * |
| * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CheckCollator.java,v $ |
| * $Date: 2002/08/09 23:56:24 $ |
| * $Revision: 1.2 $ |
| * |
| ******************************************************************************* |
| */ |
| |
| // http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html |
| |
| package com.ibm.text.UCD; |
| |
| import java.util.*; |
| import java.io.*; |
| import java.text.NumberFormat; |
| |
| import com.ibm.text.utility.*; |
| import com.ibm.icu.text.UTF16; |
| import com.ibm.icu.text.UnicodeSet; |
| |
| /** |
| * This is a quick and dirty program to get some idea of collation performance, comparing old Java to new stuff. |
| */ |
| abstract public class CheckCollator { |
| static final String PREFIX = "C:\\ICUInternal\\icu4c\\collation-perf-data\\TestNames_"; |
| static final boolean DO_RAW = false; |
| |
| static final NumberFormat nf = NumberFormat.getInstance(); |
| static final NumberFormat percent = NumberFormat.getPercentInstance(); |
| static { |
| nf.setMaximumFractionDigits(2); |
| } |
| |
| public static void main(String[] args) throws IOException { |
| |
| // later, drive off of args |
| |
| // choices are: Asian, Chinese, Japanese, Japanese_h, Japanese_k, Korean, Latin, Russian, Thai |
| //test(Locale.KOREAN, "Korean"); |
| test(Locale.ENGLISH, "Latin"); |
| test(Locale.FRENCH, "Latin"); |
| test(Locale.JAPANESE, "Japanese"); |
| } |
| |
| public static void test(Locale loc, String name) throws IOException { |
| |
| System.out.println(); |
| System.out.println("Testing " + loc.getDisplayName() + ", file: " + name); |
| System.out.println(); |
| |
| // get test data |
| |
| String fileName = PREFIX + name + ".txt"; |
| |
| FileInputStream fis = new FileInputStream(fileName); |
| InputStreamReader isr = new InputStreamReader(fis, "UnicodeLittle"); |
| BufferedReader br = new BufferedReader(isr, 32*1024); |
| |
| int counter = 0; |
| |
| ArrayList list = new ArrayList(); |
| while (true) { |
| String line = Utility.readDataLine(br); |
| if (line == null) break; |
| if (line.length() == 0) continue; |
| Utility.dot(counter++); |
| list.add(line); |
| } |
| System.out.println("Read " + counter + " lines in file"); |
| |
| int limit = 800; // put a limit on it to save time |
| |
| // pump it up if there aren't very many |
| while (list.size() < limit) { |
| list.addAll(list); |
| } |
| |
| int size = list.size(); |
| |
| |
| // later, adjust these so we always get a reasonble number of tries |
| |
| int extraIterations = 200; |
| if (size > limit) size = limit; |
| |
| String[] tests = new String [size]; |
| |
| for (int i = 0; i < size; ++i) { |
| tests[i] = (String) list.get(i); |
| } |
| |
| // get collators |
| |
| com.ibm.icu.text.Collator newCol = com.ibm.icu.text.Collator.getInstance(loc); |
| java.text.Collator oldCol = java.text.Collator.getInstance(loc); |
| |
| |
| double startTime, endTime; |
| double delta, oldDelta; |
| String probe; |
| |
| |
| // load classes at least once before starting |
| |
| newCol.compare("a", "b"); |
| oldCol.compare("a", "b"); |
| |
| // ================================================ |
| // check sort key size |
| |
| int stringSize = 0, newSize = 0, oldSize = 0; |
| |
| for (int i = 0; i < size; ++i) { |
| stringSize += tests[i].length() * 2; |
| byte[] newKey = newCol.getCollationKey(tests[i]).toByteArray(); |
| newSize += newKey.length; |
| byte[] oldKey = oldCol.getCollationKey(tests[i]).toByteArray(); |
| oldSize += oldKey.length; |
| } |
| delta = stringSize/(size + 0.0); |
| System.out.println("string size: " + nf.format(delta) + " bytes per key"); |
| System.out.println(); |
| |
| delta = oldDelta = (oldSize/(size + 0.0)); |
| System.out.println("old sortkey size: " + nf.format(delta) + " bytes per key "); |
| delta = (newSize/(size + 0.0)); |
| System.out.println("new sortkey size: " + nf.format(delta) + " bytes per key " + percent.format(delta/oldDelta)); |
| System.out.println(); |
| |
| // ================================================ |
| // Sort Key: old time |
| |
| // get overhead time |
| counter = 0; |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| for (int j = 0; j < size; ++j) { |
| counter++; |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| double overhead = (1000*(endTime - startTime) / counter); |
| System.out.println("overhead: " + nf.format((endTime - startTime) / counter) + " micros"); |
| |
| counter = 0; |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int k = 0; k < extraIterations; ++k) { |
| oldCol.getCollationKey(probe); |
| counter++; |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| oldDelta = delta = (1000*(endTime - startTime) / counter) - overhead; |
| System.out.println("Old sort key time: " + nf.format(delta) |
| + " micros (" + counter + " iterations)"); |
| |
| // Sort Key: new time |
| |
| counter = 0; |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int k = 0; k < extraIterations; ++k) { |
| newCol.getCollationKey(probe); |
| counter++; |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| delta = (1000*(endTime - startTime) / counter) - overhead; |
| System.out.println("New sort key time: " + nf.format(delta) |
| + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta)); |
| System.out.println(); |
| |
| // ================================================ |
| // Raw Compare |
| |
| if (DO_RAW) { |
| // get overhead time |
| counter = 0; |
| startTime = System.currentTimeMillis(); |
| int opt = 0; // to keep the compiler from optimizing out |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int j = 0; j < size; ++j) { |
| opt ^= probe.compareTo(tests[j]); |
| counter++; |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| overhead = (1000*(endTime - startTime) / counter); |
| System.out.println("overhead: " + nf.format((endTime - startTime) / counter) + " micros"); |
| |
| // Raw Compare: old time |
| |
| counter = 0; |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int j = 0; j < size; ++j) { |
| opt ^= oldCol.compare(probe, tests[j]); |
| counter++; |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| oldDelta = delta = (1000*(endTime - startTime) / counter) - overhead; |
| System.out.println("Old raw compare time: " + nf.format(delta) |
| + " micros (" + counter + " iterations)"); |
| |
| // Raw Compare: new time |
| |
| counter = 0; |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int j = 0; j < size; ++j) { |
| opt ^= newCol.compare(probe, tests[j]); |
| counter++; |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| delta = (1000*(endTime - startTime) / counter) - overhead; |
| System.out.println("New raw compare time: " + nf.format(delta) |
| + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta)); |
| System.out.println(); |
| } |
| |
| // ================================================ |
| // Binary Search |
| // note: I don't worry about getting the binary search precisely right, since I just want to |
| // see which strings would get compared. |
| |
| // overhead |
| |
| int iterations = (size * extraIterations); |
| startTime = System.currentTimeMillis(); |
| Arrays.sort(tests); |
| int opt2 = 0; // keep from optimizing out |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int k = 0; k < extraIterations; ++k) { |
| opt2 ^= Arrays.binarySearch(tests, probe); |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| overhead = delta = (1000*(endTime - startTime) / iterations); |
| System.out.println("Overhead: " + nf.format(delta) |
| + " micros (" + iterations + " iterations)"); |
| |
| // old time |
| |
| startTime = System.currentTimeMillis(); |
| Arrays.sort(tests, oldCol); |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int k = 0; k < extraIterations; ++k) { |
| opt2 ^= Arrays.binarySearch(tests, probe, oldCol); |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| oldDelta = delta = (1000*(endTime - startTime) / iterations) - overhead; |
| System.out.println("Old binary search time: " + nf.format(delta) |
| + " micros (" + iterations + " iterations)"); |
| |
| |
| // new time |
| |
| Arrays.sort(tests, newCol); |
| |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| probe = tests[i]; |
| for (int k = 0; k < extraIterations; ++k) { |
| opt2 ^= Arrays.binarySearch(tests, probe, newCol); |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| delta = (1000*(endTime - startTime) / iterations) - overhead; |
| System.out.println("New binary search time: " + nf.format(delta) |
| + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta)); |
| System.out.println(); |
| |
| // ================================================ |
| // Sort |
| |
| String[] sortTests = (String[]) tests.clone(); |
| extraIterations = 5; |
| iterations = (size * extraIterations); |
| |
| // overhead |
| |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| for (int k = 0; k < extraIterations; ++k) { |
| System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array |
| Arrays.sort(sortTests); |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| overhead = delta = (1000*(endTime - startTime) / iterations); |
| System.out.println("overhead: " + nf.format(delta) |
| + " micros (" + iterations + " iterations)"); |
| |
| // old time |
| |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| for (int k = 0; k < extraIterations; ++k) { |
| System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array |
| Arrays.sort(sortTests, oldCol); |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| oldDelta = delta = (1000*(endTime - startTime) / iterations) - overhead; |
| System.out.println("Old sort time: " + nf.format(delta) |
| + " micros (" + iterations + " iterations)"); |
| |
| // new time |
| |
| startTime = System.currentTimeMillis(); |
| |
| for (int i = 0; i < size; ++i) { |
| for (int k = 0; k < extraIterations; ++k) { |
| System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array |
| Arrays.sort(sortTests, newCol); |
| } |
| } |
| endTime = System.currentTimeMillis(); |
| delta = (1000*(endTime - startTime) / iterations) - overhead; |
| System.out.println("New sort time: " + nf.format(delta) |
| + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta)); |
| |
| } |
| } |