| //##header |
| /* |
| ********************************************************************** |
| * Copyright (c) 2002-2007, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * Author: Mark Davis |
| ********************************************************************** |
| */ |
| //#ifndef FOUNDATION |
| package com.ibm.icu.dev.test.cldr; |
| |
| import java.io.File; |
| |
| //import org.unicode.cldr.util.LanguageTagParser; |
| //import org.unicode.cldr.util.Utility; |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| import java.io.StringWriter; |
| import java.text.ParseException; |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Date; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.Iterator; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import javax.xml.parsers.SAXParser; |
| import javax.xml.parsers.SAXParserFactory; |
| |
| //import org.unicode.cldr.test.CLDRTest; |
| //import org.unicode.cldr.tool.GenerateCldrTests; |
| import org.xml.sax.Attributes; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.helpers.DefaultHandler; |
| |
| import com.ibm.icu.util.Currency; |
| import com.ibm.icu.util.TimeZone; |
| import com.ibm.icu.util.ULocale; |
| import com.ibm.icu.dev.test.TestFmwk; |
| |
| import com.ibm.icu.text.Collator; |
| import com.ibm.icu.text.DateFormat; |
| import com.ibm.icu.text.NumberFormat; |
| import com.ibm.icu.text.SimpleDateFormat; |
| import com.ibm.icu.text.Transliterator; |
| import com.ibm.icu.text.UTF16; |
| import com.ibm.icu.text.UnicodeSet; |
| |
| /** |
| * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them |
| * correctly. |
| * WARNING: |
| * 1. for this to work right, you have to have downloaded the CLDR data, and |
| * then set the CLDR directory correctly, using |
| * -DCLDR_DIRECTORY=<top level of cldr> |
| * 2. You probably also need to increase memory, eg with -Xmx512m |
| * 3. For speed, you should also use -DCLDR_DTD_CACHE=C:\cldrcache\, where |
| * C:\cldrcache\ is a temp directory to keep the program from hitting the net for |
| * each file access. |
| * 4. You may use other environment variables to narrow what you test. Eg |
| * -DXML_MATCH=".*" -DTEST_MATCH="zone.*" -DZONE_MATCH="(?!America/Argentina).*" |
| * a. -DXML_MATCH="de.*" (or whatever regex you want) to just |
| * test certain locales. |
| * b. -DTEST_MATCH="zone.*" (or whatever regex you want) to just test collation, numbers, etc. |
| * c. -DZONE_MATCH=".*Moscow.*" (to only test certain zones) |
| * @author medavis |
| */ |
| public class TestCLDRVsICU extends TestFmwk { |
| // Compile-time switch guarding the extra logln/System.out diagnostics in this class. |
| static final boolean DEBUG = false; |
| |
| //ULocale uLocale = ULocale.ENGLISH; |
| //Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere |
| //static PrintWriter log; |
| // Parser used by _test(); it is assigned in an instance initializer further down in this class. |
| SAXParser SAX; |
| // Reusable matchers built from the XML_MATCH, TEST_MATCH and ZONE_MATCH system properties. |
| // They are created against an empty input and reset(...) before each use. |
| static Matcher LOCALE_MATCH, TEST_MATCH, ZONE_MATCH; |
| // Top level of the CLDR data; _test() reads the per-locale XML test files from beneath it. |
| static String CLDR_DIRECTORY; |
| static { |
| // Each lookup below prints -Dkey="value" to System.out when its property is set; |
| // presumably the two bare println() calls are there to put that echo on its own line. |
| System.out.println(); |
| // Every filter defaults to ".*", i.e. nothing is filtered out unless a property says so. |
| LOCALE_MATCH = getEnvironmentRegex("XML_MATCH", ".*"); |
| TEST_MATCH = getEnvironmentRegex("TEST_MATCH", ".*"); |
| ZONE_MATCH = getEnvironmentRegex("ZONE_MATCH", ".*"); // example |
| |
| // WARNING: THIS IS TEMPORARY UNTIL I GET THE FILES STRAIGHTENED OUT |
| // The fallback is a Windows-style path; pass -DCLDR_DIRECTORY=... to override it. |
| CLDR_DIRECTORY = getEnvironmentString("CLDR_DIRECTORY", "C:\\Unicode-CVS2\\cldr\\"); |
| System.out.println(); |
| } |
| |
| /** |
|  * Builds a reusable Matcher from the regular expression stored under the given |
|  * system property, falling back to defaultValue when the property is not set. |
|  * The matcher is created against an empty input, so callers reset(...) it with |
|  * the text they actually want to test. |
|  * |
|  * @param key          name of the system property holding the regular expression |
|  * @param defaultValue regular expression to use when the property is absent |
|  * @return a matcher for the chosen expression |
|  */ |
| private static Matcher getEnvironmentRegex(String key, String defaultValue) { |
|     String regex = getEnvironmentString(key, defaultValue); |
|     Pattern compiled = Pattern.compile(regex); |
|     return compiled.matcher(""); |
| } |
| |
| /** |
|  * Reads a system property, echoing it to System.out in -Dkey="value" form when |
|  * it is set. Nothing is printed when the default is used. |
|  * |
|  * @param key          name of the system property |
|  * @param defaultValue value returned when the property is not set |
|  * @return the property value, or defaultValue if the property is absent |
|  */ |
| private static String getEnvironmentString(String key, String defaultValue) { |
|     String configured = System.getProperty(key); |
|     if (configured != null) { |
|         System.out.print("-D" + key + "=\"" + configured + "\" "); |
|         return configured; |
|     } |
|     return defaultValue; |
| } |
| |
| public static void main(String[] args) throws Exception { |
| |
| new TestCLDRVsICU().run(args); |
| } |
| |
| // String form of every locale reported by the ICU services queried in TestFiles(). |
| Set allLocales = new TreeSet(); |
| |
| /** |
|  * Collects the languages of the locales that the ICU number, date and collation |
|  * services report as available, then runs the CLDR test file of each language |
|  * that passes the LOCALE_MATCH filter. |
|  * |
|  * @throws SAXException if a test file cannot be parsed |
|  * @throws IOException  if a test file cannot be read |
|  */ |
| public void TestFiles() throws SAXException, IOException { |
|     // only get ICU's locales |
|     Set languages = new TreeSet(); |
|     addLocales(NumberFormat.getAvailableULocales(), languages); |
|     addLocales(DateFormat.getAvailableULocales(), languages); |
|     addLocales(Collator.getAvailableULocales(), languages); |
| |
|     // filter, to make tracking down bugs easier |
|     Iterator it = languages.iterator(); |
|     while (it.hasNext()) { |
|         String language = (String) it.next(); |
|         if (LOCALE_MATCH.reset(language).matches()) { |
|             _test(language); |
|         } |
|     } |
| } |
| |
| public void addLocales(ULocale[] list, Collection s) { |
| for (int i = 0; i < list.length; ++i) { |
| allLocales.add(list[i].toString()); |
| s.add(list[i].getLanguage()); |
| } |
| } |
| |
| /** |
|  * Returns the language code of the locale, followed by "_" and the script code |
|  * when the locale has a script. |
|  * |
|  * @param uLocale locale to describe |
|  * @return language, or language_script |
|  */ |
| public String getLanguage(ULocale uLocale) { |
|     String language = uLocale.getLanguage(); |
|     String script = uLocale.getScript(); |
|     if (script.length() == 0) { |
|         return language; |
|     } |
|     return language + "_" + script; |
| } |
| |
| /** |
|  * Parses the CLDR test file common/test/(localeName).xml found beneath |
|  * CLDR_DIRECTORY, sending the SAX events to DEFAULT_HANDLER. |
|  * |
|  * @param localeName base name (without ".xml") of the test file to run |
|  * @throws SAXException if the file cannot be parsed |
|  * @throws IOException  if the file cannot be read |
|  */ |
| public void _test(String localeName) throws SAXException, IOException { |
|     // Compose the path with File(parent, child) rather than hard-coded "\\" separators, |
|     // so it also resolves on platforms whose separator is not a backslash and does not |
|     // depend on CLDR_DIRECTORY ending with a separator. |
|     File testDirectory = new File(new File(CLDR_DIRECTORY, "common"), "test"); |
|     File f = new File(testDirectory, localeName + ".xml"); |
|     logln("Testing " + f.getCanonicalPath()); |
|     SAX.parse(f, DEFAULT_HANDLER); |
| } |
| |
| static Transliterator toUnicode = Transliterator.getInstance("any-hex"); |
| static public String showString(String in) { |
| return "\u00AB" + in + "\u00BB (" + toUnicode.transliterate(in) + ")"; |
| } |
| // ============ SAX Handler Infrastructure ============ |
| |
| /** |
|  * Base class for the checks run against the result elements of one kind of CLDR |
|  * test element. The SAX callbacks pass attributes in through set() and |
|  * setAttributes() and trigger the comparison through checkResult(); subclasses |
|  * supply the actual comparison in handleResult(). |
|  */ |
| abstract public class Handler { |
|     // Attribute name/value pairs passed to set(), kept sorted by name. Nothing in |
|     // this class clears the map, so a value stays until it is overwritten. |
|     Map settings = new TreeMap(); |
|     // Name this handler was registered under; used in log output. |
|     String name; |
|     // ULocale objects that the current element applies to. |
|     List currentLocales = new ArrayList(); |
|     // Failure counter that subclasses may bump; reported and reset by checkResult(). |
|     int failures = 0; |
| |
|     void setName(String name) { |
|         this.name = name; |
|     } |
| |
|     /** Stores one attribute, replacing any earlier value of the same name. */ |
|     void set(String attributeName, String attributeValue) { |
|         //if (DEBUG) logln(attributeName + " => " + attributeValue); |
|         settings.put(attributeName, attributeValue); |
|     } |
| |
|     /** |
|      * Runs handleResult() for every current locale against the expected text. |
|      * Results marked draft="true" are skipped. An exception thrown by |
|      * handleResult() is reported through errln together with its stack trace |
|      * and ends the loop over the remaining locales. |
|      * |
|      * @param value expected text taken from the result element |
|      */ |
|     void checkResult(String value) { |
|         if ("true".equals(settings.get("draft"))) { |
|             return; // skip draft |
|         } |
|         // Placeholder so the exception report below always has a locale to print. |
|         ULocale ul = new ULocale("xx"); |
|         try { |
|             for (int i = 0; i < currentLocales.size(); ++i) { |
|                 ul = (ULocale) currentLocales.get(i); |
|                 handleResult(ul, value); |
|                 if (failures != 0) { |
|                     errln("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")"); |
|                     failures = 0; |
|                 } |
|             } |
|         } catch (Exception e) { |
|             StringWriter sw = new StringWriter(); |
|             PrintWriter pw = new PrintWriter(sw); |
|             e.printStackTrace(pw); |
|             pw.flush(); |
|             errln("Exception: Locale: " + ul + ",\tValue: <" + value + ">\r\n" + sw.toString()); |
|         } |
|     } |
| |
|     /** Logs the message followed by the handler name and all current settings. */ |
|     public void loglnSAX(String message) { |
|         StringBuffer line = new StringBuffer(message); |
|         line.append("\t[").append(name); |
|         for (Iterator it = settings.keySet().iterator(); it.hasNext();) { |
|             String attributeName = (String) it.next(); |
|             String attributeValue = (String) settings.get(attributeName); |
|             line.append(" ").append(attributeName).append("=<").append(attributeValue).append(">"); |
|         } |
|         line.append("]"); |
|         logln(line.toString()); |
|     } |
| |
|     /** |
|      * Returns the index of the first element of list that equals x. When there |
|      * is none, logs "Unknown String" and returns -1. |
|      */ |
|     int lookupValue(Object x, Object[] list) { |
|         for (int i = 0; i < list.length; ++i) { |
|             if (x.equals(list[i])) return i; |
|         } |
|         loglnSAX("Unknown String: " + x); |
|         return -1; |
|     } |
| |
|     /** |
|      * Compares what ICU produces for the locale with the expected text. |
|      * |
|      * @param currentLocale locale to test |
|      * @param value         expected text from the test file |
|      * @throws Exception any failure; checkResult() reports it through errln |
|      */ |
|     abstract void handleResult(ULocale currentLocale, String value) throws Exception; |
| |
|     /** |
|      * Rebuilds currentLocales from the space-separated "locales" attribute. |
|      * Locales that are not in allLocales are logged and left out. |
|      * |
|      * @param attributes attributes of the element that selected this handler |
|      */ |
|     public void setAttributes(Attributes attributes) { |
|         String localeList = attributes.getValue("locales"); |
|         if (localeList == null) { |
|             // No "locales" attribute: treat it as an empty list instead of failing in split(). |
|             localeList = ""; |
|         } |
|         String[] currentLocaleString = new String[50]; |
|         com.ibm.icu.impl.Utility.split(localeList, ' ', currentLocaleString); |
|         currentLocales.clear(); |
|         for (int i = 0; i < currentLocaleString.length; ++i) { |
|             String localeID = currentLocaleString[i]; |
|             if (localeID.length() == 0) continue; |
|             // The check used to be allLocales.contains(""), which never looked at the |
|             // locale being processed even though the message says it is not in ICU4J. |
|             if (!allLocales.contains(localeID)) { |
|                 logln("Skipping locale, not in ICU4J: " + localeID); |
|                 continue; |
|             } |
|             currentLocales.add(new ULocale(localeID)); |
|         } |
|         if (DEBUG) logln("Setting locales: " + currentLocales); |
|     } |
| } |
| |
| public Handler getHandler(String name, Attributes attributes) { |
| if (DEBUG) logln("Creating Handler: " + name); |
| Handler result = (Handler) RegisteredHandlers.get(name); |
| if (result == null) logln("Unexpected test type: " + name); |
| else { |
| result.setAttributes(attributes); |
| } |
| return result; |
| } |
| |
| public void addHandler(String name, Handler handler) { |
| if (!TEST_MATCH.reset(name).matches()) handler = new NullHandler(); |
| handler.setName(name); |
| RegisteredHandlers.put(name, handler); |
| } |
| Map RegisteredHandlers = new HashMap(); |
| |
| /** |
|  * Handler whose handleResult does nothing. addHandler registers one in place of |
|  * the real handler when the test name does not pass the TEST_MATCH filter. |
|  */ |
| class NullHandler extends Handler { |
| void handleResult(ULocale currentLocale, String value) throws Exception {} |
| } |
| |
| // ============ Statics for Date/Number Support ============ |
| |
| // Zone applied to the shared ISO parser and to the formatters in the date handler. |
| static TimeZone utc = TimeZone.getTimeZone("GMT"); |
| // Parser for the timestamps in the test files, e.g. 2007-01-31T12:00:00Z. |
| static DateFormat iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); |
| // iso is static, so configure it once in a static initializer. This used to be an |
| // instance initializer, which re-ran for every TestCLDRVsICU that was constructed. |
| static { |
|     iso.setTimeZone(utc); |
| } |
| // DateFormatValues[i] is the DateFormat style for DateFormatNames[i]; index 0 ("none") has no style. |
| static int[] DateFormatValues = {-1, DateFormat.SHORT, DateFormat.MEDIUM, DateFormat.LONG, DateFormat.FULL}; |
| static String[] DateFormatNames = {"none", "short", "medium", "long", "full"}; |
| |
| // Number types the number handler looks up; the position selects the NumberFormat factory. |
| static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific", "GBP"}; |
| |
| |
| // ============ Handler for Collation ============ |
| // Characters stripped from collation test lines; built from the pattern [:cc:]. |
| static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]"); |
| |
| /** |
|  * Returns a copy of the input without the code points contained in toRemove. |
|  * The text is walked by code point, not by char. |
|  * |
|  * @param in       text to filter |
|  * @param toRemove code points to drop |
|  * @return the filtered text |
|  */ |
| static String remove(String in, UnicodeSet toRemove) { |
|     StringBuffer kept = new StringBuffer(); |
|     int index = 0; |
|     while (index < in.length()) { |
|         int codePoint = UTF16.charAt(in, index); |
|         if (!toRemove.contains(codePoint)) { |
|             UTF16.append(kept, codePoint); |
|         } |
|         index += UTF16.getCharCount(codePoint); |
|     } |
|     return kept.toString(); |
| } |
| |
| { |
| // Collation: the expected text is a list of lines already in sorted order, so every |
| // non-blank line must compare greater than or equal to the non-blank line before it. |
| addHandler("collation", new Handler() { |
|     public void handleResult(ULocale currentLocale, String value) { |
|         Collator collator = Collator.getInstance(currentLocale); |
|         String previous = ""; |
|         int lineNumber = 1; |
|         int start = 0; |
|         while (start < value.length()) { |
|             int end = value.indexOf('\n', start); |
|             if (end < 0) { |
|                 end = value.length(); |
|             } |
|             String current = remove(value.substring(start, end), controlsAndSpace); // HACK for SAX |
|             if (current.trim().length() != 0) { // HACK for SAX |
|                 if (collator.compare(previous, current) > 0) { |
|                     failures++; |
|                     errln("\tLine " + lineNumber + "\tFailure: " |
|                             + showString(previous) + " should be leq " |
|                             + showString(current)); |
|                 } else if (DEBUG) { |
|                     logln("OK: " + current); |
|                 } |
|                 previous = current; |
|             } |
|             // Blank lines are counted too, so reported line numbers match the raw text. |
|             start = end + 1; |
|             lineNumber++; |
|         } |
|     } |
| }); |
| |
| // ============ Handler for Numbers ============ |
| // Walks the settings in name order: "input" supplies the number, and every other |
| // setting is treated as a number type that is formatted and compared immediately. |
| addHandler("number", new Handler() { |
|     public void handleResult(ULocale locale, String result) { |
|         double input = Double.NaN; |
|         for (Iterator it = settings.entrySet().iterator(); it.hasNext();) { |
|             Map.Entry setting = (Map.Entry) it.next(); |
|             String attributeName = (String) setting.getKey(); |
|             String attributeValue = (String) setting.getValue(); |
|             if (attributeName.equals("input")) { |
|                 input = Double.parseDouble(attributeValue); |
|                 continue; |
|             } |
|             // anything that is not the input is taken to be the number type |
|             int index = lookupValue(attributeValue, NumberNames); |
|             if (DEBUG) { |
|                 logln("Getting number format for " + locale); |
|             } |
|             NumberFormat nf; |
|             if (index == 0) { |
|                 nf = NumberFormat.getInstance(locale); |
|             } else if (index == 1) { |
|                 nf = NumberFormat.getIntegerInstance(locale); |
|             } else if (index == 2) { |
|                 nf = NumberFormat.getNumberInstance(locale); |
|             } else if (index == 3) { |
|                 nf = NumberFormat.getPercentInstance(locale); |
|             } else if (index == 4) { |
|                 nf = NumberFormat.getScientificInstance(locale); |
|             } else { |
|                 // every other index, including -1 for an unknown name: use the value as a currency code |
|                 nf = NumberFormat.getCurrencyInstance(locale); |
|                 nf.setCurrency(Currency.getInstance(attributeValue)); |
|             } |
|             String formatted = nf.format(input).trim(); |
|             result = result.trim(); // HACK because of SAX |
|             if (!formatted.equals(result)) { |
|                 errln("Number: Locale: " + locale |
|                         + ", \tType: " + attributeValue |
|                         + ", \tDraft: " + settings.get("draft") |
|                         + ", \tCLDR: <" + result + ">, ICU: <" + formatted + ">"); |
|             } |
|         } |
|     } |
| }); |
| |
| // ============ Handler for Dates ============ |
| addHandler("date", new Handler() { |
| public void handleResult(ULocale locale, String result) throws ParseException { |
| int dateFormat = 0; |
| int timeFormat = 0; |
| Date date = new Date(); |
| for (Iterator it = settings.keySet().iterator(); it.hasNext();) { |
| String attributeName = (String) it.next(); |
| String attributeValue = (String) settings |
| .get(attributeName); |
| if (attributeName.equals("input")) { |
| date = iso.parse(attributeValue); |
| continue; |
| } |
| // must be either dateType or timeType at this point |
| int index = lookupValue(attributeValue, DateFormatNames); |
| if (attributeName.equals("dateType")) |
| dateFormat = index; |
| else |
| timeFormat = index; |
| |
| } |
| SimpleDateFormat dt = getDateFormat(locale, dateFormat, timeFormat); |
| dt.setTimeZone(utc); |
| String temp = dt.format(date).trim(); |
| result = result.trim(); // HACK because of SAX |
| if (!temp.equals(result)) { |
| errln("DateTime: Locale: " + locale |
| + ", \tDate: " + DateFormatNames[dateFormat] |
| + ", \tTime: " + DateFormatNames[timeFormat] |
| + ", \tDraft: " + settings.get("draft") |
| + ", \tCLDR: <" + result + ">, ICU: <" + temp + ">"); |
| } |
| } |
| |
| /** |
| * |
| */ |
| private SimpleDateFormat getDateFormat(ULocale locale, int dateFormat, int timeFormat) { |
| if (DEBUG) logln("Getting date/time format for " + locale); |
| if (DEBUG && "ar_EG".equals(locale.toString())) { |
| System.out.println("debug here"); |
| } |
| DateFormat dt; |
| if (dateFormat == 0) { |
| dt = DateFormat.getTimeInstance(DateFormatValues[timeFormat], locale); |
| if (DEBUG) System.out.print("getTimeInstance"); |
| } else if (timeFormat == 0) { |
| dt = DateFormat.getDateInstance(DateFormatValues[dateFormat], locale); |
| if (DEBUG) System.out.print("getDateInstance"); |
| } else { |
| dt = DateFormat.getDateTimeInstance(DateFormatValues[dateFormat], DateFormatValues[timeFormat], locale); |
| if (DEBUG) System.out.print("getDateTimeInstance"); |
| } |
| if (DEBUG) System.out.println("\tinput:\t" + dateFormat + ", " + timeFormat + " => " + ((SimpleDateFormat)dt).toPattern()); |
| return (SimpleDateFormat)dt; |
| } |
| }); |
| |
| // ============ Handler for Zones ============ |
| // Formats the given date with the given pattern in the given zone and compares the |
| // text. The four fields keep their last value when a setting is absent. |
| addHandler("zoneFields", new Handler() { |
|     String date = ""; |
|     String zone = ""; |
|     String parse = ""; |
|     String pattern = ""; |
| |
|     public void handleResult(ULocale locale, String result) throws ParseException { |
|         for (Iterator it = settings.entrySet().iterator(); it.hasNext();) { |
|             Map.Entry setting = (Map.Entry) it.next(); |
|             String attributeName = (String) setting.getKey(); |
|             String attributeValue = (String) setting.getValue(); |
|             if (attributeName.equals("date")) { |
|                 date = attributeValue; |
|             } else if (attributeName.equals("field")) { |
|                 pattern = attributeValue; |
|             } else if (attributeName.equals("zone")) { |
|                 zone = attributeValue; |
|             } else if (attributeName.equals("parse")) { |
|                 parse = attributeValue; |
|             } |
|         } |
|         if (!ZONE_MATCH.reset(zone).matches()) { |
|             return; |
|         } |
|         Date dateValue = iso.parse(date); |
|         SimpleDateFormat field = new SimpleDateFormat(pattern, locale); |
|         field.setTimeZone(TimeZone.getTimeZone(zone)); |
|         String formatted = field.format(dateValue).trim(); |
|         // SKIP PARSE FOR NOW |
|         result = result.trim(); // HACK because of SAX |
|         if (formatted.equals(result)) { |
|             return; |
|         } |
|         formatted = field.format(dateValue).trim(); // call again for debugging |
|         errln("Zone Format: Locale: " + locale |
|                 + ", \tZone: " + zone |
|                 + ", \tDate: " + date |
|                 + ", \tField: " + pattern |
|                 + ", \tDraft: " + settings.get("draft") |
|                 + ", \tCLDR: <" + result |
|                 + ">, \tICU: <" + formatted + ">"); |
|     } |
| }); |
| } |
| |
| // ============ Gorp for SAX ============ |
| |
| // Creates the validating SAX parser used by _test() for every test file. |
| { |
|     try { |
|         SAXParserFactory factory = SAXParserFactory.newInstance(); |
|         factory.setValidating(true); |
|         SAX = factory.newSAXParser(); |
|     } catch (Exception e) { |
|         // Same exception type and message as before, but keep the underlying failure |
|         // as the cause so the reason the parser could not be created is not lost. |
|         IllegalArgumentException failure = new IllegalArgumentException("can't start"); |
|         failure.initCause(e); |
|         throw failure; |
|     } |
| } |
| |
| DefaultHandler DEFAULT_HANDLER = new DefaultHandler() { |
| static final boolean DEBUG = false; |
| StringBuffer lastChars = new StringBuffer(); |
| //boolean justPopped = false; |
| Handler handler; |
| |
| public void startElement( |
| String uri, |
| String localName, |
| String qName, |
| Attributes attributes) |
| throws SAXException { |
| //data.put(new ContextStack(contextStack), lastChars); |
| //lastChars = ""; |
| try { |
| if (qName.equals("cldrTest")) { |
| // skip |
| } else if (qName.equals("result")) { |
| for (int i = 0; i < attributes.getLength(); ++i) { |
| handler.set(attributes.getQName(i), attributes.getValue(i)); |
| } |
| } else { |
| handler = getHandler(qName, attributes); |
| //handler.set("locale", uLocale.toString()); |
| } |
| //if (DEBUG) logln("startElement:\t" + contextStack); |
| //justPopped = false; |
| } catch (RuntimeException e) { |
| e.printStackTrace(); |
| throw e; |
| } |
| } |
| public void endElement(String uri, String localName, String qName) |
| throws SAXException { |
| try { |
| //if (DEBUG) logln("endElement:\t" + contextStack); |
| if (qName.equals("result")) handler.checkResult(lastChars.toString()); |
| else if (qName.length() != 0) { |
| //logln("Unexpected contents of: " + qName + ", <" + lastChars + ">"); |
| } |
| lastChars.setLength(0); |
| //justPopped = true; |
| } catch (RuntimeException e) { |
| e.printStackTrace(); |
| throw e; |
| } |
| } |
| // Have to hack around the fact that the character data might be in pieces |
| public void characters(char[] ch, int start, int length) |
| throws SAXException { |
| try { |
| String value = new String(ch,start,length); |
| if (DEBUG) logln("characters:\t" + value); |
| lastChars.append(value); |
| //justPopped = false; |
| } catch (RuntimeException e) { |
| e.printStackTrace(); |
| throw e; |
| } |
| } |
| |
| // just for debugging |
| |
| public void notationDecl (String name, String publicId, String systemId) |
| throws SAXException { |
| logln("notationDecl: " + name |
| + ", " + publicId |
| + ", " + systemId |
| ); |
| } |
| |
| public void processingInstruction (String target, String data) |
| throws SAXException { |
| logln("processingInstruction: " + target + ", " + data); |
| } |
| |
| public void skippedEntity (String name) |
| throws SAXException |
| { |
| logln("skippedEntity: " + name |
| ); |
| } |
| |
| public void unparsedEntityDecl (String name, String publicId, |
| String systemId, String notationName) |
| throws SAXException { |
| logln("unparsedEntityDecl: " + name |
| + ", " + publicId |
| + ", " + systemId |
| + ", " + notationName |
| ); |
| } |
| |
| }; |
| } |
| //#endif |