main/tests/collate/src/com/ibm/icu/dev/test/format/RbnfLenientScannerTest.java - external/github.com/unicode-org/icu - Git at Google

 /*
  *******************************************************************************
  * Copyright (C) 2009-2014, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  */
 package com.ibm.icu.dev.test.format;

 import java.text.NumberFormat;
 import java.text.ParseException;
 import java.util.Locale;
 import java.util.Random;

 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.impl.text.RbnfScannerProviderImpl;
 import com.ibm.icu.text.RbnfLenientScannerProvider;
 import com.ibm.icu.text.RuleBasedNumberFormat;
 import com.ibm.icu.util.ULocale;

 /**
  * Spot checks for lenient parsing with {@link RuleBasedNumberFormat}, both
  * with an explicitly installed {@link RbnfLenientScannerProvider} and with
  * none set.
  */
 public class RbnfLenientScannerTest extends TestFmwk {
     /** Scanner provider installed by every test that sets one explicitly. */
     private static final RbnfLenientScannerProvider provider = new RbnfScannerProviderImpl();

     /**
      * Command-line entry point: runs this test class with the given
      * arguments. Anything thrown is printed instead of being propagated.
      */
     public static void main(String[] args) {
         try {
             new RbnfLenientScannerTest().run(args);
         }
         catch (Throwable e) {
             System.out.println("Entire test failed because of exception: "
                                + e.toString());
             e.printStackTrace();
         }
     }

     /**
      * Ensure that the default provider is instantiated and used if none is set
      * and lenient parse is on.
      */
     public void TestDefaultProvider() {
         RuleBasedNumberFormat formatter
             = new RuleBasedNumberFormat(Locale.US,
                                         RuleBasedNumberFormat.SPELLOUT);
         formatter.setLenientScannerProvider(null);
         formatter.setLenientParseMode(true);
         String[][] lpTestData = {
             { "2 thousand six HUNDRED   fifty-7", "2,657" },
         };

         if (logKnownIssue("9503", "Lenient parse problems with English RBNF")) {
             logln("Can't do lenient parse test due to http://bugs.icu-project.org/trac/ticket/9503");
         } else {
             doLenientParseTest(formatter, lpTestData);
         }
     }

     /**
      * Perform a simple spot check on the English spellout rules
      */
     public void TestEnglishSpellout() {
         RuleBasedNumberFormat formatter
             = new RuleBasedNumberFormat(Locale.US,
                                         RuleBasedNumberFormat.SPELLOUT);
         formatter.setLenientScannerProvider(provider);
         formatter.setLenientParseMode(true);
         String[][] lpTestData = {
             { "FOurhundred     thiRTY six", "436" },
             // test spaces before fifty-7 causing lenient parse match of "fifty-" to " fifty"
             // leaving "-7" for remaining parse, resulting in 2643 as the parse result.
             { "fifty-7", "57" },
             { " fifty-7", "57" },
             { "  fifty-7", "57" },
             { "2 thousand six HUNDRED   fifty-7", "2,657" },
             { "fifteen hundred and zero", "1,500" }
         };

         if (logKnownIssue("9503", null)) {
             logln("Can't do lenient parse test due to http://bugs.icu-project.org/trac/ticket/9503");
         } else {
             doLenientParseTest(formatter, lpTestData);
         }
     }

     /**
      * Perform a simple spot check on the duration-formatting rules
      */
     public void TestDurations() {
         RuleBasedNumberFormat formatter
             = new RuleBasedNumberFormat(Locale.US,
                                         RuleBasedNumberFormat.DURATION);
         formatter.setLenientScannerProvider(provider);
         formatter.setLenientParseMode(true);
         String[][] lpTestData = {
             { "2-51-33", "10,293" }
         };
         doLenientParseTest(formatter, lpTestData);
     }

     /**
      * Perform a simple spot check on the French spellout rules
      */
     public void TestFrenchSpellout() {
         RuleBasedNumberFormat formatter
             = new RuleBasedNumberFormat(Locale.FRANCE,
                                         RuleBasedNumberFormat.SPELLOUT);
         formatter.setLenientScannerProvider(provider);
         formatter.setLenientParseMode(true);
         String[][] lpTestData = {
             { "trente-et-un", "31" },
             { "un cent quatre vingt dix huit", "198" }
         };
         doLenientParseTest(formatter, lpTestData);
     }

     /**
      * Perform a simple spot check on the German spellout rules
      */
     public void TestGermanSpellout() {
         RuleBasedNumberFormat formatter
             = new RuleBasedNumberFormat(Locale.GERMANY,
                                         RuleBasedNumberFormat.SPELLOUT);
         formatter.setLenientScannerProvider(provider);
         formatter.setLenientParseMode(true);
         String[][] lpTestData = {
             { "ein Tausend sechs Hundert fuenfunddreissig", "1,635" }
         };
         doLenientParseTest(formatter, lpTestData);
     }

     /**
      * Formats a fixed list of numbers with the spellout, ordinal and duration
      * formats of every available locale and, for the locales selected for
      * parsing, parses each formatted string back in strict and in lenient mode.
      * Parsed values are not validated; parse failures are collected and logged
      * without failing the test.
      */
     public void TestAllLocales() {
         StringBuffer errors = new StringBuffer();
         ULocale[] locales = ULocale.getAvailableLocales();
         // formats[j] is the format selector that goes with the label names[j].
         int[] formats = {
             RuleBasedNumberFormat.SPELLOUT,
             RuleBasedNumberFormat.ORDINAL,
             RuleBasedNumberFormat.DURATION
         };
         String[] names = {
             " (spellout) ",
             " (ordinal)  ",
             " (duration) "
         };
         double[] numbers = {45.678, 1, 2, 10, 11, 100, 110, 200, 1000, 1111, -1111};
         Random r = null;

         // RBNF parse is extremely slow when lenient option is enabled.
         // For non-exhaustive mode, we only test a few locales.
         // "nl_NL", "be" had crash problem reported by #6534
         String[] parseLocales = {"en_US", "nl_NL", "be"};

         for (int i = 0; i < locales.length; ++i) {
             ULocale loc = locales[i];
             int count = numbers.length;
             boolean testParse = true;
             if (getInclusion() <= 5) {
                 testParse = false;
                 for (int k = 0; k < parseLocales.length; k++) {
                     if (loc.toString().equals(parseLocales[k])) {
                         testParse = true;
                         break;
                     }
                 }
             } else {
                 //RBNF parse is too slow.  Increase count only for debugging purpose for now.
                 //count = 100;
             }

             for (int j = 0; j < formats.length; ++j) {
                 RuleBasedNumberFormat fmt = new RuleBasedNumberFormat(loc, formats[j]);
                 String prefix = loc.getName() + names[j];

                 for (int c = 0; c < count; c++) {
                     double n;
                     if (c < numbers.length) {
                         n = numbers[c];
                     } else {
                         // Only reached when count is raised above numbers.length (debugging).
                         if (r == null) {
                             r = createRandom();
                         }
                         n = (r.nextInt(10000) - 3000) / 16d;
                     }

                     String s = fmt.format(n);
                     logln(prefix + "success format: " + n + " -> " + s);

                     if (testParse) {
                         // We do not validate the result in this test case,
                         // because there are cases which do not round trip by design.
                         // The two modes are attempted independently so that a strict
                         // failure does not prevent the lenient scanner from running.

                         // non-lenient parse
                         fmt.setLenientParseMode(false);
                         parseAndLog(fmt, s, prefix, "", errors);

                         // lenient parse
                         fmt.setLenientScannerProvider(provider);
                         fmt.setLenientParseMode(true);
                         parseAndLog(fmt, s, prefix, " (lenient)", errors);
                     }
                 }
             }
         }
         if (errors.length() > 0) {
             //TODO: We need to fix parse problems - see #6895 / #6896
             //errln(errors.toString());
             logln(errors.toString());
         }
     }

     /**
      * Parses {@code text} once with {@code fmt} as currently configured and logs
      * the outcome. A {@link ParseException} is logged and appended to
      * {@code errors} rather than propagated.
      *
      * @param fmt    formatter to parse with
      * @param text   string to parse
      * @param prefix locale/format label placed in front of every log line
      * @param mode   suffix naming the parse mode; empty for strict parsing
      * @param errors buffer collecting one line per parse failure
      */
     private void parseAndLog(RuleBasedNumberFormat fmt, String text,
                              String prefix, String mode, StringBuffer errors) {
         try {
             Number num = fmt.parse(text);
             logln(prefix + "success parse" + mode + ": " + text + " -> " + num);
         } catch (ParseException pe) {
             String msg = prefix + "ERROR" + mode + ":" + pe.getMessage();
             logln(msg);
             errors.append("\n" + msg);
         }
     }

     /**
      * Parses the first element of every row of {@code testData} with
      * {@code formatter}, formats the result with a US decimal format and
      * reports an error when it differs from the row's second element.
      * Each row is checked independently: an exception on one row is reported
      * and the remaining rows are still checked.
      *
      * @param formatter formatter under test, already configured by the caller
      * @param testData  rows of { text to parse, expected formatted number }
      */
     void doLenientParseTest(RuleBasedNumberFormat formatter,
                             String[][] testData) {
         NumberFormat decFmt = NumberFormat.getInstance(Locale.US);

         for (int i = 0; i < testData.length; i++) {
             String words = testData[i][0];
             String expectedNumber = testData[i][1];

             try {
                 String actualNumber = decFmt.format(formatter.parse(words));

                 if (!actualNumber.equals(expectedNumber)) {
                     errln("Lenient-parse spot check failed: for "
                           + words + ", expected " + expectedNumber
                           + ", but got " + actualNumber);
                 }
             }
             catch (Throwable e) {
                 // Report against the offending input and keep going with the next row.
                 errln("Test failed with exception for " + words + ": " + e.toString());
                 e.printStackTrace();
             }
         }
     }
 }
	/*
	*******************************************************************************
	* Copyright (C) 2009-2014, International Business Machines Corporation and *
	* others. All Rights Reserved. *
	*******************************************************************************
	*/
	package com.ibm.icu.dev.test.format;

	import java.text.NumberFormat;
	import java.text.ParseException;
	import java.util.Locale;
	import java.util.Random;

	import com.ibm.icu.dev.test.TestFmwk;
	import com.ibm.icu.impl.text.RbnfScannerProviderImpl;
	import com.ibm.icu.text.RbnfLenientScannerProvider;
	import com.ibm.icu.text.RuleBasedNumberFormat;
	import com.ibm.icu.util.ULocale;

	/**
	 * Spot checks for lenient parsing with {@link RuleBasedNumberFormat}, both
	 * with an explicitly installed {@link RbnfLenientScannerProvider} and with
	 * none set.
	 */
	public class RbnfLenientScannerTest extends TestFmwk {
	    /** Scanner provider installed by every test that sets one explicitly. */
	    private static final RbnfLenientScannerProvider provider = new RbnfScannerProviderImpl();

	    /**
	     * Command-line entry point: runs this test class with the given
	     * arguments. Anything thrown is printed instead of being propagated.
	     */
	    public static void main(String[] args) {
	        try {
	            new RbnfLenientScannerTest().run(args);
	        }
	        catch (Throwable e) {
	            System.out.println("Entire test failed because of exception: "
	                               + e.toString());
	            e.printStackTrace();
	        }
	    }

	    /**
	     * Ensure that the default provider is instantiated and used if none is set
	     * and lenient parse is on.
	     */
	    public void TestDefaultProvider() {
	        RuleBasedNumberFormat formatter
	            = new RuleBasedNumberFormat(Locale.US,
	                                        RuleBasedNumberFormat.SPELLOUT);
	        formatter.setLenientScannerProvider(null);
	        formatter.setLenientParseMode(true);
	        // The run of spaces before "fifty-7" is deliberate test input.
	        String[][] lpTestData = {
	            { "2 thousand six HUNDRED   fifty-7", "2,657" },
	        };

	        if (logKnownIssue("9503", "Lenient parse problems with English RBNF")) {
	            logln("Can't do lenient parse test due to http://bugs.icu-project.org/trac/ticket/9503");
	        } else {
	            doLenientParseTest(formatter, lpTestData);
	        }
	    }

	    /**
	     * Perform a simple spot check on the English spellout rules
	     */
	    public void TestEnglishSpellout() {
	        RuleBasedNumberFormat formatter
	            = new RuleBasedNumberFormat(Locale.US,
	                                        RuleBasedNumberFormat.SPELLOUT);
	        formatter.setLenientScannerProvider(provider);
	        formatter.setLenientParseMode(true);
	        // Runs of spaces inside these strings are deliberate test input.
	        String[][] lpTestData = {
	            { "FOurhundred     thiRTY six", "436" },
	            // test spaces before fifty-7 causing lenient parse match of "fifty-" to " fifty"
	            // leaving "-7" for remaining parse, resulting in 2643 as the parse result.
	            { "fifty-7", "57" },
	            { " fifty-7", "57" },
	            { "  fifty-7", "57" },
	            { "2 thousand six HUNDRED   fifty-7", "2,657" },
	            { "fifteen hundred and zero", "1,500" }
	        };

	        if (logKnownIssue("9503", null)) {
	            logln("Can't do lenient parse test due to http://bugs.icu-project.org/trac/ticket/9503");
	        } else {
	            doLenientParseTest(formatter, lpTestData);
	        }
	    }

	    /**
	     * Perform a simple spot check on the duration-formatting rules
	     */
	    public void TestDurations() {
	        RuleBasedNumberFormat formatter
	            = new RuleBasedNumberFormat(Locale.US,
	                                        RuleBasedNumberFormat.DURATION);
	        formatter.setLenientScannerProvider(provider);
	        formatter.setLenientParseMode(true);
	        String[][] lpTestData = {
	            { "2-51-33", "10,293" }
	        };
	        doLenientParseTest(formatter, lpTestData);
	    }

	    /**
	     * Perform a simple spot check on the French spellout rules
	     */
	    public void TestFrenchSpellout() {
	        RuleBasedNumberFormat formatter
	            = new RuleBasedNumberFormat(Locale.FRANCE,
	                                        RuleBasedNumberFormat.SPELLOUT);
	        formatter.setLenientScannerProvider(provider);
	        formatter.setLenientParseMode(true);
	        String[][] lpTestData = {
	            { "trente-et-un", "31" },
	            { "un cent quatre vingt dix huit", "198" }
	        };
	        doLenientParseTest(formatter, lpTestData);
	    }

	    /**
	     * Perform a simple spot check on the German spellout rules
	     */
	    public void TestGermanSpellout() {
	        RuleBasedNumberFormat formatter
	            = new RuleBasedNumberFormat(Locale.GERMANY,
	                                        RuleBasedNumberFormat.SPELLOUT);
	        formatter.setLenientScannerProvider(provider);
	        formatter.setLenientParseMode(true);
	        String[][] lpTestData = {
	            { "ein Tausend sechs Hundert fuenfunddreissig", "1,635" }
	        };
	        doLenientParseTest(formatter, lpTestData);
	    }

	    /**
	     * Formats a fixed list of numbers with the spellout, ordinal and duration
	     * formats of every available locale and, for the locales selected for
	     * parsing, parses each formatted string back in strict and in lenient mode.
	     * Parsed values are not validated; parse failures are collected and logged
	     * without failing the test.
	     */
	    public void TestAllLocales() {
	        StringBuffer errors = new StringBuffer();
	        ULocale[] locales = ULocale.getAvailableLocales();
	        // formats[j] is the format selector that goes with the label names[j].
	        int[] formats = {
	            RuleBasedNumberFormat.SPELLOUT,
	            RuleBasedNumberFormat.ORDINAL,
	            RuleBasedNumberFormat.DURATION
	        };
	        // Labels are padded to equal width so log lines align.
	        String[] names = {
	            " (spellout) ",
	            " (ordinal)  ",
	            " (duration) "
	        };
	        double[] numbers = {45.678, 1, 2, 10, 11, 100, 110, 200, 1000, 1111, -1111};
	        Random r = null;

	        // RBNF parse is extremely slow when lenient option is enabled.
	        // For non-exhaustive mode, we only test a few locales.
	        // "nl_NL", "be" had crash problem reported by #6534
	        String[] parseLocales = {"en_US", "nl_NL", "be"};

	        for (int i = 0; i < locales.length; ++i) {
	            ULocale loc = locales[i];
	            int count = numbers.length;
	            boolean testParse = true;
	            if (getInclusion() <= 5) {
	                testParse = false;
	                for (int k = 0; k < parseLocales.length; k++) {
	                    if (loc.toString().equals(parseLocales[k])) {
	                        testParse = true;
	                        break;
	                    }
	                }
	            } else {
	                //RBNF parse is too slow. Increase count only for debugging purpose for now.
	                //count = 100;
	            }

	            for (int j = 0; j < formats.length; ++j) {
	                RuleBasedNumberFormat fmt = new RuleBasedNumberFormat(loc, formats[j]);
	                String prefix = loc.getName() + names[j];

	                for (int c = 0; c < count; c++) {
	                    double n;
	                    if (c < numbers.length) {
	                        n = numbers[c];
	                    } else {
	                        // Only reached when count is raised above numbers.length (debugging).
	                        if (r == null) {
	                            r = createRandom();
	                        }
	                        n = (r.nextInt(10000) - 3000) / 16d;
	                    }

	                    String s = fmt.format(n);
	                    logln(prefix + "success format: " + n + " -> " + s);

	                    if (testParse) {
	                        // We do not validate the result in this test case,
	                        // because there are cases which do not round trip by design.
	                        // The two modes are attempted independently so that a strict
	                        // failure does not prevent the lenient scanner from running.

	                        // non-lenient parse
	                        fmt.setLenientParseMode(false);
	                        parseAndLog(fmt, s, prefix, "", errors);

	                        // lenient parse
	                        fmt.setLenientScannerProvider(provider);
	                        fmt.setLenientParseMode(true);
	                        parseAndLog(fmt, s, prefix, " (lenient)", errors);
	                    }
	                }
	            }
	        }
	        if (errors.length() > 0) {
	            //TODO: We need to fix parse problems - see #6895 / #6896
	            //errln(errors.toString());
	            logln(errors.toString());
	        }
	    }

	    /**
	     * Parses {@code text} once with {@code fmt} as currently configured and logs
	     * the outcome. A {@link ParseException} is logged and appended to
	     * {@code errors} rather than propagated.
	     *
	     * @param fmt    formatter to parse with
	     * @param text   string to parse
	     * @param prefix locale/format label placed in front of every log line
	     * @param mode   suffix naming the parse mode; empty for strict parsing
	     * @param errors buffer collecting one line per parse failure
	     */
	    private void parseAndLog(RuleBasedNumberFormat fmt, String text,
	                             String prefix, String mode, StringBuffer errors) {
	        try {
	            Number num = fmt.parse(text);
	            logln(prefix + "success parse" + mode + ": " + text + " -> " + num);
	        } catch (ParseException pe) {
	            String msg = prefix + "ERROR" + mode + ":" + pe.getMessage();
	            logln(msg);
	            errors.append("\n" + msg);
	        }
	    }

	    /**
	     * Parses the first element of every row of {@code testData} with
	     * {@code formatter}, formats the result with a US decimal format and
	     * reports an error when it differs from the row's second element.
	     * Each row is checked independently: an exception on one row is reported
	     * and the remaining rows are still checked.
	     *
	     * @param formatter formatter under test, already configured by the caller
	     * @param testData  rows of { text to parse, expected formatted number }
	     */
	    void doLenientParseTest(RuleBasedNumberFormat formatter,
	                            String[][] testData) {
	        NumberFormat decFmt = NumberFormat.getInstance(Locale.US);

	        for (int i = 0; i < testData.length; i++) {
	            String words = testData[i][0];
	            String expectedNumber = testData[i][1];

	            try {
	                String actualNumber = decFmt.format(formatter.parse(words));

	                if (!actualNumber.equals(expectedNumber)) {
	                    errln("Lenient-parse spot check failed: for "
	                          + words + ", expected " + expectedNumber
	                          + ", but got " + actualNumber);
	                }
	            }
	            catch (Throwable e) {
	                // Report against the offending input and keep going with the next row.
	                errln("Test failed with exception for " + words + ": " + e.toString());
	                e.printStackTrace();
	            }
	        }
	    }
	}