| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /******************************************************************** |
| * Copyright (c) 2002-2014, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ********************************************************************/ |
| |
| /** |
| * UCAConformanceTest performs conformance tests defined in the data |
| * files. ICU ships with stub data files, as the whole test are too |
| * long. To do the whole test, download the test files. |
| */ |
| |
| package com.ibm.icu.dev.test.collator; |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| |
| import org.junit.Before; |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| |
| import com.ibm.icu.dev.test.TestFmwk; |
| import com.ibm.icu.dev.test.TestUtil; |
| import com.ibm.icu.lang.UCharacter; |
| import com.ibm.icu.text.Collator; |
| import com.ibm.icu.text.RawCollationKey; |
| import com.ibm.icu.text.RuleBasedCollator; |
| import com.ibm.icu.text.UTF16; |
| import com.ibm.icu.util.ULocale; |
| import com.ibm.icu.util.VersionInfo; |
| |
| @RunWith(JUnit4.class) |
| public class UCAConformanceTest extends TestFmwk { |
| |
| public UCAConformanceTest() { |
| } |
| |
| @Before |
| public void init() throws Exception { |
| UCA = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); |
| comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT); |
| } |
| |
| private RuleBasedCollator UCA; |
| private RuleBasedCollator rbUCA; |
| private UTF16.StringComparator comparer; |
| private boolean isAtLeastUCA62 = UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0; |
| |
| @Test |
| public void TestTableNonIgnorable() { |
| setCollNonIgnorable(UCA); |
| openTestFile("NON_IGNORABLE"); |
| conformanceTest(UCA); |
| } |
| |
| @Test |
| public void TestTableShifted() { |
| setCollShifted(UCA); |
| openTestFile("SHIFTED"); |
| conformanceTest(UCA); |
| } |
| |
| @Test |
| public void TestRulesNonIgnorable() { |
| if (logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { |
| return; |
| } |
| initRbUCA(); |
| if (rbUCA == null) { |
| return; |
| } |
| |
| setCollNonIgnorable(rbUCA); |
| openTestFile("NON_IGNORABLE"); |
| conformanceTest(rbUCA); |
| } |
| |
| @Test |
| public void TestRulesShifted() { |
| logln("This test is currently disabled, as it is impossible to " |
| + "wholly represent fractional UCA using tailoring rules."); |
| return; |
| /* |
| * initRbUCA(); if(rbUCA == null) { return; } |
| * |
| * setCollShifted(rbUCA); openTestFile("SHIFTED"); testConformance(rbUCA); |
| */ |
| } |
| |
| BufferedReader in; |
| |
| private void openTestFile(String type) { |
| String collationTest = "CollationTest_"; |
| String ext = ".txt"; |
| try { |
| in = TestUtil.getDataReader(collationTest + type + "_SHORT" + ext); |
| } catch (Exception e) { |
| try { |
| in = TestUtil.getDataReader(collationTest + type + ext); |
| } catch (Exception e1) { |
| try { |
| in = TestUtil.getDataReader(collationTest + type + "_STUB" + ext); |
| logln("INFO: Working with the stub file.\n" + "If you need the full conformance test, please\n" |
| + "download the appropriate data files from:\n" |
| + "http://unicode.org/cldr/trac/browser/trunk/common/uca"); |
| } catch (Exception e11) { |
| errln("ERROR: Could not find any of the test files"); |
| } |
| } |
| } |
| } |
| |
| private void setCollNonIgnorable(RuleBasedCollator coll) { |
| if (coll != null) { |
| coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION); |
| coll.setLowerCaseFirst(false); |
| coll.setCaseLevel(false); |
| coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY); |
| coll.setAlternateHandlingShifted(false); |
| } |
| } |
| |
| private void setCollShifted(RuleBasedCollator coll) { |
| if (coll != null) { |
| coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION); |
| coll.setLowerCaseFirst(false); |
| coll.setCaseLevel(false); |
| coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY); |
| coll.setAlternateHandlingShifted(true); |
| } |
| } |
| |
| private void initRbUCA() { |
| if (rbUCA == null) { |
| String ucarules = UCA.getRules(true); |
| try { |
| rbUCA = new RuleBasedCollator(ucarules); |
| } catch (Exception e) { |
| errln("Failure creating UCA rule-based collator: " + e); |
| } |
| } |
| } |
| |
| private String parseString(String line) { |
| int i = 0, value; |
| StringBuilder result = new StringBuilder(), buffer = new StringBuilder(); |
| |
| for (;;) { |
| while (i < line.length() && Character.isWhitespace(line.charAt(i))) { |
| i++; |
| } |
| while (i < line.length() && Character.isLetterOrDigit(line.charAt(i))) { |
| buffer.append(line.charAt(i)); |
| i++; |
| } |
| if (buffer.length() == 0) { |
| // We hit something that was not whitespace/letter/digit. |
| // Should be ';' or end of string. |
| return result.toString(); |
| } |
| /* read one code point */ |
| value = Integer.parseInt(buffer.toString(), 16); |
| buffer.setLength(0); |
| result.appendCodePoint(value); |
| } |
| |
| } |
| |
| private static final int IS_SHIFTED = 1; |
| private static final int FROM_RULES = 2; |
| |
| private static boolean skipLineBecauseOfBug(String s, int flags) { |
| // Add temporary exceptions here if there are ICU bugs, until we can fix them. |
| // For examples see the ICU 52 version of this file. |
| return false; |
| } |
| |
| private static int normalizeResult(int result) { |
| return result < 0 ? -1 : result == 0 ? 0 : 1; |
| } |
| |
| private void conformanceTest(RuleBasedCollator coll) { |
| if (in == null || coll == null) { |
| return; |
| } |
| int skipFlags = 0; |
| if (coll.isAlternateHandlingShifted()) { |
| skipFlags |= IS_SHIFTED; |
| } |
| if (coll == rbUCA) { |
| skipFlags |= FROM_RULES; |
| } |
| |
| logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest"); |
| boolean withSortKeys = getProperty("ucaconfnosortkeys") == null; |
| |
| int lineNo = 0; |
| |
| String line = null, oldLine = null, buffer = null, oldB = null; |
| RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey(); |
| RawCollationKey oldSk = null, newSk = sk1; |
| |
| try { |
| while ((line = in.readLine()) != null) { |
| lineNo++; |
| if (line.length() == 0 || line.charAt(0) == '#') { |
| continue; |
| } |
| buffer = parseString(line); |
| |
| if (skipLineBecauseOfBug(buffer, skipFlags)) { |
| logln("Skipping line " + lineNo + " because of a known bug"); |
| continue; |
| } |
| |
| if (withSortKeys) { |
| coll.getRawCollationKey(buffer, newSk); |
| } |
| if (oldSk != null) { |
| boolean ok = true; |
| int skres = withSortKeys ? oldSk.compareTo(newSk) : 0; |
| int cmpres = coll.compare(oldB, buffer); |
| int cmpres2 = coll.compare(buffer, oldB); |
| |
| if (cmpres != -cmpres2) { |
| errln(String.format( |
| "Compare result not symmetrical on line %d: " |
| + "previous vs. current (%d) / current vs. previous (%d)", |
| lineNo, cmpres, cmpres2)); |
| ok = false; |
| } |
| |
| // TODO: Compare with normalization turned off if the input passes the FCD test. |
| |
| if (withSortKeys && cmpres != normalizeResult(skres)) { |
| errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres |
| + ") on line " + lineNo); |
| ok = false; |
| } |
| |
| int res = cmpres; |
| if (res == 0 && !isAtLeastUCA62) { |
| // Up to UCA 6.1, the collation test files use a custom tie-breaker, |
| // comparing the raw input strings. |
| res = comparer.compare(oldB, buffer); |
| // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker, |
| // comparing the NFD versions of the input strings, |
| // which we do via setting strength=identical. |
| } |
| if (res > 0) { |
| errln("Line " + lineNo + " is not greater or equal than previous line"); |
| ok = false; |
| } |
| |
| if (!ok) { |
| errln(" Previous data line " + oldLine); |
| errln(" Current data line " + line); |
| if (withSortKeys) { |
| errln(" Previous key: " + CollationTest.prettify(oldSk)); |
| errln(" Current key: " + CollationTest.prettify(newSk)); |
| } |
| } |
| } |
| |
| oldSk = newSk; |
| oldB = buffer; |
| oldLine = line; |
| if (oldSk == sk1) { |
| newSk = sk2; |
| } else { |
| newSk = sk1; |
| } |
| } |
| } catch (Exception e) { |
| errln("Unexpected exception " + e); |
| } finally { |
| try { |
| in.close(); |
| } catch (IOException ignored) { |
| } |
| in = null; |
| } |
| } |
| } |