// blob: 3d9d1a6f7281796f3876ab50689fd8b217ef3ba1 [file] [log] [blame]
package com.ibm.text.UCD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.Tabber;
import com.ibm.icu.dev.test.util.TransliteratorUtilities;
import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.Utility;
/**
 * Command-line tool that checks invariants between Unicode sets.
 *
 * <p>The tool reads an invariants file line by line, echoes every line to
 * <code>UnicodeInvariantResults.txt</code> and appends diagnostics for lines
 * that cannot be parsed or whose relation does not hold. The recognised line
 * forms, after removing <code>#</code> comments and surrounding whitespace, are:
 * <ul>
 * <li><code>Stop</code> (any case) - stop reading the file;</li>
 * <li><code>Let name = value</code> - define a variable that is textually
 *     substituted into all following lines;</li>
 * <li><code>Show [Each] set</code> - list the contents of a set;</li>
 * <li><code>[Test] set relation set</code> - check that the relation holds,
 *     where the relation is one of the characters in
 *     {@link #INVARIANT_RELATIONS}.</li>
 * </ul>
 */
public class TestUnicodeInvariants {

    // Indexes into the options array; they must follow the order of its entries.
    private static final int
        HELP1 = 0,
        FILE = 1,
        RANGE = 2,
        TABLE = 3
    ;

    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.create("file", 'f', UOption.REQUIRES_ARG),
        UOption.create("norange", 'n', UOption.NO_ARG),
        UOption.create("table", 't', UOption.NO_ARG),
    };

    /**
     * Parses the command line and runs {@link #testInvariants(String, boolean)}.
     *
     * @param args command-line arguments; see the usage text printed for <code>-h</code>
     * @throws IOException if the input or output file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        UOption.parseArgs(args, options);

        // The help option was declared but never honoured; print usage instead of running.
        if (options[HELP1].doesOccur) {
            System.out.println("Usage: TestUnicodeInvariants [-h] [-f <file>] [-n] [-t]");
            System.out.println("  -h, --help      print this message and exit");
            System.out.println("  -f, --file      invariants file to read (default: UnicodeInvariants.txt)");
            System.out.println("  -n, --norange   do not merge ranges in the listings");
            System.out.println("  -t, --table     format the listings with the HTML tabber");
            return;
        }

        String file = "UnicodeInvariants.txt";
        if (options[FILE].doesOccur) file = options[FILE].value;
        boolean doRange = !options[RANGE].doesOccur;

        System.out.println("File:\t" + file);
        System.out.println("Ranges?\t" + doRange);
        System.out.println("HTML?\t" + options[TABLE].doesOccur);

        testInvariants(file, doRange);
    }

    /**
     * Chain together several SymbolTables.
     * @author Davis
     */
    static class ChainedSymbolTable implements SymbolTable {
        // TODO: add accessors?
        private final List symbolTables;

        /**
         * Each SymbolTable is accessed in order by the other methods,
         * so the first in the list is accessed first, etc.
         * @param symbolTables the tables to consult, in priority order
         */
        ChainedSymbolTable(SymbolTable[] symbolTables) {
            // Copy the array so later changes by the caller cannot alter the chain.
            this.symbolTables = Arrays.asList((SymbolTable[]) symbolTables.clone());
        }

        /**
         * Returns the first non-null lookup result from the chained tables.
         * @param s the variable name to look up
         * @return the value from the first table that knows the name, or null
         */
        public char[] lookup(String s) {
            for (Iterator it = symbolTables.iterator(); it.hasNext();) {
                SymbolTable st = (SymbolTable) it.next();
                char[] result = st.lookup(s);
                if (result != null) return result;
            }
            return null;
        }

        /**
         * Returns the first non-null matcher from the chained tables.
         * @param ch the stand-in character to look up
         * @return the matcher from the first table that knows the character, or null
         */
        public UnicodeMatcher lookupMatcher(int ch) {
            for (Iterator it = symbolTables.iterator(); it.hasNext();) {
                SymbolTable st = (SymbolTable) it.next();
                UnicodeMatcher result = st.lookupMatcher(ch);
                if (result != null) return result;
            }
            return null;
        }

        /**
         * Returns the first non-null reference parsed by the chained tables.
         * The position is restored after every table that returns null, so a
         * table that moves it on failure cannot confuse the next one.
         * @param text the text being parsed
         * @param pos on input, where to start parsing; advanced only on success
         * @param limit the index after the last character that may be parsed
         * @return the parsed reference, or null if no table recognises one
         */
        public String parseReference(String text, ParsePosition pos, int limit) {
            final int start = pos.getIndex();
            for (Iterator it = symbolTables.iterator(); it.hasNext();) {
                SymbolTable st = (SymbolTable) it.next();
                String result = st.parseReference(text, pos, limit);
                if (result != null) return result;
                pos.setIndex(start);
            }
            return null;
        }
    }

    /**
     * The relation characters accepted between the two sets of a test line.
     * This set is kept in step with the cases of {@link #relationHolds}: the
     * tilde, which has no implementation, is not accepted, and U+2261, which
     * is implemented as equality, is.
     */
    static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\= \\! \\? \\< \\> \u2261 \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");

    /**
     * Reads the invariants file, writes the annotated results file and prints
     * the number of parse errors and test failures to standard output.
     *
     * @param outputFile despite its name, the name of the invariants file to
     *        <em>read</em>; it is opened relative to "com/ibm/text/UCD/"
     * @param doRange whether the listings should merge ranges
     * @throws IOException if the input or output file cannot be read or written
     */
    public static void testInvariants(String outputFile, boolean doRange) throws IOException {
        String[][] variables = new String[100][2];
        int variableCount = 0;
        int parseErrorCount = 0;
        int testFailureCount = 0;

        PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
        try {
            out.write('\uFEFF'); // BOM
            BufferedReader in = BagFormatter.openUTF8Reader("com/ibm/text/UCD/", outputFile);
            try {
                BagFormatter errorLister = makeLister(doRange);
                BagFormatter showLister = makeLister(doRange);

                ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
                    ToolUnicodePropertySource.make(UCD.lastVersion).getSymbolTable("\u00D7"),
                    ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
                ParsePosition pp = new ParsePosition(0);

                while (true) {
                    String line = in.readLine();
                    if (line == null) break;
                    if (line.startsWith("\uFEFF")) line = line.substring(1);
                    out.println(line);

                    // Strip comments and surrounding whitespace.
                    line = line.trim();
                    int pos = line.indexOf('#');
                    if (pos >= 0) line = line.substring(0, pos).trim();
                    if (line.length() == 0) continue;
                    if (line.equalsIgnoreCase("Stop")) break;

                    // Substitute all variables defined so far.
                    line = Utility.replace(line, variables, variableCount);

                    // Variable definition.
                    if (line.startsWith("Let")) {
                        int x = line.indexOf('=');
                        if (x < 0) {
                            reportParseError(out, line, "Missing '=' in Let statement");
                            parseErrorCount++;
                            continue;
                        }
                        if (variableCount == variables.length) {
                            // Grow the table instead of overflowing it.
                            String[][] bigger = new String[variables.length * 2][2];
                            System.arraycopy(variables, 0, bigger, 0, variableCount);
                            variables = bigger;
                        }
                        variables[variableCount][0] = line.substring(3, x).trim();
                        variables[variableCount][1] = line.substring(x + 1).trim();
                        variableCount++;
                        continue;
                    }

                    // Listing request.
                    if (line.startsWith("Show")) {
                        String part = line.substring(4).trim();
                        boolean each = part.startsWith("Each");
                        if (each) part = part.substring(4).trim();

                        UnicodeSet showSet;
                        try {
                            pp.setIndex(0);
                            showSet = new UnicodeSet(part, pp, st);
                        } catch (IllegalArgumentException e) {
                            // A bad pattern is reported like any other parse error
                            // instead of aborting the whole run.
                            reportParseError(out, line, e.getMessage());
                            parseErrorCount++;
                            continue;
                        }
                        if (each) showLister.setMergeRanges(false);
                        showLister.showSetNames(out, showSet);
                        showLister.setMergeRanges(doRange);
                        continue;
                    }

                    // Everything else is a test; the keyword itself is optional.
                    if (line.startsWith("Test")) {
                        line = line.substring(4).trim();
                    }

                    char relation = 0;
                    String rightSide = null;
                    String leftSide = null;
                    UnicodeSet leftSet = null;
                    UnicodeSet rightSet = null;
                    try {
                        pp.setIndex(0);
                        leftSet = new UnicodeSet(line, pp, st);
                        leftSide = line.substring(0, pp.getIndex());
                        eatWhitespace(line, pp);

                        // Guard the charAt below: the line may end after the left set.
                        if (pp.getIndex() >= line.length()) {
                            throw new ParseException("Missing relation, must be one of "
                                    + INVARIANT_RELATIONS.toPattern(false), pp.getIndex());
                        }
                        relation = line.charAt(pp.getIndex());
                        if (!INVARIANT_RELATIONS.contains(relation)) {
                            throw new ParseException("Invalid relation, must be one of "
                                    + INVARIANT_RELATIONS.toPattern(false), pp.getIndex());
                        }
                        pp.setIndex(pp.getIndex() + 1); // skip char
                        eatWhitespace(line, pp);

                        int start = pp.getIndex();
                        if (start >= line.length()) {
                            throw new ParseException("Missing set after relation", start);
                        }
                        rightSet = new UnicodeSet(line, pp, st);
                        rightSide = line.substring(start, pp.getIndex());
                        eatWhitespace(line, pp);
                        if (line.length() != pp.getIndex()) {
                            throw new ParseException("Extra characters at end", pp.getIndex());
                        }
                    } catch (ParseException e) {
                        // Mark the offending position inside the echoed line.
                        String marked = line.substring(0, e.getErrorOffset())
                                + "<@>" + line.substring(e.getErrorOffset());
                        reportParseError(out, marked, e.getMessage());
                        parseErrorCount++;
                        continue;
                    } catch (IllegalArgumentException e) {
                        reportParseError(out, line, e.getMessage());
                        parseErrorCount++;
                        continue;
                    }

                    if (relationHolds(relation, leftSet, rightSet)) continue;

                    out.println();
                    out.println("FALSE");
                    out.println("**** START Error Info ****");
                    errorLister.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
                    out.println("**** END Error Info ****");
                    out.println();
                    testFailureCount++;
                }
            } finally {
                in.close();
            }

            out.println();
            out.println("**** SUMMARY ****");
            out.println();
            out.println("ParseErrorCount=" + parseErrorCount);
            out.println("TestFailureCount=" + testFailureCount);
        } finally {
            // Close (and thereby flush) the results even if processing failed.
            out.close();
        }
        System.out.println("ParseErrorCount=" + parseErrorCount);
        System.out.println("TestFailureCount=" + testFailureCount);
    }

    /**
     * Creates a lister configured the way both the error and the "Show"
     * listings need it.
     *
     * @param doRange whether the lister should merge ranges
     * @return the configured lister
     */
    private static BagFormatter makeLister(boolean doRange) {
        BagFormatter lister = new BagFormatter();
        lister.setMergeRanges(doRange);
        lister.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
        lister.setShowLiteral(TransliteratorUtilities.toXML);
        if (options[TABLE].doesOccur) lister.setTabber(new Tabber.HTMLTabber());
        return lister;
    }

    /**
     * Writes one parse-error report to the results file.
     *
     * @param out the results writer
     * @param shownLine the offending line as it should be echoed
     * @param message the explanation printed between the error-info markers
     */
    private static void reportParseError(PrintWriter out, String shownLine, String message) {
        out.println("PARSE ERROR:\t" + shownLine);
        out.println();
        out.println("**** START Error Info ****");
        out.println(message);
        out.println("**** END Error Info ****");
        out.println();
    }

    /**
     * Evaluates one relation between two sets.
     *
     * @param relation a character from {@link #INVARIANT_RELATIONS}
     * @param leftSet the set written before the relation
     * @param rightSet the set written after the relation
     * @return true if the relation holds
     * @throws IllegalArgumentException if the relation character has no implementation
     */
    private static boolean relationHolds(char relation, UnicodeSet leftSet, UnicodeSet rightSet) {
        switch (relation) {
        case '=': case '\u2261':
            return leftSet.equals(rightSet);
        case '<': case '\u2282': // proper subset
            return rightSet.containsAll(leftSet) && !leftSet.equals(rightSet);
        case '>': case '\u2283': // proper superset
            return leftSet.containsAll(rightSet) && !leftSet.equals(rightSet);
        case '\u2264': case '\u2286': // subset or equal
            return rightSet.containsAll(leftSet);
        case '\u2265': case '\u2287': // superset or equal
            return leftSet.containsAll(rightSet);
        case '!': // disjoint
            return leftSet.containsNone(rightSet);
        case '?': // overlapping, with neither set containing the other
            return !leftSet.equals(rightSet)
                    && !leftSet.containsAll(rightSet)
                    && !rightSet.containsAll(leftSet)
                    && !leftSet.containsNone(rightSet);
        default:
            throw new IllegalArgumentException("Internal Error");
        }
    }

    /**
     * Advances the parse position past any white space, stepping by code point.
     *
     * @param line the text being parsed
     * @param pp on input, where to start; on output, the index of the first
     *        non-white-space code point, or the length of the line
     */
    private static void eatWhitespace(String line, ParsePosition pp) {
        int i = pp.getIndex();
        while (i < line.length()) {
            int cp = UTF16.charAt(line, i);
            if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(cp)) {
                break;
            }
            i += UTF16.getCharCount(cp);
        }
        pp.setIndex(i);
    }
}